flowchart LR
subgraph Tauri["Tauri Shell"]
A["React UI"]
end
subgraph Sidecar["Python Sidecar"]
B["FastAPI Server"]
C["AI Models"]
D["PyTorch / LangChain"]
end
A <-->|"HTTP/WebSocket"| B
B --> C
C --> D
style A fill:#61dafb
style B fill:#3776ab
style D fill:#ee4c2c
The AI Sidecar Pattern: Python for Intelligence, React for Beauty
React is amazing for UIs. But let’s be honest: JavaScript isn’t the best language for heavy AI workloads. Python is the undisputed king of AI (PyTorch, TensorFlow, Pandas, LangChain).
So, how do you build a desktop app that has the interactivity of React but the intelligence of Python?
You use the Sidecar Pattern.
What is a Sidecar?
In microservices, a “sidecar” is a helper process that runs alongside your main application. In our architecture, the main application (Tauri) spawns a separate, invisible child process—a Python server.
| Component | Language | Responsibility |
|---|---|---|
| Main App | Rust/React | UI, window management, system access |
| Sidecar | Python | AI inference, heavy computation |
Architecture Deep Dive
Process Lifecycle
sequenceDiagram
participant T as Tauri
participant P as Python Sidecar
participant R as React UI
T->>P: Spawn process (server.py)
P->>P: Initialize FastAPI
P->>P: Load AI models
loop Health Check
T->>P: GET /health
P-->>T: 200 OK
end
T->>R: Signal ready
R->>P: POST /infer
P-->>R: AI Response
Note over T,R: User closes app
T->>P: SIGTERM
P->>P: Graceful shutdown
Directory Structure
apps/
├── desktop/ # Tauri + React
│ ├── src/
│ │ ├── App.tsx
│ │ └── hooks/
│ │ └── useSidecar.ts
│ └── src-tauri/
│ └── src/
│ └── sidecar.rs
└── sidecar/ # Python AI Server
├── server.py
├── models/
│ └── inference.py
├── requirements.txt
└── pyproject.toml
Setting Up the Python Sidecar
FastAPI Server
# apps/sidecar/server.py
import os
import signal
import sys
import time
from contextlib import asynccontextmanager

import torch
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
# Global model reference, populated by lifespan() before the first request.
model = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Handle startup and shutdown.

    Loads the AI model once at server startup (not per-request) and
    releases resources when the process is asked to shut down.
    """
    global model
    print("🚀 Loading AI models...")
    model = load_model()
    print("✅ Models loaded, server ready")
    yield
    print("👋 Shutting down gracefully...")
    cleanup_resources()
app = FastAPI(lifespan=lifespan)

# Allow requests from Tauri: the production webview origin plus the dev
# server (1420 is Tauri's default Vite dev port).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["tauri://localhost", "http://localhost:1420"],
    allow_methods=["*"],
    allow_headers=["*"],
)
class InferenceRequest(BaseModel):
    """Request body for /infer and /infer/stream."""

    prompt: str
    max_tokens: int = 256
    temperature: float = 0.7


class InferenceResponse(BaseModel):
    """Response body for /infer, including simple timing telemetry."""

    result: str
    tokens_used: int
    inference_time_ms: float
@app.get("/health")
async def health():
    """Health check endpoint polled by Tauri during startup."""
    report = {
        "status": "healthy",
        "model_loaded": model is not None,
        "gpu_available": torch.cuda.is_available(),
    }
    return report
@app.post("/infer", response_model=InferenceResponse)
async def infer(req: InferenceRequest):
    """Run a single AI inference and report timing.

    Raises:
        HTTPException 503: the model has not finished loading yet
            (previously this fell through to a 500 AttributeError).
        HTTPException 500: the model raised during generation.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")
    # `time` is imported at module scope now, not per-request.
    start = time.perf_counter()
    try:
        result = model.generate(
            req.prompt,
            max_tokens=req.max_tokens,
            temperature=req.temperature,
        )
    except Exception as e:
        # Chain the cause so server logs keep the original traceback.
        raise HTTPException(status_code=500, detail=str(e)) from e
    elapsed = (time.perf_counter() - start) * 1000
    return InferenceResponse(
        result=result.text,
        tokens_used=result.token_count,
        inference_time_ms=elapsed,
    )
def load_model():
    """Load your AI model here.

    Placeholder: returns None until a real loader is wired in, which
    keeps /health reporting model_loaded=False.
    """
    # Example: Load a local LLM
    # from transformers import AutoModelForCausalLM, AutoTokenizer
    # return AutoModelForCausalLM.from_pretrained("microsoft/phi-2")
    pass
def cleanup_resources():
    """Free GPU memory and drop the model reference on shutdown."""
    global model
    # `is not None` instead of truthiness: model objects may define
    # __bool__/__len__ with surprising results.
    if model is not None:
        # Rebind to None rather than `del model`: deleting the global
        # binding would make any later read (e.g. a /health check racing
        # shutdown) raise NameError instead of seeing "not loaded".
        model = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
if __name__ == "__main__":
import uvicorn
port = int(os.environ.get("SIDECAR_PORT", 8000))
    uvicorn.run(app, host="127.0.0.1", port=port)
Spawning from Tauri (Rust)
Sidecar Management
// src-tauri/src/sidecar.rs
use std::process::{Child, Command, Stdio};
use std::sync::Mutex;
use tauri::State;

/// Shared handle to the spawned Python process; the `Mutex<Option<Child>>`
/// lets Tauri commands take or replace it safely across threads.
pub struct SidecarState {
    pub process: Mutex<Option<Child>>,
}
/// Spawn the Python FastAPI sidecar and stash its process handle.
/// Returns the port the server was told to listen on.
#[tauri::command]
pub fn start_sidecar(state: State<SidecarState>) -> Result<u16, String> {
    let port = 8000u16;
    // NOTE(review): assumes `python` is on PATH; production builds should
    // resolve a bundled interpreter instead.
    let child = Command::new("python")
        .args(["-m", "uvicorn", "server:app", "--host", "127.0.0.1", "--port", &port.to_string()])
        // Keep the server's SIDECAR_PORT fallback consistent with the
        // port we actually pass on the command line.
        .env("SIDECAR_PORT", port.to_string())
        .current_dir("../sidecar")
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| format!("Failed to start sidecar: {}", e))?;
    *state.process.lock().unwrap() = Some(child);
    Ok(port)
}
/// Kill the sidecar process, if one is running.
#[tauri::command]
pub fn stop_sidecar(state: State<SidecarState>) -> Result<(), String> {
    if let Some(mut child) = state.process.lock().unwrap().take() {
        child.kill().map_err(|e| e.to_string())?;
        // Reap the killed process so it doesn't linger as a zombie.
        let _ = child.wait();
    }
    Ok(())
}
Register in main.rs
// src-tauri/src/main.rs
mod sidecar;
// Required for Mutex::new below — the original snippet omitted this
// import and would not compile.
use std::sync::Mutex;

fn main() {
    tauri::Builder::default()
        // Managed state: the handle to the spawned Python process.
        .manage(sidecar::SidecarState {
            process: Mutex::new(None),
        })
        .invoke_handler(tauri::generate_handler![
            sidecar::start_sidecar,
            sidecar::stop_sidecar,
        ])
        .run(tauri::generate_context!())
        .expect("error running tauri application");
}
The React Integration
useSidecar Hook
// src/hooks/useSidecar.ts
import { invoke } from '@tauri-apps/api/core';
import { useEffect, useState } from 'react';

// Lifecycle status of the Python sidecar as seen from React.
interface SidecarStatus {
  ready: boolean;       // true once /health reports model_loaded
  port: number | null;  // HTTP port returned by the Rust start command
  error: string | null; // fatal startup error, if any
}
// Starts the Python sidecar on mount, polls /health until the model is
// loaded, and stops the process on unmount.
export function useSidecar() {
  const [status, setStatus] = useState<SidecarStatus>({
    ready: false,
    port: null,
    error: null,
  });
  useEffect(() => {
    let mounted = true;
    let attempts = 0;
    const maxAttempts = 30;
    async function startAndWait() {
      try {
        // Start the Python sidecar via the Rust command.
        const port = await invoke<number>('start_sidecar');
        // Poll the health endpoint until the model is ready.
        while (attempts < maxAttempts && mounted) {
          try {
            const res = await fetch(`http://localhost:${port}/health`);
            if (res.ok) {
              const health = await res.json();
              if (health.model_loaded) {
                if (mounted) {
                  setStatus({ ready: true, port, error: null });
                }
                return;
              }
            }
          } catch {
            // Server not ready yet; keep polling.
          }
          attempts++;
          await new Promise(r => setTimeout(r, 500));
        }
        // Fix: guard every setStatus with `mounted` — the original
        // updated state after unmount when polling timed out or threw.
        if (mounted) {
          setStatus({ ready: false, port, error: 'Sidecar failed to start' });
        }
      } catch (e) {
        if (mounted) {
          setStatus({ ready: false, port: null, error: String(e) });
        }
      }
    }
    startAndWait();
    return () => {
      mounted = false;
      invoke('stop_sidecar').catch(console.error);
    };
  }, []);
  return status;
}
AI Inference Hook
// src/hooks/useInference.ts
import { useMutation } from '@tanstack/react-query';
import { useSidecar } from './useSidecar';

// Client-side parameters (camelCase; mapped to the sidecar's snake_case
// JSON fields when the request body is built).
interface InferenceParams {
  prompt: string;
  maxTokens?: number;
  temperature?: number;
}

// Mirrors the sidecar's InferenceResponse model (snake_case on purpose).
interface InferenceResult {
  result: string;
  tokens_used: number;
  inference_time_ms: number;
}
// One-shot inference mutation against the sidecar's /infer endpoint.
export function useInference() {
  const { port, ready } = useSidecar();
  return useMutation({
    mutationFn: async (params: InferenceParams): Promise<InferenceResult> => {
      if (!ready || !port) {
        throw new Error('Sidecar not ready');
      }
      const res = await fetch(`http://localhost:${port}/infer`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          prompt: params.prompt,
          max_tokens: params.maxTokens ?? 256,
          temperature: params.temperature ?? 0.7,
        }),
      });
      if (!res.ok) {
        // Error bodies are usually FastAPI's {detail: ...}, but a crash
        // can yield a non-JSON body — don't let the JSON parse failure
        // mask the real error (the original threw from res.json()).
        const detail = await res
          .json()
          .then((body) => body.detail)
          .catch(() => null);
        throw new Error(detail || 'Inference failed');
      }
      return res.json();
    },
  });
}
Usage in a Component
// src/components/AIChat.tsx
import { useState } from 'react';
import { useInference } from '../hooks/useInference';
import { useSidecar } from '../hooks/useSidecar';

// Minimal prompt/response UI: blocks until the sidecar is up, then
// submits one-shot inference requests via useInference.
export function AIChat() {
  const [prompt, setPrompt] = useState('');
  const sidecar = useSidecar();
  const inference = useInference();
  // Fatal startup failure: the sidecar never came up.
  if (sidecar.error) {
    return <div className="error">Failed to start AI: {sidecar.error}</div>;
  }
  // Still starting: /health hasn't reported model_loaded yet.
  if (!sidecar.ready) {
    return (
      <div className="loading">
        <span className="spinner" />
        Loading AI models...
      </div>
    );
  }
  return (
    <div className="chat">
      <textarea
        value={prompt}
        onChange={(e) => setPrompt(e.target.value)}
        placeholder="Ask the AI anything..."
      />
      <button
        onClick={() => inference.mutate({ prompt })}
        disabled={inference.isPending}
      >
        {inference.isPending ? 'Thinking...' : 'Send'}
      </button>
      {inference.data && (
        <div className="response">
          <p>{inference.data.result}</p>
          <small>
            {inference.data.tokens_used} tokens in {inference.data.inference_time_ms.toFixed(0)}ms
          </small>
        </div>
      )}
      {inference.error && (
        <div className="error">{inference.error.message}</div>
      )}
    </div>
  );
}
Streaming Responses
For long AI outputs, stream tokens as they’re generated:
Python Streaming Endpoint
from fastapi.responses import StreamingResponse
import asyncio


@app.post("/infer/stream")
async def infer_stream(req: InferenceRequest):
    """Stream tokens as they're generated, as Server-Sent Events."""
    async def generate():
        # NOTE(review): generate_stream appears to be a synchronous
        # iterator; the sleep(0) hands control back to the event loop
        # between tokens so other requests (e.g. /health) stay served.
        for token in model.generate_stream(req.prompt):
            yield f"data: {token}\n\n"
            await asyncio.sleep(0)  # Yield control
        # SSE sentinel the client watches for to stop reading.
        yield "data: [DONE]\n\n"
    return StreamingResponse(
        generate(),
        media_type="text/event-stream"
    )
React Streaming Hook
// Streams tokens from POST /infer/stream (SSE-style) into React state.
export function useStreamingInference() {
  const { port } = useSidecar();
  const [streaming, setStreaming] = useState(false);
  const [tokens, setTokens] = useState<string[]>([]);
  const stream = async (prompt: string) => {
    setStreaming(true);
    setTokens([]);
    try {
      const res = await fetch(`http://localhost:${port}/infer/stream`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ prompt }),
      });
      const reader = res.body?.getReader();
      const decoder = new TextDecoder();
      while (reader) {
        const { done, value } = await reader.read();
        if (done) break;
        // `stream: true` fixes corruption when a multi-byte character
        // is split across chunk boundaries.
        const chunk = decoder.decode(value, { stream: true });
        const lines = chunk.split('\n');
        for (const line of lines) {
          if (line.startsWith('data: ')) {
            const data = line.slice(6);
            if (data === '[DONE]') {
              return;
            }
            setTokens(prev => [...prev, data]);
          }
        }
      }
    } finally {
      // Fix: always clear the flag — the original only reset it on
      // [DONE], leaving `streaming` stuck true after a network error
      // or when the response had no readable body.
      setStreaming(false);
    }
  };
  return { stream, streaming, tokens, text: tokens.join('') };
}
TanStack AI: The Modern Alternative
If you’re building web-based AI experiences (not desktop sidecars), TanStack AI offers a streamlined, type-safe SDK for React. It’s currently in Alpha but rapidly maturing.
Sidecar Pattern → Local models (Ollama, PyTorch) in desktop apps
TanStack AI → Cloud providers (OpenAI, Anthropic, Gemini) in web apps
Architecture
sequenceDiagram
participant U as React UI
participant H as useChat Hook
participant A as /api/chat
participant P as AI Provider
U->>H: User types message
H->>A: POST messages[]
A->>P: chat({ adapter, messages, tools })
P-->>A: Stream tokens (SSE)
A-->>H: Stream forwarded
H-->>U: Update UI in real-time
Installation
npm install @tanstack/ai @tanstack/ai-react @tanstack/ai-openai zod
Server Setup (Next.js API Route)
// app/api/chat/route.ts
import { chat, toServerSentEventsResponse } from '@tanstack/ai';
import { openaiText } from '@tanstack/ai-openai';

// Next.js route handler: forwards the chat history to OpenAI and
// streams the reply back to the browser as Server-Sent Events.
export async function POST(request: Request) {
  const { messages, conversationId } = await request.json();
  // Create streaming chat response
  const stream = chat({
    adapter: openaiText('gpt-4'), // Or 'gpt-3.5-turbo'
    messages,
    conversationId,
  });
  // Convert to HTTP SSE response
  return toServerSentEventsResponse(stream);
}
Set OPENAI_API_KEY in .env.local. For Anthropic use ANTHROPIC_API_KEY, for Gemini use GEMINI_API_KEY.
React Chat Component
// components/Chat.tsx
import { useState } from 'react';
import { useChat, fetchServerSentEvents } from '@tanstack/ai-react';

// Chat UI bound to the /api/chat SSE route via TanStack AI's useChat.
export function Chat() {
  const [input, setInput] = useState('');
  const { messages, sendMessage, isLoading } = useChat({
    connection: fetchServerSentEvents('/api/chat'),
  });
  // Submit on Enter/button; clear the input only after sending.
  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    if (input.trim() && !isLoading) {
      sendMessage(input);
      setInput('');
    }
  };
  return (
    <div className="chat-container">
      <div className="messages">
        {messages.map((message) => (
          <div key={message.id} className={`message ${message.role}`}>
            <strong>{message.role === 'assistant' ? 'AI' : 'You'}</strong>
            <div>
              {/* Messages arrive as typed parts; render thinking and
                  text parts, ignore any other part types. */}
              {message.parts.map((part, idx) => {
                if (part.type === 'thinking') {
                  return <em key={idx}>💭 {part.content}</em>;
                }
                if (part.type === 'text') {
                  return <p key={idx}>{part.content}</p>;
                }
                return null;
              })}
            </div>
          </div>
        ))}
      </div>
      <form onSubmit={handleSubmit}>
        <input
          value={input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Ask anything..."
          disabled={isLoading}
        />
        <button disabled={!input.trim() || isLoading}>
          {isLoading ? 'Thinking...' : 'Send'}
        </button>
      </form>
    </div>
  );
}
AI Tools (Function Calling)
The killer feature: define tools that the AI can call during conversation.
// lib/tools.ts
import { toolDefinition } from '@tanstack/ai';
import { z } from 'zod';

// Define the tool schema: the model sees name/description, while the
// zod schemas give both the model and TypeScript the exact I/O shapes.
const searchProductsDef = toolDefinition({
  name: 'searchProducts',
  description: 'Search for products in the catalog',
  inputSchema: z.object({
    query: z.string().describe('Search query'),
    maxResults: z.number().optional().default(5),
  }),
  outputSchema: z.array(z.object({
    id: z.string(),
    name: z.string(),
    price: z.number(),
  })),
});

// Server-side implementation
export const searchProducts = searchProductsDef.server(async ({ query, maxResults }) => {
  // This runs on your server. NOTE(review): `db` is assumed to be the
  // app's data layer — its import is not shown here; confirm in a real
  // project.
  const results = await db.products.search(query, { limit: maxResults });
  return results;
});
// app/api/chat/route.ts
import { chat, toServerSentEventsResponse } from '@tanstack/ai';
import { openaiText } from '@tanstack/ai-openai';
import { searchProducts } from '@/lib/tools';

// Same chat route as before, but registering a tool the model may call
// mid-conversation.
export async function POST(request: Request) {
  const { messages } = await request.json();
  const stream = chat({
    adapter: openaiText('gpt-4'),
    messages,
    tools: [searchProducts], // AI can now search products!
  });
  return toServerSentEventsResponse(stream);
}
Now when a user asks “Find me wireless headphones under $100”, the AI will: 1. Recognize the intent 2. Call searchProducts({ query: 'wireless headphones', maxResults: 5 }) 3. Receive results from your database 4. Format a helpful response
Provider Adapters
Swap providers without changing your code:
import { openaiText } from '@tanstack/ai-openai';
import { anthropicText } from '@tanstack/ai-anthropic';
import { geminiText } from '@tanstack/ai-gemini';
import { ollamaText } from '@tanstack/ai-ollama';
// OpenAI
chat({ adapter: openaiText('gpt-4'), messages });
// Anthropic Claude
chat({ adapter: anthropicText('claude-3-opus'), messages });
// Google Gemini
chat({ adapter: geminiText('gemini-pro'), messages });
// Local Ollama (no API key needed!)
chat({ adapter: ollamaText('llama2'), messages });
When to Use What
| Scenario | Recommendation |
|---|---|
| Web app + Cloud AI (OpenAI, Claude) | TanStack AI |
| Desktop app + Local models | Sidecar Pattern |
| Web app + Local AI (Ollama) | TanStack AI with Ollama adapter |
| Need GPU acceleration | Sidecar Pattern (Python + PyTorch) |
| Rapid prototyping | TanStack AI (fewer moving parts) |
For full documentation, see tanstack.com/ai/latest.
Error Handling & Recovery
Automatic Restart
// Retry wrapper around useSidecar with a capped attempt counter.
function useSidecarWithRecovery() {
  const [restartCount, setRestartCount] = useState(0);
  const sidecar = useSidecar();
  useEffect(() => {
    if (sidecar.error && restartCount < 3) {
      // Bump the counter 2s after an error. NOTE(review): as written
      // this only counts attempts — useSidecar never sees restartCount,
      // so nothing actually re-spawns the process; verify the intended
      // wiring (e.g. keying useSidecar's effect on restartCount).
      const timer = setTimeout(() => {
        setRestartCount(c => c + 1);
      }, 2000);
      return () => clearTimeout(timer);
    }
  }, [sidecar.error, restartCount]);
  return {
    ...sidecar,
    // Still within the retry budget → report "recovering" to the UI.
    isRecovering: sidecar.error && restartCount < 3,
  };
}
Graceful Degradation
function AIFeature() {
const sidecar = useSidecar();
if (!sidecar.ready) {
return <FallbackUI />; // Show non-AI features
}
return <AIChat />;
}
Performance Tips
| Optimization | Impact |
|---|---|
| Preload models on startup | Eliminates first-inference latency |
| Use GPU if available | 10-100x faster inference |
| Batch requests | Reduce per-request overhead |
| Keep connection alive | Avoid TCP handshake per request |
| Use WebSockets for bidirectional | Lower latency than HTTP polls |
Model Loading Strategy
# Load once at startup, not per-request
@asynccontextmanager
async def lifespan(app: FastAPI):
global model
# Check GPU availability
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🎮 Using device: {device}")
# Load with appropriate precision
model = load_model().to(device)
if device == "cuda":
model = model.half() # FP16 for GPU
yield
del model
torch.cuda.empty_cache()Why This Architecture Wins
Decoupling: The UI team iterates on React while the AI team iterates on Python. The API contract is the only touchpoint.
Stability: If Python crashes (OOM, model error), the UI stays responsive. React shows “Reconnecting…” and relaunches the sidecar.
Ecosystem: Full access to npm for UI and pip for AI—no compromises.
Debugging: Test the Python server independently with curl or Postman. Test React with mocked responses.
Deployment: Bundle Python with PyInstaller or ship a conda environment alongside your app.
In the next chapter, we’ll discuss how to render the complex visual outputs of these AI models using a High-Performance Canvas.