diff --git a/.changeset/realtime-chat.md b/.changeset/realtime-chat.md
new file mode 100644
index 000000000..43d570b19
--- /dev/null
+++ b/.changeset/realtime-chat.md
@@ -0,0 +1,18 @@
+---
+'@tanstack/ai': minor
+'@tanstack/ai-client': minor
+'@tanstack/ai-openai': minor
+'@tanstack/ai-elevenlabs': minor
+'@tanstack/ai-react': minor
+---
+
+feat: add realtime voice chat with OpenAI and ElevenLabs adapters
+
+Adds realtime voice/text chat capabilities:
+
+- **@tanstack/ai**: `realtimeToken()` function and shared realtime types (`RealtimeToken`, `RealtimeMessage`, `RealtimeSessionConfig`, `RealtimeStatus`, `RealtimeMode`, `AudioVisualization`, events, and error types)
+- **@tanstack/ai-client**: Framework-agnostic `RealtimeClient` class with connection lifecycle, audio I/O, message state management, tool execution, and `RealtimeAdapter`/`RealtimeConnection` interfaces
+- **@tanstack/ai-openai**: `openaiRealtime()` client adapter (WebRTC) and `openaiRealtimeToken()` server token adapter with support for semantic VAD, multiple voices, and all realtime models
+- **@tanstack/ai-elevenlabs**: `elevenlabsRealtime()` client adapter (WebSocket) and `elevenlabsRealtimeToken()` server token adapter for ElevenLabs conversational AI agents
+- **@tanstack/ai-react**: `useRealtimeChat()` hook with reactive state for status, mode, messages, pending transcripts, audio visualization levels, VAD control, text/image input, and interruptions
+- **Docs**: Realtime Voice Chat guide and full API reference for all realtime classes, interfaces, functions, and type aliases
diff --git a/docs/config.json b/docs/config.json
index 23330cb22..490bbf3e7 100644
--- a/docs/config.json
+++ b/docs/config.json
@@ -78,6 +78,10 @@
"label": "Runtime Adapter Switching",
"to": "guides/runtime-adapter-switching"
},
+ {
+ "label": "Realtime Voice Chat",
+ "to": "guides/realtime-chat"
+ },
{
"label": "Text-to-Speech",
"to": "guides/text-to-speech"
@@ -228,6 +232,10 @@
"label": "ToolCallManager",
"to": "reference/classes/ToolCallManager"
},
+ {
+ "label": "RealtimeClient",
+ "to": "reference/classes/RealtimeClient"
+ },
{
"label": "WordBoundaryStrategy",
"to": "reference/classes/WordBoundaryStrategy"
@@ -315,6 +323,10 @@
"label": "uiMessageToModelMessages",
"to": "reference/functions/uiMessageToModelMessages"
},
+ {
+ "label": "realtimeToken",
+ "to": "reference/functions/realtimeToken"
+ },
{
"label": "untilFinishReason",
"to": "reference/functions/untilFinishReason"
@@ -346,6 +358,10 @@
"label": "AudioPart",
"to": "reference/interfaces/AudioPart"
},
+ {
+ "label": "AudioVisualization",
+ "to": "reference/interfaces/AudioVisualization"
+ },
{
"label": "BaseStreamChunk",
"to": "reference/interfaces/BaseStreamChunk"
@@ -426,6 +442,38 @@
"label": "ProcessorState",
"to": "reference/interfaces/ProcessorState"
},
+ {
+ "label": "RealtimeAdapter",
+ "to": "reference/interfaces/RealtimeAdapter"
+ },
+ {
+ "label": "RealtimeClientOptions",
+ "to": "reference/interfaces/RealtimeClientOptions"
+ },
+ {
+ "label": "RealtimeConnection",
+ "to": "reference/interfaces/RealtimeConnection"
+ },
+ {
+ "label": "RealtimeMessage",
+ "to": "reference/interfaces/RealtimeMessage"
+ },
+ {
+ "label": "RealtimeSessionConfig",
+ "to": "reference/interfaces/RealtimeSessionConfig"
+ },
+ {
+ "label": "RealtimeToken",
+ "to": "reference/interfaces/RealtimeToken"
+ },
+ {
+ "label": "RealtimeTokenAdapter",
+ "to": "reference/interfaces/RealtimeTokenAdapter"
+ },
+ {
+ "label": "RealtimeTokenOptions",
+ "to": "reference/interfaces/RealtimeTokenOptions"
+ },
{
"label": "ResponseFormat",
"to": "reference/interfaces/ResponseFormat"
@@ -581,6 +629,22 @@
"label": "MessagePart",
"to": "reference/type-aliases/MessagePart"
},
+ {
+ "label": "RealtimeEvent",
+ "to": "reference/type-aliases/RealtimeEvent"
+ },
+ {
+ "label": "RealtimeMessagePart",
+ "to": "reference/type-aliases/RealtimeMessagePart"
+ },
+ {
+ "label": "RealtimeMode",
+ "to": "reference/type-aliases/RealtimeMode"
+ },
+ {
+ "label": "RealtimeStatus",
+ "to": "reference/type-aliases/RealtimeStatus"
+ },
{
"label": "ModalitiesArrayToUnion",
"to": "reference/type-aliases/ModalitiesArrayToUnion"
diff --git a/docs/guides/realtime-chat.md b/docs/guides/realtime-chat.md
new file mode 100644
index 000000000..afe2943c6
--- /dev/null
+++ b/docs/guides/realtime-chat.md
@@ -0,0 +1,446 @@
+---
+title: Realtime Voice Chat
+id: realtime-chat
+order: 14
+---
+
+TanStack AI provides a complete realtime voice chat system for building voice-to-voice AI interactions. The realtime API supports multiple providers (OpenAI, ElevenLabs), automatic tool execution, audio visualization, and multimodal input including images.
+
+## Overview
+
+Realtime voice chat differs from text-based chat in several key ways:
+
+- **Bidirectional audio** - Users speak into a microphone, and the AI responds with synthesized voice
+- **Voice Activity Detection (VAD)** - Automatically detects when the user starts and stops speaking
+- **Interruptions** - Users can interrupt the AI mid-response
+- **Low latency** - Uses WebRTC or WebSocket connections for near-instant communication
+- **Multimodal** - Supports text input, image input, and tool calling alongside voice
+
+The realtime system follows the same adapter architecture as the rest of TanStack AI:
+
+1. **Server** generates ephemeral tokens using `realtimeToken()` with a provider-specific token adapter
+2. **Client** connects using `RealtimeClient` (or `useRealtimeChat` in React) with a provider-specific connection adapter
+3. **Provider adapters** handle the protocol differences between OpenAI WebRTC, ElevenLabs WebSocket, etc.
+
+## Quick Start
+
+### 1. Set Up the Server Token Endpoint
+
+The server generates short-lived tokens so your API keys never reach the client:
+
+```typescript
+import { realtimeToken } from '@tanstack/ai'
+import { openaiRealtimeToken } from '@tanstack/ai-openai'
+import { createServerFn } from '@tanstack/react-start'
+
+const getRealtimeToken = createServerFn({ method: 'POST' })
+ .handler(async () => {
+ return realtimeToken({
+ adapter: openaiRealtimeToken({
+ model: 'gpt-4o-realtime-preview',
+ }),
+ })
+ })
+```
+
+> **Note:** The `realtimeToken()` function works with any server framework. The example above uses TanStack Start, but you can use Express, Hono, Fastify, or any other framework that can handle HTTP requests.
+
+### 2. Connect from the Client (React)
+
+```typescript
+import { useRealtimeChat } from '@tanstack/ai-react'
+import { openaiRealtime } from '@tanstack/ai-openai'
+
+function VoiceChat() {
+ const {
+ status,
+ mode,
+ messages,
+ connect,
+ disconnect,
+ pendingUserTranscript,
+ pendingAssistantTranscript,
+ inputLevel,
+ outputLevel,
+ } = useRealtimeChat({
+ getToken: () => fetch('/api/realtime-token', { method: 'POST' }).then(r => r.json()),
+ adapter: openaiRealtime(),
+ instructions: 'You are a helpful voice assistant.',
+ voice: 'alloy',
+ })
+
+  return (
+    <div>
+      <button onClick={status === 'connected' ? disconnect : connect}>
+        {status === 'connected' ? 'Disconnect' : 'Connect'}
+      </button>
+      <div>Status: {status}</div>
+      <div>Mode: {mode}</div>
+      {pendingUserTranscript && (
+        <div>You: {pendingUserTranscript}...</div>
+      )}
+      {pendingAssistantTranscript && (
+        <div>AI: {pendingAssistantTranscript}...</div>
+      )}
+      {messages.map((msg) => (
+        <div key={msg.id}>
+          <strong>{msg.role}:</strong>
+          {msg.parts.map((part, i) => (
+            <span key={i}>
+              {part.type === 'text' ? part.content : null}
+              {part.type === 'audio' ? part.transcript : null}
+            </span>
+          ))}
+        </div>
+      ))}
+    </div>
+  )
+}
+```
+
+## Providers
+
+### OpenAI Realtime
+
+OpenAI's realtime API uses WebRTC for low-latency voice communication.
+
+**Server (token generation):**
+
+```typescript
+import { realtimeToken } from '@tanstack/ai'
+import { openaiRealtimeToken } from '@tanstack/ai-openai'
+
+const token = await realtimeToken({
+ adapter: openaiRealtimeToken({
+ model: 'gpt-4o-realtime-preview',
+ }),
+})
+```
+
+**Client (connection):**
+
+```typescript
+import { openaiRealtime } from '@tanstack/ai-openai'
+
+const adapter = openaiRealtime()
+```
+
+**Environment variables:** `OPENAI_API_KEY`
+
+**Available models:**
+
+| Model | Description |
+|-------|-------------|
+| `gpt-4o-realtime-preview` | Full realtime model |
+| `gpt-4o-mini-realtime-preview` | Smaller, faster realtime model |
+| `gpt-realtime` | Latest realtime model |
+| `gpt-realtime-mini` | Latest mini realtime model |
+
+**Available voices:** `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, `cedar`
+
+### ElevenLabs Realtime
+
+ElevenLabs uses WebSocket connections and requires an agent configured in their dashboard.
+
+**Server (token generation):**
+
+```typescript
+import { realtimeToken } from '@tanstack/ai'
+import { elevenlabsRealtimeToken } from '@tanstack/ai-elevenlabs'
+
+const token = await realtimeToken({
+ adapter: elevenlabsRealtimeToken({
+ agentId: 'your-agent-id',
+ }),
+})
+```
+
+**Client (connection):**
+
+```typescript
+import { elevenlabsRealtime } from '@tanstack/ai-elevenlabs'
+
+const adapter = elevenlabsRealtime()
+```
+
+**Environment variables:** `ELEVENLABS_API_KEY`, `ELEVENLABS_AGENT_ID` (optional)
+
+## Voice Activity Detection (VAD)
+
+VAD controls how the system detects when the user is speaking. Three modes are available:
+
+| Mode | Description |
+|------|-------------|
+| `server` | Provider handles speech detection server-side (default) |
+| `semantic` | Uses semantic understanding to detect turn boundaries (OpenAI only) |
+| `manual` | Application controls when to listen via `startListening()`/`stopListening()` |
+
+```typescript
+const chat = useRealtimeChat({
+ // ...
+ vadMode: 'semantic',
+ semanticEagerness: 'medium', // 'low' | 'medium' | 'high'
+})
+```
+
+With `manual` VAD mode, use push-to-talk style interactions:
+
+```typescript
+const { startListening, stopListening } = useRealtimeChat({
+ vadMode: 'manual',
+ autoCapture: false,
+ // ...
+})
+
+// In your UI
+<button onMouseDown={startListening} onMouseUp={stopListening}>Hold to Talk</button>
+```
+
+## Tools
+
+Realtime sessions support client-side tools. Define tools using the standard `toolDefinition()` API and pass their client implementations:
+
+```typescript
+import { toolDefinition } from '@tanstack/ai'
+import { z } from 'zod'
+
+const getWeatherDef = toolDefinition({
+ name: 'getWeather',
+ description: 'Get weather for a location',
+ inputSchema: z.object({
+ location: z.string().meta({ description: 'City name' }),
+ }),
+ outputSchema: z.object({
+ temperature: z.number(),
+ conditions: z.string(),
+ }),
+})
+
+const getWeather = getWeatherDef.client(async ({ location }) => {
+ const res = await fetch(`/api/weather?location=${location}`)
+ return res.json()
+})
+
+// Pass tools to the hook
+const chat = useRealtimeChat({
+ // ...
+ tools: [getWeather],
+})
+```
+
+The realtime client automatically executes tool calls and sends results back to the provider. Tool calls appear as `tool-call` and `tool-result` parts in messages.
+
+## Text and Image Input
+
+In addition to voice, you can send text messages and images:
+
+```typescript
+const { sendText, sendImage } = useRealtimeChat({ /* ... */ })
+
+// Send a text message
+sendText('What is the weather like today?')
+
+// Send an image (base64 data or URL)
+sendImage(base64ImageData, 'image/png')
+```
+
+## Audio Visualization
+
+The hook provides real-time audio level data for building visualizations:
+
+```typescript
+const {
+ inputLevel, // 0-1 normalized microphone volume
+ outputLevel, // 0-1 normalized speaker volume
+ getInputFrequencyData, // Uint8Array for frequency spectrum
+ getOutputFrequencyData,
+ getInputTimeDomainData, // Uint8Array for waveform
+ getOutputTimeDomainData,
+} = useRealtimeChat({ /* ... */ })
+```
+
+The `inputLevel` and `outputLevel` values update on every animation frame while connected, making them suitable for driving CSS animations or canvas visualizations:
+
+```typescript
+function AudioIndicator({ level }: { level: number }) {
+ return (
+    <div style={{ opacity: 0.3 + level * 0.7 }} />
+ )
+}
+```
+
+For more detailed visualizations, use the frequency and time-domain data getters inside a `requestAnimationFrame` loop.
+
+## Session Configuration
+
+Configure the realtime session through the hook options:
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `getToken` | `() => Promise<RealtimeToken>` | required | Function to fetch a token from the server |
+| `adapter` | `RealtimeAdapter` | required | Provider adapter (`openaiRealtime()`, `elevenlabsRealtime()`) |
+| `instructions` | `string` | — | System instructions for the assistant |
+| `voice` | `string` | — | Voice to use for audio output |
+| `tools` | `AnyClientTool[]` | — | Client-side tools with execution logic |
+| `vadMode` | `'server' \| 'semantic' \| 'manual'` | `'server'` | Voice activity detection mode |
+| `semanticEagerness` | `'low' \| 'medium' \| 'high'` | — | Eagerness for semantic VAD |
+| `autoPlayback` | `boolean` | `true` | Auto-play assistant audio |
+| `autoCapture` | `boolean` | `true` | Request microphone on connect |
+| `outputModalities` | `Array<'audio' \| 'text'>` | — | Response modalities |
+| `temperature` | `number` | — | Generation temperature |
+| `maxOutputTokens` | `number \| 'inf'` | — | Max tokens in a response |
+
+## Connection Lifecycle
+
+The realtime client manages a connection lifecycle with these statuses:
+
+| Status | Description |
+|--------|-------------|
+| `idle` | Not connected |
+| `connecting` | Establishing connection |
+| `connected` | Active session |
+| `reconnecting` | Reconnecting after interruption |
+| `error` | Connection error occurred |
+
+And these modes while connected:
+
+| Mode | Description |
+|------|-------------|
+| `idle` | Connected but not actively interacting |
+| `listening` | Capturing user audio input |
+| `thinking` | Processing user input |
+| `speaking` | AI is generating a response |
+
+```typescript
+const { status, mode, error, connect, disconnect } = useRealtimeChat({ /* ... */ })
+
+// Handle connection
+useEffect(() => {
+ if (status === 'error' && error) {
+ console.error('Connection error:', error.message)
+ }
+}, [status, error])
+```
+
+## Interruptions
+
+Users can interrupt the AI while it's speaking:
+
+```typescript
+const { interrupt, mode } = useRealtimeChat({ /* ... */ })
+
+// Programmatically interrupt
+if (mode === 'speaking') {
+ interrupt()
+}
+```
+
+With server or semantic VAD, interruptions happen automatically when the user starts speaking. Interrupted messages are marked with `interrupted: true` in the messages array.
+
+## Using RealtimeClient Directly
+
+For non-React applications or more control, use `RealtimeClient` directly:
+
+```typescript
+import { RealtimeClient } from '@tanstack/ai-client'
+import { openaiRealtime } from '@tanstack/ai-openai'
+
+const client = new RealtimeClient({
+ getToken: () => fetch('/api/realtime-token', { method: 'POST' }).then(r => r.json()),
+ adapter: openaiRealtime(),
+ instructions: 'You are a helpful assistant.',
+ voice: 'alloy',
+ onMessage: (message) => {
+ console.log(`${message.role}:`, message.parts)
+ },
+ onStatusChange: (status) => {
+ console.log('Status:', status)
+ },
+ onModeChange: (mode) => {
+ console.log('Mode:', mode)
+ },
+})
+
+// Connect
+await client.connect()
+
+// Send text
+client.sendText('Hello!')
+
+// Subscribe to state changes
+const unsub = client.onStateChange((state) => {
+ console.log('Messages:', state.messages.length)
+})
+
+// Disconnect when done
+await client.disconnect()
+
+// Clean up
+client.destroy()
+```
+
+## Message Structure
+
+Realtime messages use a `parts`-based structure similar to `UIMessage`:
+
+```typescript
+interface RealtimeMessage {
+ id: string
+ role: 'user' | 'assistant'
+ timestamp: number
+  parts: Array<RealtimeMessagePart>
+ interrupted?: boolean
+}
+```
+
+Each part can be one of:
+
+| Part Type | Fields | Description |
+|-----------|--------|-------------|
+| `text` | `content` | Text content from `sendText()` |
+| `audio` | `transcript`, `durationMs` | Transcribed voice content |
+| `tool-call` | `id`, `name`, `arguments`, `input`, `output` | Tool invocation |
+| `tool-result` | `toolCallId`, `content` | Tool execution result |
+| `image` | `data`, `mimeType` | Image sent via `sendImage()` |
+
+## Error Handling
+
+Handle errors through the `onError` callback or the `error` state:
+
+```typescript
+const { error } = useRealtimeChat({
+ // ...
+ onError: (err) => {
+ if (err.message.includes('Permission denied')) {
+ alert('Microphone access is required for voice chat.')
+ } else {
+ console.error('Realtime error:', err)
+ }
+ },
+})
+```
+
+## Best Practices
+
+1. **Token security** - Always generate tokens server-side. Never expose API keys to the client.
+2. **Microphone permissions** - Handle the case where the user denies microphone access gracefully.
+3. **Cleanup** - Always disconnect when unmounting components. The `useRealtimeChat` hook handles this automatically.
+4. **Instructions** - Keep voice assistant instructions concise. Remind the model it's in a voice interface so responses stay conversational.
+5. **Tool design** - Keep tool descriptions clear and tool outputs small, since results are processed in real time.
+6. **Error recovery** - Implement retry logic for transient connection failures.
+
+## Next Steps
+
+- [Tools](./tools) - Learn about the isomorphic tool system
+- [Text-to-Speech](./text-to-speech) - Non-realtime speech generation
+- [Multimodal Content](./multimodal-content) - Working with images, audio, and video
diff --git a/docs/reference/classes/RealtimeClient.md b/docs/reference/classes/RealtimeClient.md
new file mode 100644
index 000000000..6e78100de
--- /dev/null
+++ b/docs/reference/classes/RealtimeClient.md
@@ -0,0 +1,276 @@
+---
+id: RealtimeClient
+title: RealtimeClient
+---
+
+# Class: RealtimeClient
+
+Defined in: [realtime-client.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai-client/src/realtime-client.ts)
+
+Client for managing realtime voice conversations.
+
+Handles connection lifecycle, audio I/O, message state, and tool execution for realtime voice-to-voice AI interactions. This is the framework-agnostic core that powers `useRealtimeChat` in React.
+
+## Example
+
+```typescript
+import { RealtimeClient } from '@tanstack/ai-client'
+import { openaiRealtime } from '@tanstack/ai-openai'
+
+const client = new RealtimeClient({
+ getToken: () => fetch('/api/realtime-token').then(r => r.json()),
+ adapter: openaiRealtime(),
+ tools: [myTool.client(handler)],
+ onMessage: (msg) => console.log('Message:', msg),
+})
+
+await client.connect()
+```
+
+## Constructors
+
+### Constructor
+
+```ts
+new RealtimeClient(options): RealtimeClient;
+```
+
+#### Parameters
+
+##### options
+
+[`RealtimeClientOptions`](../interfaces/RealtimeClientOptions.md)
+
+Configuration options for the client.
+
+#### Returns
+
+`RealtimeClient`
+
+## Properties
+
+### status
+
+```ts
+readonly status: RealtimeStatus;
+```
+
+Current connection status (`'idle'`, `'connecting'`, `'connected'`, `'reconnecting'`, `'error'`).
+
+***
+
+### mode
+
+```ts
+readonly mode: RealtimeMode;
+```
+
+Current session mode (`'idle'`, `'listening'`, `'thinking'`, `'speaking'`).
+
+***
+
+### messages
+
+```ts
+readonly messages: Array<RealtimeMessage>;
+```
+
+Array of conversation messages. Updated as transcripts are finalized and messages complete.
+
+***
+
+### error
+
+```ts
+readonly error: Error | null;
+```
+
+Current error, if any.
+
+***
+
+### pendingUserTranscript
+
+```ts
+readonly pendingUserTranscript: string | null;
+```
+
+Partial transcript of what the user is currently saying (before finalization).
+
+***
+
+### pendingAssistantTranscript
+
+```ts
+readonly pendingAssistantTranscript: string | null;
+```
+
+Partial transcript of the assistant's current response (while speaking).
+
+***
+
+### audio
+
+```ts
+readonly audio: AudioVisualization | null;
+```
+
+Audio visualization data for the current connection. Returns `null` when not connected.
+
+## Methods
+
+### connect()
+
+```ts
+connect(): Promise<void>;
+```
+
+Connect to the realtime session. Fetches a token via `getToken()` and establishes the connection through the adapter.
+
+#### Returns
+
+`Promise<void>`
+
+#### Throws
+
+If token fetch or connection fails.
+
+***
+
+### disconnect()
+
+```ts
+disconnect(): Promise<void>;
+```
+
+Disconnect from the realtime session. Cleans up audio resources, event subscriptions, and token refresh timers.
+
+#### Returns
+
+`Promise<void>`
+
+***
+
+### startListening()
+
+```ts
+startListening(): void;
+```
+
+Start listening for voice input. Only needed when `vadMode` is `'manual'`.
+
+#### Returns
+
+`void`
+
+***
+
+### stopListening()
+
+```ts
+stopListening(): void;
+```
+
+Stop listening for voice input. Only needed when `vadMode` is `'manual'`.
+
+#### Returns
+
+`void`
+
+***
+
+### interrupt()
+
+```ts
+interrupt(): void;
+```
+
+Interrupt the current assistant response.
+
+#### Returns
+
+`void`
+
+***
+
+### sendText()
+
+```ts
+sendText(text): void;
+```
+
+Send a text message instead of voice.
+
+#### Parameters
+
+##### text
+
+`string`
+
+#### Returns
+
+`void`
+
+***
+
+### sendImage()
+
+```ts
+sendImage(imageData, mimeType): void;
+```
+
+Send an image to the conversation.
+
+#### Parameters
+
+##### imageData
+
+`string`
+
+Base64-encoded image data or a URL.
+
+##### mimeType
+
+`string`
+
+MIME type of the image (e.g., `'image/png'`, `'image/jpeg'`).
+
+#### Returns
+
+`void`
+
+***
+
+### onStateChange()
+
+```ts
+onStateChange(callback): () => void;
+```
+
+Subscribe to state changes. The callback is invoked whenever the internal state updates (status, mode, messages, transcripts, errors).
+
+#### Parameters
+
+##### callback
+
+[`RealtimeStateChangeCallback`](../type-aliases/RealtimeStateChangeCallback.md)
+
+#### Returns
+
+`() => void`
+
+Unsubscribe function.
+
+***
+
+### destroy()
+
+```ts
+destroy(): void;
+```
+
+Clean up all resources. Disconnects, clears subscriptions, and releases audio resources. Call this when disposing of the client.
+
+#### Returns
+
+`void`
diff --git a/docs/reference/functions/realtimeToken.md b/docs/reference/functions/realtimeToken.md
new file mode 100644
index 000000000..22515aef9
--- /dev/null
+++ b/docs/reference/functions/realtimeToken.md
@@ -0,0 +1,75 @@
+---
+id: realtimeToken
+title: realtimeToken
+---
+
+# Function: realtimeToken()
+
+```ts
+function realtimeToken(options): Promise<RealtimeToken>;
+```
+
+Defined in: [realtime/index.ts:33](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/index.ts#L33)
+
+Generate a realtime token using the provided adapter.
+
+This function is used on the **server** to generate ephemeral tokens that clients can use to establish realtime connections. The token contains authentication credentials and session configuration, and is typically short-lived (e.g., 10 minutes for OpenAI, 30 minutes for ElevenLabs).
+
+## Parameters
+
+### options
+
+[`RealtimeTokenOptions`](../interfaces/RealtimeTokenOptions.md)
+
+Token generation options including the provider-specific adapter.
+
+## Returns
+
+`Promise<`[`RealtimeToken`](../interfaces/RealtimeToken.md)`>`
+
+A token containing the provider credentials, expiration, and session config.
+
+## Examples
+
+### OpenAI
+
+```typescript
+import { realtimeToken } from '@tanstack/ai'
+import { openaiRealtimeToken } from '@tanstack/ai-openai'
+
+const token = await realtimeToken({
+ adapter: openaiRealtimeToken({
+ model: 'gpt-4o-realtime-preview',
+ }),
+})
+```
+
+### ElevenLabs
+
+```typescript
+import { realtimeToken } from '@tanstack/ai'
+import { elevenlabsRealtimeToken } from '@tanstack/ai-elevenlabs'
+
+const token = await realtimeToken({
+ adapter: elevenlabsRealtimeToken({
+ agentId: 'your-agent-id',
+ }),
+})
+```
+
+### TanStack Start Server Function
+
+```typescript
+import { createServerFn } from '@tanstack/react-start'
+import { realtimeToken } from '@tanstack/ai'
+import { openaiRealtimeToken } from '@tanstack/ai-openai'
+
+export const getRealtimeToken = createServerFn({ method: 'POST' })
+ .handler(async () => {
+ return realtimeToken({
+ adapter: openaiRealtimeToken({
+ model: 'gpt-4o-realtime-preview',
+ }),
+ })
+ })
+```
diff --git a/docs/reference/interfaces/AudioVisualization.md b/docs/reference/interfaces/AudioVisualization.md
new file mode 100644
index 000000000..48f9a8a2f
--- /dev/null
+++ b/docs/reference/interfaces/AudioVisualization.md
@@ -0,0 +1,126 @@
+---
+id: AudioVisualization
+title: AudioVisualization
+---
+
+# Interface: AudioVisualization
+
+Defined in: [realtime/types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/types.ts)
+
+Interface for accessing audio visualization data from a realtime connection. Provides volume levels, frequency data, and time-domain data for both input (microphone) and output (speaker) audio.
+
+## Properties
+
+### inputLevel
+
+```ts
+readonly inputLevel: number;
+```
+
+Input volume level (0-1 normalized).
+
+***
+
+### outputLevel
+
+```ts
+readonly outputLevel: number;
+```
+
+Output volume level (0-1 normalized).
+
+***
+
+### inputSampleRate
+
+```ts
+readonly inputSampleRate: number;
+```
+
+Input audio sample rate in Hz.
+
+***
+
+### outputSampleRate
+
+```ts
+readonly outputSampleRate: number;
+```
+
+Output audio sample rate in Hz.
+
+## Methods
+
+### getInputFrequencyData()
+
+```ts
+getInputFrequencyData(): Uint8Array;
+```
+
+Get frequency data for input audio visualization.
+
+#### Returns
+
+`Uint8Array`
+
+***
+
+### getOutputFrequencyData()
+
+```ts
+getOutputFrequencyData(): Uint8Array;
+```
+
+Get frequency data for output audio visualization.
+
+#### Returns
+
+`Uint8Array`
+
+***
+
+### getInputTimeDomainData()
+
+```ts
+getInputTimeDomainData(): Uint8Array;
+```
+
+Get time domain data for input waveform visualization.
+
+#### Returns
+
+`Uint8Array`
+
+***
+
+### getOutputTimeDomainData()
+
+```ts
+getOutputTimeDomainData(): Uint8Array;
+```
+
+Get time domain data for output waveform visualization.
+
+#### Returns
+
+`Uint8Array`
+
+***
+
+### onInputAudio?
+
+```ts
+optional onInputAudio: (callback: (samples: Float32Array, sampleRate: number) => void) => () => void;
+```
+
+Subscribe to raw input audio samples. Returns an unsubscribe function.
+
+***
+
+### onOutputAudio?
+
+```ts
+optional onOutputAudio: (callback: (samples: Float32Array, sampleRate: number) => void) => () => void;
+```
+
+Subscribe to raw output audio samples. Returns an unsubscribe function.
diff --git a/docs/reference/interfaces/RealtimeAdapter.md b/docs/reference/interfaces/RealtimeAdapter.md
new file mode 100644
index 000000000..caea97869
--- /dev/null
+++ b/docs/reference/interfaces/RealtimeAdapter.md
@@ -0,0 +1,48 @@
+---
+id: RealtimeAdapter
+title: RealtimeAdapter
+---
+
+# Interface: RealtimeAdapter
+
+Defined in: [realtime-types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai-client/src/realtime-types.ts)
+
+Adapter interface for connecting to realtime providers. Each provider (OpenAI, ElevenLabs, etc.) implements this interface.
+
+## Properties
+
+### provider
+
+```ts
+provider: string;
+```
+
+Provider identifier (e.g., `'openai'`, `'elevenlabs'`).
+
+## Methods
+
+### connect()
+
+```ts
+connect(token, clientTools?): Promise<RealtimeConnection>;
+```
+
+Create a connection using the provided token.
+
+#### Parameters
+
+##### token
+
+[`RealtimeToken`](./RealtimeToken.md)
+
+The ephemeral token from the server.
+
+##### clientTools?
+
+`ReadonlyArray<AnyClientTool>`
+
+Optional client-side tools to register with the provider.
+
+#### Returns
+
+`Promise<`[`RealtimeConnection`](./RealtimeConnection.md)`>`
diff --git a/docs/reference/interfaces/RealtimeClientOptions.md b/docs/reference/interfaces/RealtimeClientOptions.md
new file mode 100644
index 000000000..7a3a62034
--- /dev/null
+++ b/docs/reference/interfaces/RealtimeClientOptions.md
@@ -0,0 +1,204 @@
+---
+id: RealtimeClientOptions
+title: RealtimeClientOptions
+---
+
+# Interface: RealtimeClientOptions
+
+Defined in: [realtime-types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai-client/src/realtime-types.ts)
+
+Options for the `RealtimeClient` and `useRealtimeChat` hook.
+
+## Properties
+
+### getToken
+
+```ts
+getToken: () => Promise<RealtimeToken>;
+```
+
+Function to fetch a realtime token from the server. Called on connect and when the token needs refresh.
+
+***
+
+### adapter
+
+```ts
+adapter: RealtimeAdapter;
+```
+
+The realtime adapter to use (e.g., `openaiRealtime()`, `elevenlabsRealtime()`).
+
+***
+
+### tools?
+
+```ts
+optional tools: ReadonlyArray<AnyClientTool>;
+```
+
+Client-side tools with execution logic.
+
+***
+
+### autoPlayback?
+
+```ts
+optional autoPlayback: boolean;
+```
+
+Auto-play assistant audio responses. Default: `true`.
+
+***
+
+### autoCapture?
+
+```ts
+optional autoCapture: boolean;
+```
+
+Request microphone access on connect. Default: `true`.
+
+***
+
+### instructions?
+
+```ts
+optional instructions: string;
+```
+
+System instructions for the assistant.
+
+***
+
+### voice?
+
+```ts
+optional voice: string;
+```
+
+Voice to use for audio output (provider-specific, e.g., `'alloy'` for OpenAI).
+
+***
+
+### vadMode?
+
+```ts
+optional vadMode: 'server' | 'semantic' | 'manual';
+```
+
+Voice activity detection mode. Default: `'server'`.
+
+- `'server'` — Provider handles speech detection server-side
+- `'semantic'` — Semantic turn detection (OpenAI only)
+- `'manual'` — Application controls via `startListening()`/`stopListening()`
+
+***
+
+### outputModalities?
+
+```ts
+optional outputModalities: Array<'audio' | 'text'>;
+```
+
+Output modalities for responses.
+
+***
+
+### temperature?
+
+```ts
+optional temperature: number;
+```
+
+Temperature for generation (provider-specific range).
+
+***
+
+### maxOutputTokens?
+
+```ts
+optional maxOutputTokens: number | 'inf';
+```
+
+Maximum number of tokens in a response.
+
+***
+
+### semanticEagerness?
+
+```ts
+optional semanticEagerness: 'low' | 'medium' | 'high';
+```
+
+Eagerness level for semantic VAD.
+
+***
+
+### onStatusChange?
+
+```ts
+optional onStatusChange: (status: RealtimeStatus) => void;
+```
+
+Called when connection status changes.
+
+***
+
+### onModeChange?
+
+```ts
+optional onModeChange: (mode: RealtimeMode) => void;
+```
+
+Called when session mode changes.
+
+***
+
+### onMessage?
+
+```ts
+optional onMessage: (message: RealtimeMessage) => void;
+```
+
+Called when a new message is added to the conversation.
+
+***
+
+### onError?
+
+```ts
+optional onError: (error: Error) => void;
+```
+
+Called when an error occurs.
+
+***
+
+### onConnect?
+
+```ts
+optional onConnect: () => void;
+```
+
+Called when connection is established.
+
+***
+
+### onDisconnect?
+
+```ts
+optional onDisconnect: () => void;
+```
+
+Called when disconnected.
+
+***
+
+### onInterrupted?
+
+```ts
+optional onInterrupted: () => void;
+```
+
+Called when the assistant's response is interrupted.
diff --git a/docs/reference/interfaces/RealtimeConnection.md b/docs/reference/interfaces/RealtimeConnection.md
new file mode 100644
index 000000000..00eff5faf
--- /dev/null
+++ b/docs/reference/interfaces/RealtimeConnection.md
@@ -0,0 +1,210 @@
+---
+id: RealtimeConnection
+title: RealtimeConnection
+---
+
+# Interface: RealtimeConnection
+
+Defined in: [realtime-types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai-client/src/realtime-types.ts)
+
+Connection interface representing an active realtime session. Handles audio I/O, events, and session management. Returned by `RealtimeAdapter.connect()`.
+
+## Methods
+
+### disconnect()
+
+```ts
+disconnect(): Promise<void>;
+```
+
+Disconnect from the realtime session.
+
+#### Returns
+
+`Promise<void>`
+
+***
+
+### startAudioCapture()
+
+```ts
+startAudioCapture(): Promise<void>;
+```
+
+Start capturing audio from the microphone.
+
+#### Returns
+
+`Promise<void>`
+
+***
+
+### stopAudioCapture()
+
+```ts
+stopAudioCapture(): void;
+```
+
+Stop capturing audio.
+
+#### Returns
+
+`void`
+
+***
+
+### sendText()
+
+```ts
+sendText(text): void;
+```
+
+Send a text message (fallback for when voice isn't available).
+
+#### Parameters
+
+##### text
+
+`string`
+
+#### Returns
+
+`void`
+
+***
+
+### sendImage()
+
+```ts
+sendImage(imageData, mimeType): void;
+```
+
+Send an image to the conversation.
+
+#### Parameters
+
+##### imageData
+
+`string`
+
+Base64-encoded image data or a URL.
+
+##### mimeType
+
+`string`
+
+MIME type of the image.
+
+#### Returns
+
+`void`
+
+***
+
+### sendToolResult()
+
+```ts
+sendToolResult(callId, result): void;
+```
+
+Send a tool execution result back to the provider.
+
+#### Parameters
+
+##### callId
+
+`string`
+
+The tool call identifier.
+
+##### result
+
+`string`
+
+JSON-serialized result.
+
+#### Returns
+
+`void`
+
+***
+
+### updateSession()
+
+```ts
+updateSession(config): void;
+```
+
+Update session configuration.
+
+#### Parameters
+
+##### config
+
+`Partial<RealtimeSessionConfig>`
+
+#### Returns
+
+`void`
+
+***
+
+### interrupt()
+
+```ts
+interrupt(): void;
+```
+
+Interrupt the current response.
+
+#### Returns
+
+`void`
+
+***
+
+### on()
+
+```ts
+on(event, handler): () => void;
+```
+
+Subscribe to connection events.
+
+#### Type Parameters
+
+##### TEvent
+
+`TEvent` *extends* `RealtimeEvent`
+
+#### Parameters
+
+##### event
+
+`TEvent`
+
+The event name (`'status_change'`, `'mode_change'`, `'transcript'`, `'audio_chunk'`, `'tool_call'`, `'message_complete'`, `'interrupted'`, `'error'`).
+
+##### handler
+
+`RealtimeEventHandler<TEvent>`
+
+#### Returns
+
+`() => void`
+
+Unsubscribe function.
+
+***
+
+### getAudioVisualization()
+
+```ts
+getAudioVisualization(): AudioVisualization;
+```
+
+Get audio visualization data for rendering level meters or waveforms.
+
+#### Returns
+
+[`AudioVisualization`](./AudioVisualization.md)
diff --git a/docs/reference/interfaces/RealtimeMessage.md b/docs/reference/interfaces/RealtimeMessage.md
new file mode 100644
index 000000000..ea83dfef8
--- /dev/null
+++ b/docs/reference/interfaces/RealtimeMessage.md
@@ -0,0 +1,80 @@
+---
+id: RealtimeMessage
+title: RealtimeMessage
+---
+
+# Interface: RealtimeMessage
+
+Defined in: [realtime/types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/types.ts)
+
+A message in a realtime conversation. Contains one or more content parts representing text, audio, tool calls, or images.
+
+## Properties
+
+### id
+
+```ts
+id: string;
+```
+
+Unique message identifier.
+
+***
+
+### role
+
+```ts
+role: 'user' | 'assistant';
+```
+
+Message role.
+
+***
+
+### timestamp
+
+```ts
+timestamp: number;
+```
+
+Timestamp when the message was created (milliseconds since epoch).
+
+***
+
+### parts
+
+```ts
+parts: Array<RealtimeMessagePart>;
+```
+
+Content parts of the message. Can include `RealtimeTextPart`, `RealtimeAudioPart`, `RealtimeToolCallPart`, `RealtimeToolResultPart`, or `RealtimeImagePart`.
+
+***
+
+### interrupted?
+
+```ts
+optional interrupted: boolean;
+```
+
+Whether this message was interrupted by the user.
+
+***
+
+### audioId?
+
+```ts
+optional audioId: string;
+```
+
+Reference to audio buffer if stored.
+
+***
+
+### durationMs?
+
+```ts
+optional durationMs: number;
+```
+
+Duration of the audio in milliseconds.
diff --git a/docs/reference/interfaces/RealtimeSessionConfig.md b/docs/reference/interfaces/RealtimeSessionConfig.md
new file mode 100644
index 000000000..8986a73fc
--- /dev/null
+++ b/docs/reference/interfaces/RealtimeSessionConfig.md
@@ -0,0 +1,120 @@
+---
+id: RealtimeSessionConfig
+title: RealtimeSessionConfig
+---
+
+# Interface: RealtimeSessionConfig
+
+Defined in: [realtime/types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/types.ts)
+
+Configuration for a realtime session. Passed to the provider to configure model behavior, voice, tools, and VAD settings.
+
+## Properties
+
+### model?
+
+```ts
+optional model: string;
+```
+
+Model to use for the session.
+
+***
+
+### voice?
+
+```ts
+optional voice: string;
+```
+
+Voice to use for audio output.
+
+***
+
+### instructions?
+
+```ts
+optional instructions: string;
+```
+
+System instructions for the assistant.
+
+***
+
+### tools?
+
+```ts
+optional tools: Array;
+```
+
+Tools available in the session.
+
+***
+
+### vadMode?
+
+```ts
+optional vadMode: 'server' | 'semantic' | 'manual';
+```
+
+Voice activity detection mode.
+
+***
+
+### vadConfig?
+
+```ts
+optional vadConfig: VADConfig;
+```
+
+Detailed VAD configuration (threshold, padding, silence duration).
+
+***
+
+### outputModalities?
+
+```ts
+optional outputModalities: Array<'audio' | 'text'>;
+```
+
+Output modalities for responses (e.g., `['audio', 'text']`).
+
+***
+
+### temperature?
+
+```ts
+optional temperature: number;
+```
+
+Temperature for generation (provider-specific range, e.g., 0.6-1.2 for OpenAI).
+
+***
+
+### maxOutputTokens?
+
+```ts
+optional maxOutputTokens: number | 'inf';
+```
+
+Maximum number of tokens in a response.
+
+***
+
+### semanticEagerness?
+
+```ts
+optional semanticEagerness: 'low' | 'medium' | 'high';
+```
+
+Eagerness level for semantic VAD.
+
+***
+
+### providerOptions?
+
+```ts
+optional providerOptions: Record<string, unknown>;
+```
+
+Provider-specific options.
diff --git a/docs/reference/interfaces/RealtimeToken.md b/docs/reference/interfaces/RealtimeToken.md
new file mode 100644
index 000000000..43479fb1f
--- /dev/null
+++ b/docs/reference/interfaces/RealtimeToken.md
@@ -0,0 +1,50 @@
+---
+id: RealtimeToken
+title: RealtimeToken
+---
+
+# Interface: RealtimeToken
+
+Defined in: [realtime/types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/types.ts)
+
+Token returned by the server for client authentication. Contains the ephemeral credentials, expiration time, and session configuration for a realtime connection.
+
+## Properties
+
+### provider
+
+```ts
+provider: string;
+```
+
+Provider identifier (e.g., `'openai'`, `'elevenlabs'`).
+
+***
+
+### token
+
+```ts
+token: string;
+```
+
+The ephemeral token value. For OpenAI, this is a client secret. For ElevenLabs, this is a signed URL.
+
+***
+
+### expiresAt
+
+```ts
+expiresAt: number;
+```
+
+Token expiration timestamp in milliseconds since epoch.
+
+***
+
+### config
+
+```ts
+config: RealtimeSessionConfig;
+```
+
+Session configuration embedded in the token (model, voice, instructions, etc.).
diff --git a/docs/reference/interfaces/RealtimeTokenAdapter.md b/docs/reference/interfaces/RealtimeTokenAdapter.md
new file mode 100644
index 000000000..223978103
--- /dev/null
+++ b/docs/reference/interfaces/RealtimeTokenAdapter.md
@@ -0,0 +1,34 @@
+---
+id: RealtimeTokenAdapter
+title: RealtimeTokenAdapter
+---
+
+# Interface: RealtimeTokenAdapter
+
+Defined in: [realtime/types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/types.ts)
+
+Adapter interface for generating provider-specific tokens. Implemented by `openaiRealtimeToken()` and `elevenlabsRealtimeToken()`.
+
+## Properties
+
+### provider
+
+```ts
+provider: string;
+```
+
+Provider identifier (e.g., `'openai'`, `'elevenlabs'`).
+
+## Methods
+
+### generateToken()
+
+```ts
+generateToken(): Promise<RealtimeToken>;
+```
+
+Generate an ephemeral token for client use.
+
+#### Returns
+
+`Promise<`[`RealtimeToken`](./RealtimeToken.md)`>`
diff --git a/docs/reference/interfaces/RealtimeTokenOptions.md b/docs/reference/interfaces/RealtimeTokenOptions.md
new file mode 100644
index 000000000..d34f43faf
--- /dev/null
+++ b/docs/reference/interfaces/RealtimeTokenOptions.md
@@ -0,0 +1,23 @@
+---
+id: RealtimeTokenOptions
+title: RealtimeTokenOptions
+---
+
+# Interface: RealtimeTokenOptions
+
+Defined in: [realtime/types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/types.ts)
+
+Options for the `realtimeToken()` function.
+
+## Properties
+
+### adapter
+
+```ts
+adapter: RealtimeTokenAdapter;
+```
+
+The token adapter to use. Each provider has its own token adapter:
+
+- `openaiRealtimeToken()` from `@tanstack/ai-openai`
+- `elevenlabsRealtimeToken()` from `@tanstack/ai-elevenlabs`
diff --git a/docs/reference/type-aliases/RealtimeEvent.md b/docs/reference/type-aliases/RealtimeEvent.md
new file mode 100644
index 000000000..0d7011b81
--- /dev/null
+++ b/docs/reference/type-aliases/RealtimeEvent.md
@@ -0,0 +1,33 @@
+---
+id: RealtimeEvent
+title: RealtimeEvent
+---
+
+# Type Alias: RealtimeEvent
+
+```ts
+type RealtimeEvent =
+ | 'status_change'
+ | 'mode_change'
+ | 'transcript'
+ | 'audio_chunk'
+ | 'tool_call'
+ | 'message_complete'
+ | 'interrupted'
+ | 'error';
+```
+
+Defined in: [realtime/types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/types.ts)
+
+Events emitted by the realtime connection. Used with `RealtimeConnection.on()` to subscribe to specific events.
+
+| Event | Payload | Description |
+|-------|---------|-------------|
+| `'status_change'` | `{ status: RealtimeStatus }` | Connection status changed |
+| `'mode_change'` | `{ mode: RealtimeMode }` | Session mode changed |
+| `'transcript'` | `{ role, transcript, isFinal }` | Speech transcript (partial or final) |
+| `'audio_chunk'` | `{ data: ArrayBuffer, sampleRate }` | Raw audio data received |
+| `'tool_call'` | `{ toolCallId, toolName, input }` | Tool call requested by the model |
+| `'message_complete'` | `{ message: RealtimeMessage }` | Complete message received |
+| `'interrupted'` | `{ messageId?: string }` | Response was interrupted |
+| `'error'` | `{ error: Error }` | Error occurred |
diff --git a/docs/reference/type-aliases/RealtimeMessagePart.md b/docs/reference/type-aliases/RealtimeMessagePart.md
new file mode 100644
index 000000000..75dae4b64
--- /dev/null
+++ b/docs/reference/type-aliases/RealtimeMessagePart.md
@@ -0,0 +1,27 @@
+---
+id: RealtimeMessagePart
+title: RealtimeMessagePart
+---
+
+# Type Alias: RealtimeMessagePart
+
+```ts
+type RealtimeMessagePart =
+ | RealtimeTextPart
+ | RealtimeAudioPart
+ | RealtimeToolCallPart
+ | RealtimeToolResultPart
+ | RealtimeImagePart;
+```
+
+Defined in: [realtime/types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/types.ts)
+
+Union of all realtime message part types.
+
+| Part | `type` Field | Key Properties |
+|------|-------------|----------------|
+| `RealtimeTextPart` | `'text'` | `content: string` |
+| `RealtimeAudioPart` | `'audio'` | `transcript: string`, `durationMs?: number` |
+| `RealtimeToolCallPart` | `'tool-call'` | `id`, `name`, `arguments`, `input?`, `output?` |
+| `RealtimeToolResultPart` | `'tool-result'` | `toolCallId`, `content` |
+| `RealtimeImagePart` | `'image'` | `data: string`, `mimeType: string` |
diff --git a/docs/reference/type-aliases/RealtimeMode.md b/docs/reference/type-aliases/RealtimeMode.md
new file mode 100644
index 000000000..bb0f8beb6
--- /dev/null
+++ b/docs/reference/type-aliases/RealtimeMode.md
@@ -0,0 +1,21 @@
+---
+id: RealtimeMode
+title: RealtimeMode
+---
+
+# Type Alias: RealtimeMode
+
+```ts
+type RealtimeMode = 'idle' | 'listening' | 'thinking' | 'speaking';
+```
+
+Defined in: [realtime/types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/types.ts)
+
+Current mode of the realtime session.
+
+| Value | Description |
+|-------|-------------|
+| `'idle'` | Connected but not actively interacting |
+| `'listening'` | Capturing user audio input |
+| `'thinking'` | Processing user input |
+| `'speaking'` | AI is generating a response |
diff --git a/docs/reference/type-aliases/RealtimeStateChangeCallback.md b/docs/reference/type-aliases/RealtimeStateChangeCallback.md
new file mode 100644
index 000000000..9508cf09b
--- /dev/null
+++ b/docs/reference/type-aliases/RealtimeStateChangeCallback.md
@@ -0,0 +1,14 @@
+---
+id: RealtimeStateChangeCallback
+title: RealtimeStateChangeCallback
+---
+
+# Type Alias: RealtimeStateChangeCallback
+
+```ts
+type RealtimeStateChangeCallback = (state: RealtimeClientState) => void;
+```
+
+Defined in: [ai-client/src/realtime-types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai-client/src/realtime-types.ts)
+
+Callback function invoked when the realtime client state changes. Receives the full current `RealtimeClientState`.
diff --git a/docs/reference/type-aliases/RealtimeStatus.md b/docs/reference/type-aliases/RealtimeStatus.md
new file mode 100644
index 000000000..1872253d5
--- /dev/null
+++ b/docs/reference/type-aliases/RealtimeStatus.md
@@ -0,0 +1,22 @@
+---
+id: RealtimeStatus
+title: RealtimeStatus
+---
+
+# Type Alias: RealtimeStatus
+
+```ts
+type RealtimeStatus = 'idle' | 'connecting' | 'connected' | 'reconnecting' | 'error';
+```
+
+Defined in: [realtime/types.ts](https://github.com/TanStack/ai/blob/main/packages/typescript/ai/src/realtime/types.ts)
+
+Connection status of the realtime client.
+
+| Value | Description |
+|-------|-------------|
+| `'idle'` | Not connected |
+| `'connecting'` | Establishing connection |
+| `'connected'` | Active session |
+| `'reconnecting'` | Reconnecting after interruption |
+| `'error'` | Connection error occurred |
diff --git a/examples/ts-react-chat/.env.example b/examples/ts-react-chat/.env.example
index 613cb664b..2bdb43f49 100644
--- a/examples/ts-react-chat/.env.example
+++ b/examples/ts-react-chat/.env.example
@@ -1,3 +1,11 @@
# OpenAI API Key
# Get yours at: https://platform.openai.com/api-keys
-OPENAI_API_KEY=sk-...
\ No newline at end of file
+OPENAI_API_KEY=sk-...
+
+# ElevenLabs API Key (for realtime voice)
+# Get yours at: https://elevenlabs.io/app/settings/api-keys
+ELEVENLABS_API_KEY=xi-...
+
+# ElevenLabs Agent ID (for realtime voice)
+# Create an agent at: https://elevenlabs.io/app/conversational-ai
+ELEVENLABS_AGENT_ID=...
diff --git a/examples/ts-react-chat/package.json b/examples/ts-react-chat/package.json
index db2974a56..3e1b9ca3b 100644
--- a/examples/ts-react-chat/package.json
+++ b/examples/ts-react-chat/package.json
@@ -13,6 +13,7 @@
"@tanstack/ai": "workspace:*",
"@tanstack/ai-anthropic": "workspace:*",
"@tanstack/ai-client": "workspace:*",
+ "@tanstack/ai-elevenlabs": "workspace:*",
"@tanstack/ai-gemini": "workspace:*",
"@tanstack/ai-grok": "workspace:*",
"@tanstack/ai-groq": "workspace:*",
diff --git a/examples/ts-react-chat/src/components/AudioSparkline.tsx b/examples/ts-react-chat/src/components/AudioSparkline.tsx
new file mode 100644
index 000000000..edc51b4a6
--- /dev/null
+++ b/examples/ts-react-chat/src/components/AudioSparkline.tsx
@@ -0,0 +1,81 @@
+import { useEffect, useRef } from 'react'
+
+export function AudioSparkline({
+ getData,
+ color,
+ label,
+}: {
+ getData: () => Uint8Array
+ color: string
+ label: string
+}) {
+  const canvasRef = useRef<HTMLCanvasElement | null>(null)
+  const animationRef = useRef<number | null>(null)
+
+ useEffect(() => {
+ const canvas = canvasRef.current
+ if (!canvas) return
+
+ const ctx = canvas.getContext('2d')
+ if (!ctx) return
+
+ function draw() {
+ const data = getData()
+ const width = canvas!.width
+ const height = canvas!.height
+
+ ctx!.fillStyle = '#1f2937'
+ ctx!.fillRect(0, 0, width, height)
+
+ ctx!.strokeStyle = color
+ ctx!.lineWidth = 1
+ ctx!.beginPath()
+
+ const step = Math.max(1, Math.floor(data.length / width))
+
+ for (let i = 0; i < width; i++) {
+ const dataIndex = Math.min(i * step, data.length - 1)
+ const value = data[dataIndex] ?? 128
+ const y = height - (value / 255) * height
+
+ if (i === 0) {
+ ctx!.moveTo(i, y)
+ } else {
+ ctx!.lineTo(i, y)
+ }
+ }
+
+ ctx!.stroke()
+
+ ctx!.strokeStyle = '#4b5563'
+ ctx!.setLineDash([2, 2])
+ ctx!.beginPath()
+ ctx!.moveTo(0, height / 2)
+ ctx!.lineTo(width, height / 2)
+ ctx!.stroke()
+ ctx!.setLineDash([])
+
+ animationRef.current = requestAnimationFrame(draw)
+ }
+
+ draw()
+
+ return () => {
+ if (animationRef.current) {
+ cancelAnimationFrame(animationRef.current)
+ }
+ }
+ }, [getData, color])
+
+  return (
+    <div>
+      <span>{label}</span>
+      <canvas ref={canvasRef} width={200} height={48} />
+    </div>
+  )
+}
diff --git a/examples/ts-react-chat/src/components/Header.tsx b/examples/ts-react-chat/src/components/Header.tsx
index 652036c9e..0b28cbc48 100644
--- a/examples/ts-react-chat/src/components/Header.tsx
+++ b/examples/ts-react-chat/src/components/Header.tsx
@@ -156,6 +156,19 @@ export default function Header() {
Guitar Demo
+          <Link
+            to="/realtime" onClick={() => setIsOpen(false)}
+            className="flex items-center gap-3 p-3 rounded-lg hover:bg-gray-800 transition-colors mb-2"
+            activeProps={{
+              className:
+                'flex items-center gap-3 p-3 rounded-lg bg-cyan-600 hover:bg-cyan-700 transition-colors mb-2',
+            }}
+          >
+            <Phone className="w-5 h-5" />
+            Voice Chat (Realtime)
+          </Link>
>
diff --git a/examples/ts-react-chat/src/lib/realtime-tools.ts b/examples/ts-react-chat/src/lib/realtime-tools.ts
new file mode 100644
index 000000000..e19c52226
--- /dev/null
+++ b/examples/ts-react-chat/src/lib/realtime-tools.ts
@@ -0,0 +1,167 @@
+import { toolDefinition } from '@tanstack/ai'
+import { z } from 'zod'
+
+// Tool to get current time - useful for voice assistants
+export const getCurrentTimeToolDef = toolDefinition({
+ name: 'getCurrentTime',
+ description:
+ 'Get the current date and time. Use this when the user asks what time it is or the current date.',
+ inputSchema: z.object({
+ timezone: z
+ .string()
+ .optional()
+ .describe('Optional timezone like "America/New_York" or "Europe/London"'),
+ }),
+ outputSchema: z.object({
+ time: z.string(),
+ date: z.string(),
+ timezone: z.string(),
+ }),
+})
+
+// Tool to get weather - common voice assistant use case
+export const getWeatherToolDef = toolDefinition({
+ name: 'getWeather',
+ description:
+ 'Get the current weather for a location. Use this when the user asks about the weather.',
+ inputSchema: z.object({
+ location: z
+ .string()
+ .describe(
+ 'The city and state/country, e.g. "San Francisco, CA" or "London, UK"',
+ ),
+ }),
+ outputSchema: z.object({
+ location: z.string(),
+ temperature: z.number(),
+ unit: z.string(),
+ condition: z.string(),
+ humidity: z.number(),
+ }),
+})
+
+// Tool to set a reminder - demonstrates user interaction
+export const setReminderToolDef = toolDefinition({
+ name: 'setReminder',
+ description:
+ 'Set a reminder for the user. Use this when the user asks to be reminded about something.',
+ inputSchema: z.object({
+ message: z.string().describe('What to remind the user about'),
+ inMinutes: z.number().describe('How many minutes from now to remind'),
+ }),
+ outputSchema: z.object({
+ success: z.boolean(),
+ message: z.string(),
+ remindAt: z.string(),
+ }),
+})
+
+// Tool to search knowledge base - useful for assistants with specific knowledge
+export const searchKnowledgeToolDef = toolDefinition({
+ name: 'searchKnowledge',
+ description:
+ 'Search a knowledge base for information. Use this to find specific facts or documentation.',
+ inputSchema: z.object({
+ query: z.string().describe('The search query'),
+ }),
+ outputSchema: z.object({
+ results: z.array(
+ z.object({
+ title: z.string(),
+ snippet: z.string(),
+ }),
+ ),
+ }),
+})
+
+// Client-side implementation of getCurrentTime
+export const getCurrentTimeClient = getCurrentTimeToolDef.client(
+ ({ timezone }) => {
+ const now = new Date()
+ const tz = timezone || Intl.DateTimeFormat().resolvedOptions().timeZone
+
+ return {
+ time: now.toLocaleTimeString('en-US', { timeZone: tz }),
+ date: now.toLocaleDateString('en-US', {
+ weekday: 'long',
+ year: 'numeric',
+ month: 'long',
+ day: 'numeric',
+ timeZone: tz,
+ }),
+ timezone: tz,
+ }
+ },
+)
+
+// Client-side implementation of getWeather (mock data for demo)
+export const getWeatherClient = getWeatherToolDef.client(({ location }) => {
+ // Mock weather data for demo purposes
+ const conditions = ['Sunny', 'Partly Cloudy', 'Cloudy', 'Rainy', 'Snowy']
+ const randomCondition =
+ conditions[Math.floor(Math.random() * conditions.length)]!
+ const randomTemp = Math.floor(Math.random() * 30) + 50 // 50-80°F
+ const randomHumidity = Math.floor(Math.random() * 50) + 30 // 30-80%
+
+ return {
+ location,
+ temperature: randomTemp,
+ unit: 'F',
+ condition: randomCondition,
+ humidity: randomHumidity,
+ }
+})
+
+// Client-side implementation of setReminder
+export const setReminderClient = setReminderToolDef.client(
+ ({ message, inMinutes }) => {
+ const remindAt = new Date(Date.now() + inMinutes * 60 * 1000)
+
+ // In a real app, you'd schedule a notification here
+ console.log(
+ `[Reminder] Will remind about "${message}" at ${remindAt.toLocaleTimeString()}`,
+ )
+
+ // For demo purposes, show an alert after the specified time
+ setTimeout(
+ () => {
+ alert(`Reminder: ${message}`)
+ },
+ inMinutes * 60 * 1000,
+ )
+
+ return {
+ success: true,
+ message: `Reminder set: "${message}"`,
+ remindAt: remindAt.toLocaleTimeString(),
+ }
+ },
+)
+
+// Client-side implementation of searchKnowledge (mock data for demo)
+export const searchKnowledgeClient = searchKnowledgeToolDef.client(
+ ({ query }) => {
+ // Mock search results for demo
+ const mockResults = [
+ {
+ title: `Result for: ${query}`,
+ snippet: `This is a mock search result for the query "${query}". In a real application, this would return actual search results from a knowledge base.`,
+ },
+ {
+ title: 'Additional Information',
+ snippet:
+ 'More relevant information would appear here based on your search query.',
+ },
+ ]
+
+ return { results: mockResults }
+ },
+)
+
+// Export all client tools as an array for easy use
+export const realtimeClientTools = [
+ getCurrentTimeClient,
+ getWeatherClient,
+ setReminderClient,
+ searchKnowledgeClient,
+] as const
diff --git a/examples/ts-react-chat/src/lib/use-realtime.ts b/examples/ts-react-chat/src/lib/use-realtime.ts
new file mode 100644
index 000000000..848c702ca
--- /dev/null
+++ b/examples/ts-react-chat/src/lib/use-realtime.ts
@@ -0,0 +1,91 @@
+import { createServerFn } from '@tanstack/react-start'
+import { realtimeToken } from '@tanstack/ai'
+import { useRealtimeChat } from '@tanstack/ai-react'
+import { openaiRealtime, openaiRealtimeToken } from '@tanstack/ai-openai'
+import {
+ elevenlabsRealtime,
+ elevenlabsRealtimeToken,
+} from '@tanstack/ai-elevenlabs'
+import { realtimeClientTools } from '@/lib/realtime-tools'
+
+type Provider = 'openai' | 'elevenlabs'
+
+const getRealtimeTokenFn = createServerFn({ method: 'POST' })
+ .inputValidator((data: { provider: Provider; agentId?: string }) => {
+ if (!data.provider) throw new Error('Provider is required')
+ return data
+ })
+ .handler(async ({ data }) => {
+ if (data.provider === 'openai') {
+ return realtimeToken({
+ adapter: openaiRealtimeToken({
+ model: 'gpt-4o-realtime-preview',
+ }),
+ })
+ }
+
+ if (data.provider === 'elevenlabs') {
+ const agentId = data.agentId || process.env.ELEVENLABS_AGENT_ID
+ if (!agentId) {
+ throw new Error(
+ 'ElevenLabs agent ID is required. Set ELEVENLABS_AGENT_ID or pass agentId in request body.',
+ )
+ }
+ return realtimeToken({
+ adapter: elevenlabsRealtimeToken({ agentId }),
+ })
+ }
+
+ throw new Error(`Unknown provider: ${data.provider}`)
+ })
+
+export function useRealtime({
+ provider,
+ agentId,
+ outputModalities,
+ temperature,
+ maxOutputTokens,
+ semanticEagerness,
+}: {
+ provider: Provider
+ agentId: string
+ outputModalities?: Array<'audio' | 'text'>
+ temperature?: number
+ maxOutputTokens?: number | 'inf'
+ semanticEagerness?: 'low' | 'medium' | 'high'
+}) {
+ const adapter =
+ provider === 'openai' ? openaiRealtime() : elevenlabsRealtime()
+
+ return useRealtimeChat({
+ getToken: () =>
+ getRealtimeTokenFn({
+ data: {
+ provider,
+ ...(provider === 'elevenlabs' && agentId ? { agentId } : {}),
+ },
+ }),
+ adapter,
+ instructions: `You are a helpful, friendly voice assistant with access to several tools.
+
+You can:
+- Tell the user the current time and date (getCurrentTime)
+- Get weather information for any location (getWeather)
+- Set reminders for the user (setReminder)
+- Search a knowledge base for information (searchKnowledge)
+
+Keep your responses concise and conversational since this is a voice interface.
+When using tools, briefly explain what you're doing and then share the results naturally.
+If the user sends an image, describe what you see and answer any questions about it.
+Be friendly and engaging!`,
+ voice: 'alloy',
+ tools: realtimeClientTools,
+ outputModalities,
+ temperature,
+ maxOutputTokens,
+ semanticEagerness,
+ onError: (err) => {
+ console.error('Realtime error:', err)
+ },
+ })
+}
diff --git a/examples/ts-react-chat/src/routeTree.gen.ts b/examples/ts-react-chat/src/routeTree.gen.ts
index ce3e2520b..ab8ef90f3 100644
--- a/examples/ts-react-chat/src/routeTree.gen.ts
+++ b/examples/ts-react-chat/src/routeTree.gen.ts
@@ -9,6 +9,7 @@
// Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified.
import { Route as rootRouteImport } from './routes/__root'
+import { Route as RealtimeRouteImport } from './routes/realtime'
import { Route as IndexRouteImport } from './routes/index'
import { Route as GenerationsVideoRouteImport } from './routes/generations.video'
import { Route as GenerationsTranscriptionRouteImport } from './routes/generations.transcription'
@@ -24,6 +25,11 @@ import { Route as ApiGenerateVideoRouteImport } from './routes/api.generate.vide
import { Route as ApiGenerateSpeechRouteImport } from './routes/api.generate.speech'
import { Route as ApiGenerateImageRouteImport } from './routes/api.generate.image'
+const RealtimeRoute = RealtimeRouteImport.update({
+ id: '/realtime',
+ path: '/realtime',
+ getParentRoute: () => rootRouteImport,
+} as any)
const IndexRoute = IndexRouteImport.update({
id: '/',
path: '/',
@@ -98,6 +104,7 @@ const ApiGenerateImageRoute = ApiGenerateImageRouteImport.update({
export interface FileRoutesByFullPath {
'/': typeof IndexRoute
+ '/realtime': typeof RealtimeRoute
'/api/summarize': typeof ApiSummarizeRoute
'/api/tanchat': typeof ApiTanchatRoute
'/api/transcribe': typeof ApiTranscribeRoute
@@ -114,6 +121,7 @@ export interface FileRoutesByFullPath {
}
export interface FileRoutesByTo {
'/': typeof IndexRoute
+ '/realtime': typeof RealtimeRoute
'/api/summarize': typeof ApiSummarizeRoute
'/api/tanchat': typeof ApiTanchatRoute
'/api/transcribe': typeof ApiTranscribeRoute
@@ -131,6 +139,7 @@ export interface FileRoutesByTo {
export interface FileRoutesById {
__root__: typeof rootRouteImport
'/': typeof IndexRoute
+ '/realtime': typeof RealtimeRoute
'/api/summarize': typeof ApiSummarizeRoute
'/api/tanchat': typeof ApiTanchatRoute
'/api/transcribe': typeof ApiTranscribeRoute
@@ -149,6 +158,7 @@ export interface FileRouteTypes {
fileRoutesByFullPath: FileRoutesByFullPath
fullPaths:
| '/'
+ | '/realtime'
| '/api/summarize'
| '/api/tanchat'
| '/api/transcribe'
@@ -165,6 +175,7 @@ export interface FileRouteTypes {
fileRoutesByTo: FileRoutesByTo
to:
| '/'
+ | '/realtime'
| '/api/summarize'
| '/api/tanchat'
| '/api/transcribe'
@@ -181,6 +192,7 @@ export interface FileRouteTypes {
id:
| '__root__'
| '/'
+ | '/realtime'
| '/api/summarize'
| '/api/tanchat'
| '/api/transcribe'
@@ -198,6 +210,7 @@ export interface FileRouteTypes {
}
export interface RootRouteChildren {
IndexRoute: typeof IndexRoute
+ RealtimeRoute: typeof RealtimeRoute
ApiSummarizeRoute: typeof ApiSummarizeRoute
ApiTanchatRoute: typeof ApiTanchatRoute
ApiTranscribeRoute: typeof ApiTranscribeRoute
@@ -215,6 +228,13 @@ export interface RootRouteChildren {
declare module '@tanstack/react-router' {
interface FileRoutesByPath {
+ '/realtime': {
+ id: '/realtime'
+ path: '/realtime'
+ fullPath: '/realtime'
+ preLoaderRoute: typeof RealtimeRouteImport
+ parentRoute: typeof rootRouteImport
+ }
'/': {
id: '/'
path: '/'
@@ -318,6 +338,7 @@ declare module '@tanstack/react-router' {
const rootRouteChildren: RootRouteChildren = {
IndexRoute: IndexRoute,
+ RealtimeRoute: RealtimeRoute,
ApiSummarizeRoute: ApiSummarizeRoute,
ApiTanchatRoute: ApiTanchatRoute,
ApiTranscribeRoute: ApiTranscribeRoute,
diff --git a/examples/ts-react-chat/src/routes/realtime.tsx b/examples/ts-react-chat/src/routes/realtime.tsx
new file mode 100644
index 000000000..3225249e2
--- /dev/null
+++ b/examples/ts-react-chat/src/routes/realtime.tsx
@@ -0,0 +1,538 @@
+import { useEffect, useRef, useState } from 'react'
+import { createFileRoute } from '@tanstack/react-router'
+import {
+ Image,
+ Mic,
+ MicOff,
+ Phone,
+ PhoneOff,
+ Send,
+ Volume2,
+ Wrench,
+} from 'lucide-react'
+import { AudioSparkline } from '@/components/AudioSparkline'
+import { useRealtime } from '@/lib/use-realtime'
+
+type Provider = 'openai' | 'elevenlabs'
+type OutputMode = 'audio+text' | 'text-only' | 'audio-only'
+
+const PROVIDER_OPTIONS: Array<{ value: Provider; label: string }> = [
+ { value: 'openai', label: 'OpenAI Realtime' },
+ { value: 'elevenlabs', label: 'ElevenLabs' },
+]
+
+const OUTPUT_MODE_OPTIONS: Array<{ value: OutputMode; label: string }> = [
+ { value: 'audio+text', label: 'Audio + Text' },
+ { value: 'text-only', label: 'Text Only' },
+ { value: 'audio-only', label: 'Audio Only' },
+]
+
+function outputModeToModalities(
+ mode: OutputMode,
+): Array<'audio' | 'text'> | undefined {
+ switch (mode) {
+ case 'text-only':
+ return ['text']
+ case 'audio-only':
+ return ['audio']
+ case 'audio+text':
+ return ['audio', 'text']
+ default:
+ return undefined
+ }
+}
+
+function RealtimePage() {
+ const [provider, setProvider] = useState<Provider>('openai')
+ const [agentId, setAgentId] = useState('')
+ const [textInput, setTextInput] = useState('')
+ const [outputMode, setOutputMode] = useState<OutputMode>('audio+text')
+ const [temperature, setTemperature] = useState(0.8)
+ const [semanticEagerness, setSemanticEagerness] = useState<
+ 'low' | 'medium' | 'high'
+ >('medium')
+ const messagesEndRef = useRef<HTMLDivElement>(null)
+ const imageInputRef = useRef<HTMLInputElement>(null)
+
+ const {
+ status,
+ mode,
+ messages,
+ pendingUserTranscript,
+ pendingAssistantTranscript,
+ error,
+ connect,
+ disconnect,
+ interrupt,
+ sendText,
+ sendImage,
+ inputLevel,
+ outputLevel,
+ getInputTimeDomainData,
+ getOutputTimeDomainData,
+ } = useRealtime({
+ provider,
+ agentId,
+ outputModalities: outputModeToModalities(outputMode),
+ temperature,
+ semanticEagerness,
+ })
+
+ // Handle image file selection
+ const handleImageUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
+ const file = e.target.files?.[0]
+ if (!file) return
+
+ const reader = new FileReader()
+ reader.onload = () => {
+ const result = reader.result as string
+ // Extract base64 data (remove data:image/xxx;base64, prefix)
+ const base64 = result.split(',')[1]
+ if (base64) {
+ sendImage(base64, file.type)
+ }
+ }
+ reader.readAsDataURL(file)
+
+ // Reset input so the same file can be selected again
+ e.target.value = ''
+ }
+
+ // Auto-scroll to bottom when messages change
+ useEffect(() => {
+ messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' })
+ }, [messages, pendingUserTranscript, pendingAssistantTranscript])
+
+ // Get status color
+ const getStatusColor = () => {
+ switch (status) {
+ case 'connected':
+ return 'bg-green-500'
+ case 'connecting':
+ case 'reconnecting':
+ return 'bg-yellow-500'
+ case 'error':
+ return 'bg-red-500'
+ default:
+ return 'bg-gray-500'
+ }
+ }
+
+ // Get mode icon
+ const getModeIndicator = () => {
+ switch (mode) {
+ case 'listening':
+ return (
+
+
+ Listening...
+
+ )
+ case 'thinking':
+ return (
+
+ )
+ case 'speaking':
+ return (
+
+
+ Speaking...
+
+ )
+ default:
+ return (
+
+
+ Idle
+
+ )
+ }
+ }
+
+ return (
+
+
+ {/* Header */}
+
+
+
+ {/* Provider selector */}
+
+
+
+
+
+ {/* ElevenLabs Agent ID (conditional) */}
+ {provider === 'elevenlabs' && (
+
+
+ setAgentId(e.target.value)}
+ placeholder="Your ElevenLabs Agent ID"
+ disabled={status !== 'idle'}
+ className="rounded-lg border border-orange-500/20 bg-gray-900 px-3 py-2 text-sm text-white focus:outline-none focus:ring-2 focus:ring-orange-500/50 disabled:opacity-50 w-64"
+ />
+
+ )}
+
+ {/* Output mode selector (OpenAI only) */}
+ {provider === 'openai' && (
+
+
+
+
+ )}
+
+ {/* Temperature slider */}
+ {provider === 'openai' && (
+
+
+ setTemperature(parseFloat(e.target.value))}
+ disabled={status !== 'idle'}
+ className="w-24 accent-orange-500 disabled:opacity-50"
+ />
+
+ )}
+
+ {/* Semantic eagerness */}
+ {provider === 'openai' && (
+
+
+
+
+ )}
+
+
+ {/* Status */}
+
+
+ {getModeIndicator()}
+
+
+
+
+ {/* Tools indicator */}
+ {provider === 'openai' && (
+
+
+
+ Tools enabled:
+ getCurrentTime
+ •
+ getWeather
+ •
+ setReminder
+ •
+ searchKnowledge
+
+
+ )}
+
+ {/* Messages area */}
+
+ {messages.length === 0 && status === 'idle' && (
+
+
+
Voice Chat with Tools & Vision
+
+ Click "Start Conversation" to begin talking with the AI
+
+
+ Try asking: "What time is it?" or "What's the weather?" — or
+ send an image!
+
+
+ )}
+
+ {messages.map((message) => (
+
+
+ {message.role === 'assistant' ? (
+
+ AI
+
+ ) : (
+
+ U
+
+ )}
+
+ {message.parts.map((part, idx) => {
+ if (part.type === 'audio') {
+ return (
+
+ {part.transcript}
+
+ )
+ }
+ if (part.type === 'text') {
+ return (
+
+ {part.content}
+
+ )
+ }
+ if (part.type === 'image') {
+ const src = part.data.startsWith('http')
+ ? part.data
+ : `data:${part.mimeType};base64,${part.data}`
+ return (
+

+ )
+ }
+ return null
+ })}
+ {message.interrupted && (
+
+ (interrupted)
+
+ )}
+
+
+
+ ))}
+
+ {/* Pending transcripts */}
+ {pendingUserTranscript && (
+
+
+
+ U
+
+
{pendingUserTranscript}...
+
+
+ )}
+
+ {pendingAssistantTranscript && (
+
+
+
+ AI
+
+
+ {pendingAssistantTranscript}...
+
+
+
+ )}
+
+
+
+
+ {/* Error display */}
+ {error && (
+
+ Error: {error.message}
+
+ )}
+
+ {/* Text input */}
+ {status === 'connected' && (
+
+
+
+ )}
+
+ {/* Audio visualization & controls */}
+
+ {/* Volume meters and waveforms */}
+ {status === 'connected' && (
+
+ {/* Input (Microphone) */}
+
+
+
+
+ {Math.round(inputLevel * 100)}%
+
+
+
+ {/* Output (Speaker) */}
+
+
+
+
+ {Math.round(outputLevel * 100)}%
+
+
+
+
+ )}
+
+ {/* Controls */}
+
+ {status === 'idle' ? (
+
+ ) : (
+ <>
+ {mode === 'speaking' && (
+
+ )}
+
+ >
+ )}
+
+
+
+
+ )
+}
+
+export const Route = createFileRoute('/realtime')({
+ component: RealtimePage,
+})
diff --git a/packages/typescript/ai-client/src/index.ts b/packages/typescript/ai-client/src/index.ts
index 6762a00bd..c30a9c3ad 100644
--- a/packages/typescript/ai-client/src/index.ts
+++ b/packages/typescript/ai-client/src/index.ts
@@ -1,4 +1,5 @@
export { ChatClient } from './chat-client'
+export { RealtimeClient } from './realtime-client'
export { GenerationClient } from './generation-client'
export { VideoGenerationClient } from './video-generation-client'
export type {
@@ -42,6 +43,13 @@ export type {
ExtractToolOutput,
} from './tool-types'
export type { AnyClientTool } from '@tanstack/ai'
+export type {
+ RealtimeAdapter,
+ RealtimeConnection,
+ RealtimeClientOptions,
+ RealtimeClientState,
+ RealtimeStateChangeCallback,
+} from './realtime-types'
export {
fetchServerSentEvents,
fetchHttpStream,
diff --git a/packages/typescript/ai-client/src/realtime-client.ts b/packages/typescript/ai-client/src/realtime-client.ts
new file mode 100644
index 000000000..2683294c0
--- /dev/null
+++ b/packages/typescript/ai-client/src/realtime-client.ts
@@ -0,0 +1,526 @@
+import { convertSchemaToJsonSchema } from '@tanstack/ai'
+import type {
+ AnyClientTool,
+ AudioVisualization,
+ RealtimeMessage,
+ RealtimeMode,
+ RealtimeStatus,
+ RealtimeToken,
+} from '@tanstack/ai'
+import type {
+ RealtimeClientOptions,
+ RealtimeClientState,
+ RealtimeConnection,
+ RealtimeStateChangeCallback,
+} from './realtime-types'
+
+// Token refresh buffer - refresh 1 minute before expiry
+const TOKEN_REFRESH_BUFFER_MS = 60_000
+
+/**
+ * Client for managing realtime voice conversations.
+ *
+ * Handles connection lifecycle, audio I/O, message state,
+ * and tool execution for realtime voice-to-voice AI interactions.
+ *
+ * @example
+ * ```typescript
+ * import { RealtimeClient } from '@tanstack/ai-client'
+ * import { openaiRealtime } from '@tanstack/ai-openai'
+ *
+ * const client = new RealtimeClient({
+ * getToken: () => fetch('/api/realtime-token').then(r => r.json()),
+ * adapter: openaiRealtime(),
+ * tools: [myTool.client(handler)],
+ * onMessage: (msg) => console.log('Message:', msg),
+ * })
+ *
+ * await client.connect()
+ * ```
+ */
+export class RealtimeClient {
+ private options: RealtimeClientOptions
+ private connection: RealtimeConnection | null = null
+ private token: RealtimeToken | null = null
+ private tokenRefreshTimeout: ReturnType<typeof setTimeout> | null = null
+ private clientTools: Map<string, AnyClientTool>
+ private stateChangeCallbacks: Set<RealtimeStateChangeCallback> = new Set()
+ private unsubscribers: Array<() => void> = []
+
+ private state: RealtimeClientState = {
+ status: 'idle',
+ mode: 'idle',
+ messages: [],
+ pendingUserTranscript: null,
+ pendingAssistantTranscript: null,
+ error: null,
+ }
+
+ constructor(options: RealtimeClientOptions) {
+ this.options = {
+ autoPlayback: true,
+ autoCapture: true,
+ vadMode: 'server',
+ ...options,
+ }
+
+ // Build client tools map
+ this.clientTools = new Map()
+ if (options.tools) {
+ for (const tool of options.tools) {
+ this.clientTools.set(tool.name, tool)
+ }
+ }
+ }
+
+ // ============================================================================
+ // Connection Lifecycle
+ // ============================================================================
+
+ /**
+ * Connect to the realtime session.
+ * Fetches a token and establishes the connection.
+ */
+ async connect(): Promise<void> {
+ if (this.state.status === 'connected') {
+ return
+ }
+
+ this.updateState({ status: 'connecting', error: null })
+
+ try {
+ // Fetch token from server
+ this.token = await this.options.getToken()
+
+ // Schedule token refresh
+ this.scheduleTokenRefresh()
+
+ // Connect via adapter (pass tools for providers like ElevenLabs that need them at connect time)
+ const toolsList =
+ this.clientTools.size > 0
+ ? Array.from(this.clientTools.values())
+ : undefined
+ this.connection = await this.options.adapter.connect(
+ this.token,
+ toolsList,
+ )
+
+ // Subscribe to connection events
+ this.subscribeToConnectionEvents()
+
+ // Auto-configure session with client-provided settings
+ this.applySessionConfig()
+
+ // Start audio capture if configured
+ if (this.options.autoCapture) {
+ await this.connection.startAudioCapture()
+ }
+
+ this.updateState({ status: 'connected', mode: 'listening' })
+ this.options.onConnect?.()
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error(String(error))
+ this.updateState({ status: 'error', error: err })
+ this.options.onError?.(err)
+ throw err
+ }
+ }
+
+ /**
+ * Disconnect from the realtime session.
+ */
+ async disconnect(): Promise<void> {
+ if (this.tokenRefreshTimeout) {
+ clearTimeout(this.tokenRefreshTimeout)
+ this.tokenRefreshTimeout = null
+ }
+
+ // Unsubscribe from all events
+ for (const unsub of this.unsubscribers) {
+ unsub()
+ }
+ this.unsubscribers = []
+
+ if (this.connection) {
+ await this.connection.disconnect()
+ this.connection = null
+ }
+
+ this.token = null
+ this.updateState({
+ status: 'idle',
+ mode: 'idle',
+ pendingUserTranscript: null,
+ pendingAssistantTranscript: null,
+ })
+ this.options.onDisconnect?.()
+ }
+
+ // ============================================================================
+ // Voice Control
+ // ============================================================================
+
+ /**
+ * Start listening for voice input.
+ * Only needed when vadMode is 'manual'.
+ */
+ startListening(): void {
+ if (!this.connection || this.state.status !== 'connected') {
+ return
+ }
+ this.connection.startAudioCapture()
+ this.updateState({ mode: 'listening' })
+ }
+
+ /**
+ * Stop listening for voice input.
+ * Only needed when vadMode is 'manual'.
+ */
+ stopListening(): void {
+ if (!this.connection) {
+ return
+ }
+ this.connection.stopAudioCapture()
+ this.updateState({ mode: 'idle' })
+ }
+
+ /**
+ * Interrupt the current assistant response.
+ */
+ interrupt(): void {
+ if (!this.connection) {
+ return
+ }
+ this.connection.interrupt()
+ }
+
+ // ============================================================================
+ // Text Input
+ // ============================================================================
+
+ /**
+ * Send a text message instead of voice.
+ */
+ sendText(text: string): void {
+ if (!this.connection || this.state.status !== 'connected') {
+ return
+ }
+
+ // Add user message
+ const userMessage: RealtimeMessage = {
+ id: this.generateId(),
+ role: 'user',
+ timestamp: Date.now(),
+ parts: [{ type: 'text', content: text }],
+ }
+ this.addMessage(userMessage)
+
+ // Send to provider
+ this.connection.sendText(text)
+ }
+
+ /**
+ * Send an image to the conversation.
+ * @param imageData - Base64-encoded image data or a URL
+ * @param mimeType - MIME type of the image (e.g., 'image/png', 'image/jpeg')
+ */
+ sendImage(imageData: string, mimeType: string): void {
+ if (!this.connection || this.state.status !== 'connected') {
+ return
+ }
+
+ // Add user message with image part
+ const userMessage: RealtimeMessage = {
+ id: this.generateId(),
+ role: 'user',
+ timestamp: Date.now(),
+ parts: [{ type: 'image', data: imageData, mimeType }],
+ }
+ this.addMessage(userMessage)
+
+ // Send to provider
+ this.connection.sendImage(imageData, mimeType)
+ }
+
+ // ============================================================================
+ // State Access
+ // ============================================================================
+
+ /** Get current connection status */
+ get status(): RealtimeStatus {
+ return this.state.status
+ }
+
+ /** Get current mode */
+ get mode(): RealtimeMode {
+ return this.state.mode
+ }
+
+ /** Get conversation messages */
+ get messages(): Array<RealtimeMessage> {
+ return this.state.messages
+ }
+
+ /** Get current error, if any */
+ get error(): Error | null {
+ return this.state.error
+ }
+
+ /** Get pending user transcript (while user is speaking) */
+ get pendingUserTranscript(): string | null {
+ return this.state.pendingUserTranscript
+ }
+
+ /** Get pending assistant transcript (while assistant is speaking) */
+ get pendingAssistantTranscript(): string | null {
+ return this.state.pendingAssistantTranscript
+ }
+
+ /** Get audio visualization data */
+ get audio(): AudioVisualization | null {
+ return this.connection?.getAudioVisualization() ?? null
+ }
+
+ // ============================================================================
+ // State Subscription
+ // ============================================================================
+
+ /**
+ * Subscribe to state changes.
+ * @returns Unsubscribe function
+ */
+ onStateChange(callback: RealtimeStateChangeCallback): () => void {
+ this.stateChangeCallbacks.add(callback)
+ return () => {
+ this.stateChangeCallbacks.delete(callback)
+ }
+ }
+
+ // ============================================================================
+ // Cleanup
+ // ============================================================================
+
+ /**
+ * Clean up resources.
+ * Call this when disposing of the client.
+ */
+ destroy(): void {
+ this.disconnect()
+ this.stateChangeCallbacks.clear()
+ }
+
+ // ============================================================================
+ // Private Methods
+ // ============================================================================
+
+ private updateState(updates: Partial<RealtimeClientState>): void {
+ this.state = { ...this.state, ...updates }
+
+ // Notify callbacks
+ for (const callback of this.stateChangeCallbacks) {
+ callback(this.state)
+ }
+
+ // Notify specific callbacks
+ if ('status' in updates && updates.status !== undefined) {
+ this.options.onStatusChange?.(updates.status)
+ }
+ if ('mode' in updates && updates.mode !== undefined) {
+ this.options.onModeChange?.(updates.mode)
+ }
+ }
+
+ private addMessage(message: RealtimeMessage): void {
+ this.updateState({
+ messages: [...this.state.messages, message],
+ })
+ this.options.onMessage?.(message)
+ }
+
+ private scheduleTokenRefresh(): void {
+ if (!this.token) return
+
+ const timeUntilExpiry = this.token.expiresAt - Date.now()
+ const refreshIn = Math.max(0, timeUntilExpiry - TOKEN_REFRESH_BUFFER_MS)
+
+ this.tokenRefreshTimeout = setTimeout(() => {
+ this.refreshToken()
+ }, refreshIn)
+ }
+
+ private async refreshToken(): Promise<void> {
+ try {
+ this.token = await this.options.getToken()
+ this.scheduleTokenRefresh()
+ // Note: Some providers may require reconnection with new token
+ // This is handled by the adapter implementation
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error(String(error))
+ this.updateState({ error: err })
+ this.options.onError?.(err)
+ }
+ }
+
+ private subscribeToConnectionEvents(): void {
+ if (!this.connection) return
+
+ // Status changes
+ this.unsubscribers.push(
+ this.connection.on('status_change', ({ status }) => {
+ this.updateState({ status })
+ }),
+ )
+
+ // Mode changes
+ this.unsubscribers.push(
+ this.connection.on('mode_change', ({ mode }) => {
+ this.updateState({ mode })
+ }),
+ )
+
+ // Transcripts (streaming)
+ // User transcripts are added as messages when final (no separate message_complete for user input)
+ // Assistant transcripts are streamed, final message comes via message_complete
+ this.unsubscribers.push(
+ this.connection.on('transcript', ({ role, transcript, isFinal }) => {
+ if (role === 'user') {
+ this.updateState({
+ pendingUserTranscript: isFinal ? null : transcript,
+ })
+ // Add user message when transcript is finalized
+ if (isFinal && transcript) {
+ this.addMessage({
+ id: this.generateId(),
+ role: 'user',
+ timestamp: Date.now(),
+ parts: [{ type: 'audio', transcript, durationMs: 0 }],
+ })
+ }
+ } else {
+ // Assistant transcripts - just update pending, message_complete handles final
+ this.updateState({
+ pendingAssistantTranscript: isFinal ? null : transcript,
+ })
+ }
+ }),
+ )
+
+ // Tool calls
+ this.unsubscribers.push(
+ this.connection.on(
+ 'tool_call',
+ async ({ toolCallId, toolName, input }) => {
+ const tool = this.clientTools.get(toolName)
+ if (tool?.execute) {
+ try {
+ const output = await tool.execute(input)
+ this.connection?.sendToolResult(
+ toolCallId,
+ typeof output === 'string' ? output : JSON.stringify(output),
+ )
+ } catch (error) {
+ const errMsg =
+ error instanceof Error ? error.message : String(error)
+ this.connection?.sendToolResult(
+ toolCallId,
+ JSON.stringify({ error: errMsg }),
+ )
+ }
+ }
+ },
+ ),
+ )
+
+ // Message complete
+ this.unsubscribers.push(
+ this.connection.on('message_complete', ({ message }) => {
+ // Replace pending message with final version if needed
+ const existingIndex = this.state.messages.findIndex(
+ (m) => m.id === message.id,
+ )
+ if (existingIndex >= 0) {
+ const newMessages = [...this.state.messages]
+ newMessages[existingIndex] = message
+ this.updateState({ messages: newMessages })
+ } else {
+ this.addMessage(message)
+ }
+ }),
+ )
+
+ // Interruption
+ this.unsubscribers.push(
+ this.connection.on('interrupted', ({ messageId }) => {
+ if (messageId) {
+ const newMessages = this.state.messages.map((m) =>
+ m.id === messageId ? { ...m, interrupted: true } : m,
+ )
+ this.updateState({ messages: newMessages })
+ }
+ this.updateState({
+ mode: 'listening',
+ pendingAssistantTranscript: null,
+ })
+ this.options.onInterrupted?.()
+ }),
+ )
+
+ // Errors
+ this.unsubscribers.push(
+ this.connection.on('error', ({ error }) => {
+ this.updateState({ error })
+ this.options.onError?.(error)
+ }),
+ )
+ }
+
+ private applySessionConfig(): void {
+ if (!this.connection) return
+
+ const {
+ instructions,
+ voice,
+ vadMode,
+ tools,
+ outputModalities,
+ temperature,
+ maxOutputTokens,
+ semanticEagerness,
+ } = this.options
+ const hasConfig =
+ instructions ||
+ voice ||
+ vadMode ||
+ (tools && tools.length > 0) ||
+ outputModalities ||
+ temperature !== undefined ||
+ maxOutputTokens !== undefined ||
+ semanticEagerness
+ if (!hasConfig) return
+
+ const toolsConfig = tools
+ ? Array.from(this.clientTools.values()).map((t) => ({
+ name: t.name,
+ description: t.description,
+ inputSchema: t.inputSchema
+ ? convertSchemaToJsonSchema(t.inputSchema)
+ : undefined,
+ }))
+ : undefined
+
+ this.connection.updateSession({
+ instructions,
+ voice,
+ vadMode,
+ tools: toolsConfig,
+ outputModalities,
+ temperature,
+ maxOutputTokens,
+ semanticEagerness,
+ })
+ }
+
+ private generateId(): string {
+ return `msg-${Date.now()}-${Math.random().toString(36).substring(7)}`
+ }
+}
diff --git a/packages/typescript/ai-client/src/realtime-types.ts b/packages/typescript/ai-client/src/realtime-types.ts
new file mode 100644
index 000000000..bffd6df34
--- /dev/null
+++ b/packages/typescript/ai-client/src/realtime-types.ts
@@ -0,0 +1,180 @@
+import type {
+ AnyClientTool,
+ AudioVisualization,
+ RealtimeEvent,
+ RealtimeEventHandler,
+ RealtimeMessage,
+ RealtimeMode,
+ RealtimeSessionConfig,
+ RealtimeStatus,
+ RealtimeToken,
+} from '@tanstack/ai'
+
+// ============================================================================
+// Adapter Interface
+// ============================================================================
+
+/**
+ * Adapter interface for connecting to realtime providers.
+ * Each provider (OpenAI, ElevenLabs, etc.) implements this interface.
+ */
+export interface RealtimeAdapter {
+ /** Provider identifier */
+ provider: string
+
+ /**
+ * Create a connection using the provided token
+ * @param token - The ephemeral token from the server
+ * @param clientTools - Optional client-side tools to register with the provider
+ * @returns A connection instance
+ */
+ connect: (
+ token: RealtimeToken,
+ clientTools?: ReadonlyArray<AnyClientTool>,
+ ) => Promise<RealtimeConnection>
+}
+
+/**
+ * Connection interface representing an active realtime session.
+ * Handles audio I/O, events, and session management.
+ */
+export interface RealtimeConnection {
+ // Lifecycle
+ /** Disconnect from the realtime session */
+ disconnect: () => Promise<void>
+
+ // Audio I/O
+ /** Start capturing audio from the microphone */
+ startAudioCapture: () => Promise<void>
+ /** Stop capturing audio */
+ stopAudioCapture: () => void
+
+ // Text input
+ /** Send a text message (fallback for when voice isn't available) */
+ sendText: (text: string) => void
+
+ // Image input
+ /** Send an image to the conversation */
+ sendImage: (imageData: string, mimeType: string) => void
+
+ // Tool results
+ /** Send a tool execution result back to the provider */
+ sendToolResult: (callId: string, result: string) => void
+
+ // Session management
+ /** Update session configuration */
+ updateSession: (config: Partial<RealtimeSessionConfig>) => void
+ /** Interrupt the current response */
+ interrupt: () => void
+
+ // Events
+ /** Subscribe to connection events */
+ on: <TEvent extends RealtimeEvent['type']>(
+ event: TEvent,
+ handler: RealtimeEventHandler<TEvent>,
+ ) => () => void
+
+ // Audio visualization
+ /** Get audio visualization data */
+ getAudioVisualization: () => AudioVisualization
+}
+
+// ============================================================================
+// Client Options
+// ============================================================================
+
+/**
+ * Options for the RealtimeClient
+ */
+export interface RealtimeClientOptions {
+ /**
+ * Function to fetch a realtime token from the server.
+ * Called on connect and when token needs refresh.
+ */
+ getToken: () => Promise<RealtimeToken>
+
+ /**
+ * The realtime adapter to use (e.g., openaiRealtime())
+ */
+ adapter: RealtimeAdapter
+
+ /**
+ * Client-side tools with execution logic
+ */
+ tools?: ReadonlyArray<AnyClientTool>
+
+ /**
+ * Auto-play assistant audio (default: true)
+ */
+ autoPlayback?: boolean
+
+ /**
+ * Request microphone access on connect (default: true)
+ */
+ autoCapture?: boolean
+
+ /**
+ * System instructions for the assistant
+ */
+ instructions?: string
+
+ /**
+ * Voice to use for audio output
+ */
+ voice?: string
+
+ /**
+ * Voice activity detection mode (default: 'server')
+ */
+ vadMode?: 'server' | 'semantic' | 'manual'
+
+ /**
+ * Output modalities for responses (e.g., ['audio', 'text'])
+ */
+ outputModalities?: Array<'audio' | 'text'>
+
+ /**
+ * Temperature for generation (provider-specific range)
+ */
+ temperature?: number
+
+ /**
+ * Maximum number of tokens in a response
+ */
+ maxOutputTokens?: number | 'inf'
+
+ /**
+ * Eagerness level for semantic VAD ('low', 'medium', 'high')
+ */
+ semanticEagerness?: 'low' | 'medium' | 'high'
+
+ // Callbacks
+ onStatusChange?: (status: RealtimeStatus) => void
+ onModeChange?: (mode: RealtimeMode) => void
+ onMessage?: (message: RealtimeMessage) => void
+ onError?: (error: Error) => void
+ onConnect?: () => void
+ onDisconnect?: () => void
+ onInterrupted?: () => void
+}
+
+// ============================================================================
+// Client State
+// ============================================================================
+
+/**
+ * Internal state of the RealtimeClient
+ */
+export interface RealtimeClientState {
+ status: RealtimeStatus
+ mode: RealtimeMode
+ messages: Array<RealtimeMessage>
+ pendingUserTranscript: string | null
+ pendingAssistantTranscript: string | null
+ error: Error | null
+}
+
+/**
+ * Callback type for state changes
+ */
+export type RealtimeStateChangeCallback = (state: RealtimeClientState) => void
diff --git a/packages/typescript/ai-elevenlabs/README.md b/packages/typescript/ai-elevenlabs/README.md
new file mode 100644
index 000000000..71b0d979b
--- /dev/null
+++ b/packages/typescript/ai-elevenlabs/README.md
@@ -0,0 +1,76 @@
+# @tanstack/ai-elevenlabs
+
+ElevenLabs adapter for TanStack AI realtime voice conversations.
+
+## Installation
+
+```bash
+npm install @tanstack/ai-elevenlabs @tanstack/ai @tanstack/ai-client
+```
+
+## Usage
+
+### Server-Side Token Generation
+
+```typescript
+import { realtimeToken } from '@tanstack/ai'
+import { elevenlabsRealtimeToken } from '@tanstack/ai-elevenlabs'
+
+// Generate a signed URL for client use
+const token = await realtimeToken({
+ adapter: elevenlabsRealtimeToken({
+ agentId: 'your-agent-id',
+ }),
+})
+```
+
+### Client-Side Usage
+
+```typescript
+import { RealtimeClient } from '@tanstack/ai-client'
+import { elevenlabsRealtime } from '@tanstack/ai-elevenlabs'
+
+const client = new RealtimeClient({
+ getToken: () => fetch('/api/realtime-token').then((r) => r.json()),
+ adapter: elevenlabsRealtime(),
+})
+
+await client.connect()
+```
+
+### With React
+
+```typescript
+import { useRealtimeChat } from '@tanstack/ai-react'
+import { elevenlabsRealtime } from '@tanstack/ai-elevenlabs'
+
+function VoiceChat() {
+ const { status, mode, messages, connect, disconnect } = useRealtimeChat({
+ getToken: () => fetch('/api/realtime-token').then(r => r.json()),
+ adapter: elevenlabsRealtime(),
+ })
+
+ return (
+
+
Status: {status}
+
Mode: {mode}
+
+
+ )
+}
+```
+
+## Environment Variables
+
+Set `ELEVENLABS_API_KEY` in your environment for server-side token generation.
+
+## Requirements
+
+- ElevenLabs account with Conversational AI agent configured
+- Agent ID from ElevenLabs dashboard
+
+## License
+
+MIT
diff --git a/packages/typescript/ai-elevenlabs/package.json b/packages/typescript/ai-elevenlabs/package.json
new file mode 100644
index 000000000..4ff1754e8
--- /dev/null
+++ b/packages/typescript/ai-elevenlabs/package.json
@@ -0,0 +1,55 @@
+{
+ "name": "@tanstack/ai-elevenlabs",
+ "version": "0.0.1",
+ "description": "ElevenLabs adapter for TanStack AI realtime voice",
+ "author": "",
+ "license": "MIT",
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/TanStack/ai.git",
+ "directory": "packages/typescript/ai-elevenlabs"
+ },
+ "keywords": [
+ "ai",
+ "elevenlabs",
+ "voice",
+ "realtime",
+ "tanstack",
+ "adapter"
+ ],
+ "type": "module",
+ "module": "./dist/esm/index.js",
+ "types": "./dist/esm/index.d.ts",
+ "exports": {
+ ".": {
+ "types": "./dist/esm/index.d.ts",
+ "import": "./dist/esm/index.js"
+ }
+ },
+ "files": [
+ "dist",
+ "src"
+ ],
+ "scripts": {
+ "build": "vite build",
+ "clean": "premove ./build ./dist",
+ "lint:fix": "eslint ./src --fix",
+ "test:build": "publint --strict",
+ "test:eslint": "eslint ./src",
+ "test:lib": "vitest --passWithNoTests",
+ "test:lib:dev": "pnpm test:lib --watch",
+ "test:types": "tsc"
+ },
+ "dependencies": {
+ "@11labs/client": "^0.2.0"
+ },
+ "peerDependencies": {
+ "@tanstack/ai": "workspace:^",
+ "@tanstack/ai-client": "workspace:^"
+ },
+ "devDependencies": {
+ "@tanstack/ai": "workspace:*",
+ "@tanstack/ai-client": "workspace:*",
+ "@vitest/coverage-v8": "4.0.14"
+ }
+}
diff --git a/packages/typescript/ai-elevenlabs/src/index.ts b/packages/typescript/ai-elevenlabs/src/index.ts
new file mode 100644
index 000000000..8f3789e84
--- /dev/null
+++ b/packages/typescript/ai-elevenlabs/src/index.ts
@@ -0,0 +1,13 @@
+// ============================================================================
+// ElevenLabs Realtime (Voice) Adapters
+// ============================================================================
+
+export { elevenlabsRealtimeToken, elevenlabsRealtime } from './realtime/index'
+
+export type {
+ ElevenLabsRealtimeTokenOptions,
+ ElevenLabsRealtimeOptions,
+ ElevenLabsConversationMode,
+ ElevenLabsVADConfig,
+ ElevenLabsClientTool,
+} from './realtime/index'
diff --git a/packages/typescript/ai-elevenlabs/src/realtime/adapter.ts b/packages/typescript/ai-elevenlabs/src/realtime/adapter.ts
new file mode 100644
index 000000000..33bc5344e
--- /dev/null
+++ b/packages/typescript/ai-elevenlabs/src/realtime/adapter.ts
@@ -0,0 +1,300 @@
+import { Conversation } from '@11labs/client'
+import type {
+ AnyClientTool,
+ AudioVisualization,
+ RealtimeEvent,
+ RealtimeEventHandler,
+ RealtimeMessage,
+ RealtimeMode,
+ RealtimeSessionConfig,
+ RealtimeStatus,
+ RealtimeToken,
+} from '@tanstack/ai'
+import type { RealtimeAdapter, RealtimeConnection } from '@tanstack/ai-client'
+import type { ElevenLabsRealtimeOptions } from './types'
+
+/**
+ * Creates an ElevenLabs realtime adapter for client-side use.
+ *
+ * Wraps the @11labs/client SDK for voice conversations.
+ *
+ * @param options - Optional configuration
+ * @returns A RealtimeAdapter for use with RealtimeClient
+ *
+ * @example
+ * ```typescript
+ * import { RealtimeClient } from '@tanstack/ai-client'
+ * import { elevenlabsRealtime } from '@tanstack/ai-elevenlabs'
+ *
+ * const client = new RealtimeClient({
+ * getToken: () => fetch('/api/realtime-token').then(r => r.json()),
+ * adapter: elevenlabsRealtime(),
+ * })
+ * ```
+ */
+export function elevenlabsRealtime(
+  options: ElevenLabsRealtimeOptions = {},
+): RealtimeAdapter {
+  return {
+    provider: 'elevenlabs',
+
+    async connect(
+      token: RealtimeToken,
+      clientToolDefs?: ReadonlyArray<AnyClientTool>,
+    ): Promise<RealtimeConnection> {
+      return createElevenLabsConnection(token, options, clientToolDefs)
+    },
+  }
+}
+
+/**
+ * Creates a connection to ElevenLabs conversational AI
+ */
+async function createElevenLabsConnection(
+ token: RealtimeToken,
+ _options: ElevenLabsRealtimeOptions,
+ clientToolDefs?: ReadonlyArray,
+): Promise {
+ const eventHandlers = new Map>>()
+ let conversation: Awaited<
+ ReturnType
+ > | null = null
+ let messageIdCounter = 0
+
+ // Empty arrays for when visualization isn't available
+ const emptyFrequencyData = new Uint8Array(128)
+ const emptyTimeDomainData = new Uint8Array(128).fill(128)
+
+ // Helper to emit events
+ function emit(
+ event: TEvent,
+ payload: Parameters>[0],
+ ) {
+ const handlers = eventHandlers.get(event)
+ if (handlers) {
+ for (const handler of handlers) {
+ handler(payload)
+ }
+ }
+ }
+
+ function generateMessageId(): string {
+ return `el-msg-${Date.now()}-${++messageIdCounter}`
+ }
+
+ // Convert TanStack tool definitions to ElevenLabs clientTools format
+ const elevenLabsClientTools: Record<
+ string,
+ {
+ handler: (params: unknown) => Promise
+ description: string
+ parameters: Record
+ }
+ > = {}
+
+ if (clientToolDefs) {
+ for (const tool of clientToolDefs) {
+ elevenLabsClientTools[tool.name] = {
+ handler: async (params: unknown) => {
+ if (tool.execute) {
+ const result = await tool.execute(params)
+ return typeof result === 'string' ? result : JSON.stringify(result)
+ }
+ return JSON.stringify({
+ error: `No execute function for tool ${tool.name}`,
+ })
+ },
+ description: tool.description,
+ parameters: tool.inputSchema
+ ? (tool.inputSchema as Record)
+ : { type: 'object', properties: {} },
+ }
+ }
+ }
+
+ // Build session options
+ const sessionOptions: Record = {
+ signedUrl: token.token,
+
+ onConnect: () => {
+ emit('status_change', { status: 'connected' as RealtimeStatus })
+ emit('mode_change', { mode: 'listening' })
+ },
+
+ onDisconnect: () => {
+ emit('status_change', { status: 'idle' as RealtimeStatus })
+ emit('mode_change', { mode: 'idle' })
+ },
+
+ onModeChange: ({ mode }: { mode: string }) => {
+ const mappedMode: RealtimeMode =
+ mode === 'speaking' ? 'speaking' : 'listening'
+ emit('mode_change', { mode: mappedMode })
+ },
+
+ onMessage: ({ message, source }: { message: string; source: string }) => {
+ const role = source === 'user' ? 'user' : 'assistant'
+
+ // Emit transcript update
+ emit('transcript', {
+ role,
+ transcript: message,
+ isFinal: true,
+ })
+
+ // Create and emit message
+ const realtimeMessage: RealtimeMessage = {
+ id: generateMessageId(),
+ role,
+ timestamp: Date.now(),
+ parts: [{ type: 'audio', transcript: message }],
+ }
+ emit('message_complete', { message: realtimeMessage })
+ },
+
+ onError: (error: string | Error) => {
+ emit('error', {
+ error: new Error(
+ typeof error === 'string' ? error : error.message || 'Unknown error',
+ ),
+ })
+ },
+ }
+
+ // Only add clientTools if we have any
+ if (Object.keys(elevenLabsClientTools).length > 0) {
+ sessionOptions.clientTools = elevenLabsClientTools
+ }
+
+ // Start the conversation session
+ conversation = await Conversation.startSession(
+ sessionOptions as Parameters[0],
+ )
+
+ // Connection implementation
+ const connection: RealtimeConnection = {
+ async disconnect() {
+ if (conversation) {
+ await conversation.endSession()
+ conversation = null
+ }
+ emit('status_change', { status: 'idle' as RealtimeStatus })
+ },
+
+ async startAudioCapture() {
+ // ElevenLabs SDK handles audio capture automatically
+ // This is called when the session starts
+ emit('mode_change', { mode: 'listening' })
+ },
+
+ stopAudioCapture() {
+ // ElevenLabs SDK handles this
+ emit('mode_change', { mode: 'idle' })
+ },
+
+ sendText(text: string) {
+ if (!conversation) return
+ conversation.sendUserMessage(text)
+ },
+
+ sendImage(_imageData: string, _mimeType: string) {
+ // ElevenLabs does not support direct image input in the conversation API
+ console.warn(
+ 'ElevenLabs realtime does not support sending images directly.',
+ )
+ },
+
+ sendToolResult(_callId: string, _result: string) {
+ // ElevenLabs client tools are handled via the clientTools handlers
+ // registered at session start — results are returned automatically
+ },
+
+ updateSession(_config: Partial) {
+ // ElevenLabs session config is set at creation time
+ console.warn(
+ 'ElevenLabs does not support runtime session updates. Configure at connection time.',
+ )
+ },
+
+ interrupt() {
+ // ElevenLabs handles interruption automatically via barge-in
+ // No explicit API to call
+ emit('mode_change', { mode: 'listening' })
+ emit('interrupted', {})
+ },
+
+ on(
+ event: TEvent,
+ handler: RealtimeEventHandler,
+ ): () => void {
+ if (!eventHandlers.has(event)) {
+ eventHandlers.set(event, new Set())
+ }
+ eventHandlers.get(event)!.add(handler)
+
+ return () => {
+ eventHandlers.get(event)?.delete(handler)
+ }
+ },
+
+ getAudioVisualization(): AudioVisualization {
+ return {
+ get inputLevel() {
+ if (!conversation) return 0
+ try {
+ return conversation.getInputVolume()
+ } catch {
+ return 0
+ }
+ },
+
+ get outputLevel() {
+ if (!conversation) return 0
+ try {
+ return conversation.getOutputVolume()
+ } catch {
+ return 0
+ }
+ },
+
+ getInputFrequencyData() {
+ if (!conversation) return emptyFrequencyData
+ try {
+ return conversation.getInputByteFrequencyData()
+ } catch {
+ return emptyFrequencyData
+ }
+ },
+
+ getOutputFrequencyData() {
+ if (!conversation) return emptyFrequencyData
+ try {
+ return conversation.getOutputByteFrequencyData()
+ } catch {
+ return emptyFrequencyData
+ }
+ },
+
+ getInputTimeDomainData() {
+ // ElevenLabs SDK doesn't expose time domain data
+ return emptyTimeDomainData
+ },
+
+ getOutputTimeDomainData() {
+ // ElevenLabs SDK doesn't expose time domain data
+ return emptyTimeDomainData
+ },
+
+ get inputSampleRate() {
+ return 16000
+ },
+
+ get outputSampleRate() {
+ return 16000
+ },
+ }
+ },
+ }
+
+ return connection
+}
diff --git a/packages/typescript/ai-elevenlabs/src/realtime/index.ts b/packages/typescript/ai-elevenlabs/src/realtime/index.ts
new file mode 100644
index 000000000..db176897e
--- /dev/null
+++ b/packages/typescript/ai-elevenlabs/src/realtime/index.ts
@@ -0,0 +1,14 @@
+// Token adapter for server-side use
+export { elevenlabsRealtimeToken } from './token'
+
+// Client adapter for browser use
+export { elevenlabsRealtime } from './adapter'
+
+// Types
+export type {
+ ElevenLabsRealtimeTokenOptions,
+ ElevenLabsRealtimeOptions,
+ ElevenLabsConversationMode,
+ ElevenLabsVADConfig,
+ ElevenLabsClientTool,
+} from './types'
diff --git a/packages/typescript/ai-elevenlabs/src/realtime/token.ts b/packages/typescript/ai-elevenlabs/src/realtime/token.ts
new file mode 100644
index 000000000..030d0c9a9
--- /dev/null
+++ b/packages/typescript/ai-elevenlabs/src/realtime/token.ts
@@ -0,0 +1,103 @@
+import type { RealtimeToken, RealtimeTokenAdapter } from '@tanstack/ai'
+import type { ElevenLabsRealtimeTokenOptions } from './types'
+
+const ELEVENLABS_API_URL = 'https://api.elevenlabs.io/v1'
+
+/**
+ * Get ElevenLabs API key from environment (Node `process.env` first,
+ * then a browser-injected `window.env`). Throws when neither is set.
+ */
+function getElevenLabsApiKey(): string {
+  // Prefer Node.js environment variables
+  const nodeKey =
+    typeof process !== 'undefined' ? process.env.ELEVENLABS_API_KEY : undefined
+  if (nodeKey) return nodeKey
+
+  // Fall back to env injected onto window (e.g. by a bundler)
+  const browserKey =
+    typeof window !== 'undefined'
+      ? (window as unknown as { env?: { ELEVENLABS_API_KEY?: string } }).env
+          ?.ELEVENLABS_API_KEY
+      : undefined
+  if (browserKey) return browserKey
+
+  throw new Error(
+    'ELEVENLABS_API_KEY not found in environment variables. ' +
+      'Please set ELEVENLABS_API_KEY in your environment.',
+  )
+}
+
+/**
+ * Creates an ElevenLabs realtime token adapter.
+ *
+ * This adapter generates signed URLs for client-side connections.
+ * The signed URL is valid for 30 minutes.
+ *
+ * @param options - Configuration options including agentId
+ * @returns A RealtimeTokenAdapter for use with realtimeToken()
+ *
+ * @example
+ * ```typescript
+ * import { realtimeToken } from '@tanstack/ai'
+ * import { elevenlabsRealtimeToken } from '@tanstack/ai-elevenlabs'
+ *
+ * const token = await realtimeToken({
+ * adapter: elevenlabsRealtimeToken({
+ * agentId: 'your-agent-id',
+ * }),
+ * })
+ * ```
+ */
+export function elevenlabsRealtimeToken(
+  options: ElevenLabsRealtimeTokenOptions,
+): RealtimeTokenAdapter {
+  const apiKey = getElevenLabsApiKey()
+
+  return {
+    provider: 'elevenlabs',
+
+    async generateToken(): Promise<RealtimeToken> {
+      const { agentId, overrides } = options
+
+      // Get signed URL from ElevenLabs
+      const response = await fetch(
+        `${ELEVENLABS_API_URL}/convai/conversation/get_signed_url?agent_id=${agentId}`,
+        {
+          method: 'GET',
+          headers: {
+            'xi-api-key': apiKey,
+          },
+        },
+      )
+
+      if (!response.ok) {
+        const errorText = await response.text()
+        throw new Error(
+          `ElevenLabs signed URL request failed: ${response.status} ${errorText}`,
+        )
+      }
+
+      const data = await response.json()
+      const signedUrl = data.signed_url as string
+
+      // Signed URLs are valid for 30 minutes
+      const expiresAt = Date.now() + 30 * 60 * 1000
+
+      return {
+        provider: 'elevenlabs',
+        token: signedUrl,
+        expiresAt,
+        config: {
+          voice: overrides?.voiceId,
+          instructions: overrides?.systemPrompt,
+          providerOptions: {
+            agentId,
+            firstMessage: overrides?.firstMessage,
+            language: overrides?.language,
+          },
+        },
+      }
+    },
+  }
+}
diff --git a/packages/typescript/ai-elevenlabs/src/realtime/types.ts b/packages/typescript/ai-elevenlabs/src/realtime/types.ts
new file mode 100644
index 000000000..c3f5227f7
--- /dev/null
+++ b/packages/typescript/ai-elevenlabs/src/realtime/types.ts
@@ -0,0 +1,55 @@
+/**
+ * Options for the ElevenLabs realtime token adapter
+ */
+export interface ElevenLabsRealtimeTokenOptions {
+  /** Agent ID configured in ElevenLabs dashboard */
+  agentId: string
+  /** Optional override values for the agent */
+  overrides?: {
+    /** Custom voice ID to use */
+    voiceId?: string
+    /** Custom system prompt */
+    systemPrompt?: string
+    /** First message the agent should speak */
+    firstMessage?: string
+    /** Language code (e.g., 'en') */
+    language?: string
+  }
+}
+
+/**
+ * Options for the ElevenLabs realtime client adapter
+ */
+export interface ElevenLabsRealtimeOptions {
+  /** Connection mode (default: auto-detect) */
+  connectionMode?: 'websocket' | 'webrtc'
+  /** Enable debug logging */
+  debug?: boolean
+}
+
+/**
+ * ElevenLabs conversation mode
+ */
+export type ElevenLabsConversationMode = 'speaking' | 'listening'
+
+/**
+ * ElevenLabs voice activity detection configuration
+ */
+export interface ElevenLabsVADConfig {
+  /** VAD threshold (0.1-0.9) */
+  vadThreshold?: number
+  /** Silence threshold in seconds (0.3-3.0) */
+  vadSilenceThresholdSecs?: number
+  /** Minimum speech duration in ms */
+  minSpeechDurationMs?: number
+  /** Minimum silence duration in ms */
+  minSilenceDurationMs?: number
+}
+
+/**
+ * Client tool definition for ElevenLabs
+ */
+export interface ElevenLabsClientTool<TParams = unknown, TResult = unknown> {
+  /** Tool handler function */
+  handler: (params: TParams) => Promise<TResult> | TResult
+}
diff --git a/packages/typescript/ai-elevenlabs/tsconfig.json b/packages/typescript/ai-elevenlabs/tsconfig.json
new file mode 100644
index 000000000..e5e872741
--- /dev/null
+++ b/packages/typescript/ai-elevenlabs/tsconfig.json
@@ -0,0 +1,8 @@
+{
+ "extends": "../../../tsconfig.json",
+ "compilerOptions": {
+ "outDir": "dist"
+ },
+ "include": ["vite.config.ts", "./src"],
+ "exclude": ["node_modules", "dist", "**/*.config.ts"]
+}
diff --git a/packages/typescript/ai-elevenlabs/vite.config.ts b/packages/typescript/ai-elevenlabs/vite.config.ts
new file mode 100644
index 000000000..11f5b20b7
--- /dev/null
+++ b/packages/typescript/ai-elevenlabs/vite.config.ts
@@ -0,0 +1,37 @@
+import { defineConfig, mergeConfig } from 'vitest/config'
+import { tanstackViteConfig } from '@tanstack/vite-config'
+import packageJson from './package.json'
+
+const testConfig = defineConfig({
+  test: {
+    name: packageJson.name,
+    dir: './',
+    watch: false,
+    globals: true,
+    environment: 'node',
+    include: ['tests/**/*.test.ts'],
+    coverage: {
+      provider: 'v8',
+      reporter: ['text', 'json', 'html', 'lcov'],
+      include: ['src/**/*.ts'],
+      exclude: [
+        'node_modules/',
+        'dist/',
+        'tests/',
+        '**/*.test.ts',
+        '**/*.config.ts',
+        '**/types.ts',
+      ],
+    },
+  },
+})
+
+// ESM-only library build emitted from src/index.ts.
+export default mergeConfig(
+  testConfig,
+  tanstackViteConfig({
+    entry: ['./src/index.ts'],
+    srcDir: './src',
+    cjs: false,
+  }),
+)
diff --git a/packages/typescript/ai-openai/package.json b/packages/typescript/ai-openai/package.json
index 917eb13aa..a137eed03 100644
--- a/packages/typescript/ai-openai/package.json
+++ b/packages/typescript/ai-openai/package.json
@@ -44,10 +44,12 @@
},
"peerDependencies": {
"@tanstack/ai": "workspace:^",
+ "@tanstack/ai-client": "workspace:^",
"zod": "^4.0.0"
},
"devDependencies": {
"@tanstack/ai": "workspace:*",
+ "@tanstack/ai-client": "workspace:*",
"@vitest/coverage-v8": "4.0.14",
"vite": "^7.2.7",
"zod": "^4.2.0"
diff --git a/packages/typescript/ai-openai/src/index.ts b/packages/typescript/ai-openai/src/index.ts
index ffba8da87..afadc4529 100644
--- a/packages/typescript/ai-openai/src/index.ts
+++ b/packages/typescript/ai-openai/src/index.ts
@@ -100,3 +100,19 @@ export type {
OpenAIMessageMetadataByModality,
} from './message-types'
export type { OpenAIClientConfig } from './utils/client'
+
+// ============================================================================
+// Realtime (Voice) Adapters
+// ============================================================================
+
+export { openaiRealtimeToken, openaiRealtime } from './realtime/index'
+
+export type {
+ OpenAIRealtimeVoice,
+ OpenAIRealtimeModel,
+ OpenAIRealtimeTokenOptions,
+ OpenAIRealtimeOptions,
+ OpenAITurnDetection,
+ OpenAISemanticVADConfig,
+ OpenAIServerVADConfig,
+} from './realtime/index'
diff --git a/packages/typescript/ai-openai/src/realtime/adapter.ts b/packages/typescript/ai-openai/src/realtime/adapter.ts
new file mode 100644
index 000000000..35187a5d2
--- /dev/null
+++ b/packages/typescript/ai-openai/src/realtime/adapter.ts
@@ -0,0 +1,683 @@
+import type {
+ AnyClientTool,
+ AudioVisualization,
+ RealtimeEvent,
+ RealtimeEventHandler,
+ RealtimeMessage,
+ RealtimeMode,
+ RealtimeSessionConfig,
+ RealtimeStatus,
+ RealtimeToken,
+} from '@tanstack/ai'
+import type { RealtimeAdapter, RealtimeConnection } from '@tanstack/ai-client'
+import type { OpenAIRealtimeOptions } from './types'
+
+const OPENAI_REALTIME_URL = 'https://api.openai.com/v1/realtime'
+
+/**
+ * Creates an OpenAI realtime adapter for client-side use.
+ *
+ * Uses WebRTC for browser connections (default) or WebSocket for Node.js.
+ *
+ * @param options - Optional configuration
+ * @returns A RealtimeAdapter for use with RealtimeClient
+ *
+ * @example
+ * ```typescript
+ * import { RealtimeClient } from '@tanstack/ai-client'
+ * import { openaiRealtime } from '@tanstack/ai-openai'
+ *
+ * const client = new RealtimeClient({
+ * getToken: () => fetch('/api/realtime-token').then(r => r.json()),
+ * adapter: openaiRealtime(),
+ * })
+ * ```
+ */
+export function openaiRealtime(
+  options: OpenAIRealtimeOptions = {},
+): RealtimeAdapter {
+  const connectionMode = options.connectionMode ?? 'webrtc'
+
+  return {
+    provider: 'openai',
+
+    async connect(
+      token: RealtimeToken,
+      _clientTools?: ReadonlyArray<AnyClientTool>,
+    ): Promise<RealtimeConnection> {
+      if (connectionMode === 'webrtc') {
+        return createWebRTCConnection(token)
+      }
+      throw new Error('WebSocket connection mode not yet implemented')
+    },
+  }
+}
+
+/**
+ * Creates a WebRTC connection to OpenAI's realtime API
+ */
+async function createWebRTCConnection(
+  token: RealtimeToken,
+): Promise<RealtimeConnection> {
+  const model = token.config.model ?? 'gpt-4o-realtime-preview'
+  const eventHandlers = new Map<RealtimeEvent, Set<RealtimeEventHandler<any>>>()
+
+  // WebRTC peer connection
+  const pc = new RTCPeerConnection()
+
+  // Audio context for visualization
+  let audioContext: AudioContext | null = null
+  let inputAnalyser: AnalyserNode | null = null
+  let outputAnalyser: AnalyserNode | null = null
+  let inputSource: MediaStreamAudioSourceNode | null = null
+  let outputSource: MediaStreamAudioSourceNode | null = null
+  let localStream: MediaStream | null = null
+
+  // Audio element for playback (more reliable than AudioContext.destination)
+  let audioElement: HTMLAudioElement | null = null
+
+  // Data channel for events
+  let dataChannel: RTCDataChannel | null = null
+
+  // Current state
+  let currentMode: RealtimeMode = 'idle'
+  let currentMessageId: string | null = null
+
+  // Empty arrays for when visualization isn't available
+  // frequencyBinCount = fftSize / 2 = 1024
+  const emptyFrequencyData = new Uint8Array(1024)
+  const emptyTimeDomainData = new Uint8Array(2048).fill(128) // 128 is silence
+
+  // Helper to emit events (defined early so it can be used during setup)
+  function emit<TEvent extends RealtimeEvent>(
+    event: TEvent,
+    payload: Parameters<RealtimeEventHandler<TEvent>>[0],
+  ) {
+    const handlers = eventHandlers.get(event)
+    if (handlers) {
+      for (const handler of handlers) {
+        handler(payload)
+      }
+    }
+  }
+
+  // Set up data channel for bidirectional communication
+  dataChannel = pc.createDataChannel('oai-events')
+
+  // Promise that resolves when the data channel is open and ready
+  const dataChannelReady = new Promise<void>((resolve) => {
+    dataChannel!.onopen = () => {
+      flushPendingEvents()
+      emit('status_change', { status: 'connected' as RealtimeStatus })
+      resolve()
+    }
+  })
+
+  dataChannel.onmessage = (event) => {
+    try {
+      const message = JSON.parse(event.data)
+      handleServerEvent(message)
+    } catch (e) {
+      console.error('Failed to parse realtime event:', e)
+    }
+  }
+
+  dataChannel.onerror = (error) => {
+    emit('error', { error: new Error(`Data channel error: ${error}`) })
+  }
+
+  // Handle incoming audio track
+  pc.ontrack = (event) => {
+    if (event.track.kind === 'audio' && event.streams[0]) {
+      setupOutputAudioAnalysis(event.streams[0])
+    }
+  }
+
+  // IMPORTANT: Request microphone access and add audio track BEFORE creating offer
+  // OpenAI's Realtime API requires an audio track in the SDP offer
+  try {
+    localStream = await navigator.mediaDevices.getUserMedia({
+      audio: {
+        echoCancellation: true,
+        noiseSuppression: true,
+        sampleRate: 24000,
+      },
+    })
+
+    // Add audio track to peer connection
+    for (const track of localStream.getAudioTracks()) {
+      pc.addTrack(track, localStream)
+    }
+  } catch (error) {
+    throw new Error(
+      `Microphone access required for realtime voice: ${error instanceof Error ? error.message : error}`,
+    )
+  }
+
+  // Create and set local description (now includes audio track)
+  const offer = await pc.createOffer()
+  await pc.setLocalDescription(offer)
+
+  // Send SDP to OpenAI and get answer
+  const sdpResponse = await fetch(`${OPENAI_REALTIME_URL}?model=${model}`, {
+    method: 'POST',
+    headers: {
+      Authorization: `Bearer ${token.token}`,
+      'Content-Type': 'application/sdp',
+    },
+    body: offer.sdp,
+  })
+
+  if (!sdpResponse.ok) {
+    const errorText = await sdpResponse.text()
+    throw new Error(
+      `Failed to establish WebRTC connection: ${sdpResponse.status} - ${errorText}`,
+    )
+  }
+
+  const answerSdp = await sdpResponse.text()
+  await pc.setRemoteDescription({ type: 'answer', sdp: answerSdp })
+
+  // Set up input audio analysis now that we have the stream
+  setupInputAudioAnalysis(localStream)
+
+  // Handle server events
+  function handleServerEvent(event: Record<string, unknown>) {
+    const type = event.type as string
+
+    switch (type) {
+      case 'session.created':
+      case 'session.updated':
+        // Session ready
+        break
+
+      case 'input_audio_buffer.speech_started':
+        currentMode = 'listening'
+        emit('mode_change', { mode: 'listening' })
+        break
+
+      case 'input_audio_buffer.speech_stopped':
+        currentMode = 'thinking'
+        emit('mode_change', { mode: 'thinking' })
+        break
+
+      case 'input_audio_buffer.committed':
+        // Audio buffer committed for processing
+        break
+
+      case 'conversation.item.input_audio_transcription.completed': {
+        const transcript = event.transcript as string
+        emit('transcript', { role: 'user', transcript, isFinal: true })
+        break
+      }
+
+      case 'response.created':
+        currentMode = 'thinking'
+        emit('mode_change', { mode: 'thinking' })
+        break
+
+      case 'response.output_item.added': {
+        const item = event.item as Record<string, unknown>
+        if (item.type === 'message') {
+          currentMessageId = item.id as string
+        }
+        break
+      }
+
+      case 'response.audio_transcript.delta': {
+        const delta = event.delta as string
+        emit('transcript', {
+          role: 'assistant',
+          transcript: delta,
+          isFinal: false,
+        })
+        break
+      }
+
+      case 'response.audio_transcript.done': {
+        const transcript = event.transcript as string
+        emit('transcript', { role: 'assistant', transcript, isFinal: true })
+        break
+      }
+
+      case 'response.output_text.delta': {
+        const delta = event.delta as string
+        emit('transcript', {
+          role: 'assistant',
+          transcript: delta,
+          isFinal: false,
+        })
+        break
+      }
+
+      case 'response.output_text.done': {
+        const text = event.text as string
+        emit('transcript', {
+          role: 'assistant',
+          transcript: text,
+          isFinal: true,
+        })
+        break
+      }
+
+      case 'response.audio.delta':
+        if (currentMode !== 'speaking') {
+          currentMode = 'speaking'
+          emit('mode_change', { mode: 'speaking' })
+        }
+        break
+
+      case 'response.audio.done':
+        break
+
+      case 'response.function_call_arguments.done': {
+        const callId = event.call_id as string
+        const name = event.name as string
+        const args = event.arguments as string
+        try {
+          const input = JSON.parse(args)
+          emit('tool_call', { toolCallId: callId, toolName: name, input })
+        } catch {
+          emit('tool_call', { toolCallId: callId, toolName: name, input: args })
+        }
+        break
+      }
+
+      case 'response.done': {
+        const response = event.response as Record<string, unknown>
+        const output = response.output as
+          | Array<Record<string, unknown>>
+          | undefined
+
+        currentMode = 'listening'
+        emit('mode_change', { mode: 'listening' })
+
+        // Emit message complete if we have a current message
+        if (currentMessageId) {
+          const message: RealtimeMessage = {
+            id: currentMessageId,
+            role: 'assistant',
+            timestamp: Date.now(),
+            parts: [],
+          }
+
+          // Extract content from output items
+          for (const item of output || []) {
+            if (item.type === 'message' && item.content) {
+              const content = item.content as Array<Record<string, unknown>>
+              for (const part of content) {
+                if (part.type === 'audio' && part.transcript) {
+                  message.parts.push({
+                    type: 'audio',
+                    transcript: part.transcript as string,
+                  })
+                } else if (part.type === 'text' && part.text) {
+                  message.parts.push({
+                    type: 'text',
+                    content: part.text as string,
+                  })
+                }
+              }
+            }
+          }
+
+          emit('message_complete', { message })
+          currentMessageId = null
+        }
+        break
+      }
+
+      case 'conversation.item.truncated':
+        emit('interrupted', { messageId: currentMessageId ?? undefined })
+        break
+
+      case 'error': {
+        const error = event.error as Record<string, unknown>
+        emit('error', {
+          error: new Error((error.message as string) || 'Unknown error'),
+        })
+        break
+      }
+    }
+  }
+
+  // Set up audio analysis for output
+  function setupOutputAudioAnalysis(stream: MediaStream) {
+    // Create audio element for playback - this is the standard way to play WebRTC audio
+    audioElement = new Audio()
+    audioElement.srcObject = stream
+    audioElement.autoplay = true
+    // Some browsers require this for autoplay
+    audioElement.play().catch((e) => {
+      console.warn('Audio autoplay failed:', e)
+    })
+
+    // Set up AudioContext for visualization only (not playback)
+    if (!audioContext) {
+      audioContext = new AudioContext()
+    }
+
+    // Resume AudioContext if suspended (browsers require user interaction)
+    if (audioContext.state === 'suspended') {
+      audioContext.resume().catch(() => {
+        // Ignore - visualization just won't work
+      })
+    }
+
+    outputAnalyser = audioContext.createAnalyser()
+    outputAnalyser.fftSize = 2048 // Larger size for more accurate level detection
+    outputAnalyser.smoothingTimeConstant = 0.3
+
+    outputSource = audioContext.createMediaStreamSource(stream)
+    outputSource.connect(outputAnalyser)
+    // Don't connect to destination - the Audio element handles playback
+  }
+
+  // Set up audio analysis for input
+  function setupInputAudioAnalysis(stream: MediaStream) {
+    if (!audioContext) {
+      audioContext = new AudioContext()
+    }
+
+    // Resume AudioContext if suspended (browsers require user interaction)
+    if (audioContext.state === 'suspended') {
+      audioContext.resume().catch(() => {
+        // Ignore - visualization just won't work
+      })
+    }
+
+    inputAnalyser = audioContext.createAnalyser()
+    inputAnalyser.fftSize = 2048 // Larger size for more accurate level detection
+    inputAnalyser.smoothingTimeConstant = 0.3
+
+    inputSource = audioContext.createMediaStreamSource(stream)
+    inputSource.connect(inputAnalyser)
+  }
+
+  // Queue for events sent before the data channel is open
+  const pendingEvents: Array<Record<string, unknown>> = []
+
+  // Send event to server (queues if data channel not yet open)
+  function sendEvent(event: Record<string, unknown>) {
+    if (dataChannel?.readyState === 'open') {
+      dataChannel.send(JSON.stringify(event))
+    } else {
+      pendingEvents.push(event)
+    }
+  }
+
+  // Flush any queued events (called when data channel opens)
+  function flushPendingEvents() {
+    for (const event of pendingEvents) {
+      dataChannel!.send(JSON.stringify(event))
+    }
+    pendingEvents.length = 0
+  }
+
+  // Connection implementation
+  const connection: RealtimeConnection = {
+    async disconnect() {
+      if (localStream) {
+        for (const track of localStream.getTracks()) {
+          track.stop()
+        }
+        localStream = null
+      }
+
+      if (audioElement) {
+        audioElement.pause()
+        audioElement.srcObject = null
+        audioElement = null
+      }
+
+      if (dataChannel) {
+        dataChannel.close()
+        dataChannel = null
+      }
+
+      pc.close()
+
+      if (audioContext) {
+        await audioContext.close()
+        audioContext = null
+      }
+
+      emit('status_change', { status: 'idle' as RealtimeStatus })
+    },
+
+    async startAudioCapture() {
+      // Audio capture is established during connection setup
+      // This method enables the tracks and signals listening mode
+      if (localStream) {
+        for (const track of localStream.getAudioTracks()) {
+          track.enabled = true
+        }
+      }
+      currentMode = 'listening'
+      emit('mode_change', { mode: 'listening' })
+    },
+
+    stopAudioCapture() {
+      // Disable tracks rather than stopping them to allow re-enabling
+      if (localStream) {
+        for (const track of localStream.getAudioTracks()) {
+          track.enabled = false
+        }
+      }
+      currentMode = 'idle'
+      emit('mode_change', { mode: 'idle' })
+    },
+
+    sendText(text: string) {
+      sendEvent({
+        type: 'conversation.item.create',
+        item: {
+          type: 'message',
+          role: 'user',
+          content: [{ type: 'input_text', text }],
+        },
+      })
+      sendEvent({
+        type: 'response.create',
+      })
+    },
+
+    sendImage(imageData: string, mimeType: string) {
+      // Determine if imageData is a URL or base64 data
+      const isUrl =
+        imageData.startsWith('http://') || imageData.startsWith('https://')
+      const imageContent = isUrl
+        ? { type: 'input_image', image_url: imageData }
+        : {
+            type: 'input_image',
+            image_url: `data:${mimeType};base64,${imageData}`,
+          }
+
+      sendEvent({
+        type: 'conversation.item.create',
+        item: {
+          type: 'message',
+          role: 'user',
+          content: [imageContent],
+        },
+      })
+      sendEvent({
+        type: 'response.create',
+      })
+    },
+
+    sendToolResult(callId: string, result: string) {
+      sendEvent({
+        type: 'conversation.item.create',
+        item: {
+          type: 'function_call_output',
+          call_id: callId,
+          output: result,
+        },
+      })
+      sendEvent({ type: 'response.create' })
+    },
+
+    updateSession(config: Partial<RealtimeSessionConfig>) {
+      const sessionUpdate: Record<string, unknown> = {}
+
+      if (config.instructions) {
+        sessionUpdate.instructions = config.instructions
+      }
+
+      if (config.voice) {
+        sessionUpdate.voice = config.voice
+      }
+
+      if (config.vadMode) {
+        if (config.vadMode === 'semantic') {
+          sessionUpdate.turn_detection = {
+            type: 'semantic_vad',
+            eagerness: config.semanticEagerness ?? 'medium',
+          }
+        } else if (config.vadMode === 'server') {
+          sessionUpdate.turn_detection = {
+            type: 'server_vad',
+            threshold: config.vadConfig?.threshold ?? 0.5,
+            prefix_padding_ms: config.vadConfig?.prefixPaddingMs ?? 300,
+            silence_duration_ms: config.vadConfig?.silenceDurationMs ?? 500,
+          }
+        } else {
+          sessionUpdate.turn_detection = null
+        }
+      }
+
+      if (config.tools !== undefined) {
+        sessionUpdate.tools = config.tools.map((t) => ({
+          type: 'function',
+          name: t.name,
+          description: t.description,
+          parameters: t.inputSchema ?? { type: 'object', properties: {} },
+        }))
+        sessionUpdate.tool_choice = 'auto'
+      }
+
+      if (config.outputModalities) {
+        sessionUpdate.modalities = config.outputModalities
+      }
+
+      if (config.temperature !== undefined) {
+        sessionUpdate.temperature = config.temperature
+      }
+
+      if (config.maxOutputTokens !== undefined) {
+        sessionUpdate.max_response_output_tokens = config.maxOutputTokens
+      }
+
+      // Always enable input audio transcription so user speech is transcribed
+      sessionUpdate.input_audio_transcription = { model: 'whisper-1' }
+
+      if (Object.keys(sessionUpdate).length > 0) {
+        sendEvent({
+          type: 'session.update',
+          session: sessionUpdate,
+        })
+      }
+    },
+
+    interrupt() {
+      sendEvent({ type: 'response.cancel' })
+      currentMode = 'listening'
+      emit('mode_change', { mode: 'listening' })
+      emit('interrupted', { messageId: currentMessageId ?? undefined })
+    },
+
+    on<TEvent extends RealtimeEvent>(
+      event: TEvent,
+      handler: RealtimeEventHandler<TEvent>,
+    ): () => void {
+      if (!eventHandlers.has(event)) {
+        eventHandlers.set(event, new Set())
+      }
+      eventHandlers.get(event)!.add(handler)
+
+      return () => {
+        eventHandlers.get(event)?.delete(handler)
+      }
+    },
+
+    getAudioVisualization(): AudioVisualization {
+      // Helper to calculate audio level from time domain data
+      // Uses peak amplitude which is more responsive for voice audio meters
+      function calculateLevel(analyser: AnalyserNode): number {
+        const data = new Uint8Array(analyser.fftSize)
+        analyser.getByteTimeDomainData(data)
+
+        // Find peak deviation from center (128 is silence)
+        // This is more responsive than RMS for voice level meters
+        let maxDeviation = 0
+        for (const sample of data) {
+          const deviation = Math.abs(sample - 128)
+          if (deviation > maxDeviation) {
+            maxDeviation = deviation
+          }
+        }
+
+        // Normalize to 0-1 range (max deviation is 128)
+        // Scale by 1.5x so that ~66% amplitude reads as full scale
+        // This provides good visual feedback without pegging too early
+        const normalized = maxDeviation / 128
+        return Math.min(1, normalized * 1.5)
+      }
+
+      return {
+        get inputLevel() {
+          if (!inputAnalyser) return 0
+          return calculateLevel(inputAnalyser)
+        },
+
+        get outputLevel() {
+          if (!outputAnalyser) return 0
+          return calculateLevel(outputAnalyser)
+        },
+
+        getInputFrequencyData() {
+          if (!inputAnalyser) return emptyFrequencyData
+          const data = new Uint8Array(inputAnalyser.frequencyBinCount)
+          inputAnalyser.getByteFrequencyData(data)
+          return data
+        },
+
+        getOutputFrequencyData() {
+          if (!outputAnalyser) return emptyFrequencyData
+          const data = new Uint8Array(outputAnalyser.frequencyBinCount)
+          outputAnalyser.getByteFrequencyData(data)
+          return data
+        },
+
+        getInputTimeDomainData() {
+          if (!inputAnalyser) return emptyTimeDomainData
+          const data = new Uint8Array(inputAnalyser.fftSize)
+          inputAnalyser.getByteTimeDomainData(data)
+          return data
+        },
+
+        getOutputTimeDomainData() {
+          if (!outputAnalyser) return emptyTimeDomainData
+          const data = new Uint8Array(outputAnalyser.fftSize)
+          outputAnalyser.getByteTimeDomainData(data)
+          return data
+        },
+
+        get inputSampleRate() {
+          return 24000
+        },
+
+        get outputSampleRate() {
+          return 24000
+        },
+      }
+    },
+  }
+
+  // Wait for the data channel to be open before returning the connection.
+  // This ensures session.update (tools, instructions, etc.) can be sent immediately.
+  await dataChannelReady
+
+  return connection
+}
diff --git a/packages/typescript/ai-openai/src/realtime/index.ts b/packages/typescript/ai-openai/src/realtime/index.ts
new file mode 100644
index 000000000..d5ea156e6
--- /dev/null
+++ b/packages/typescript/ai-openai/src/realtime/index.ts
@@ -0,0 +1,16 @@
+// Token adapter for server-side use
+export { openaiRealtimeToken } from './token'
+
+// Client adapter for browser use
+export { openaiRealtime } from './adapter'
+
+// Types
+export type {
+ OpenAIRealtimeVoice,
+ OpenAIRealtimeModel,
+ OpenAIRealtimeTokenOptions,
+ OpenAIRealtimeOptions,
+ OpenAITurnDetection,
+ OpenAISemanticVADConfig,
+ OpenAIServerVADConfig,
+} from './types'
diff --git a/packages/typescript/ai-openai/src/realtime/token.ts b/packages/typescript/ai-openai/src/realtime/token.ts
new file mode 100644
index 000000000..6bff9c9c2
--- /dev/null
+++ b/packages/typescript/ai-openai/src/realtime/token.ts
@@ -0,0 +1,82 @@
+import { getOpenAIApiKeyFromEnv } from '../utils/client'
+import type { RealtimeToken, RealtimeTokenAdapter } from '@tanstack/ai'
+import type {
+ OpenAIRealtimeModel,
+ OpenAIRealtimeSessionResponse,
+ OpenAIRealtimeTokenOptions,
+} from './types'
+
+const OPENAI_REALTIME_SESSIONS_URL =
+ 'https://api.openai.com/v1/realtime/sessions'
+
+/**
+ * Creates an OpenAI realtime token adapter.
+ *
+ * This adapter generates ephemeral tokens for client-side WebRTC connections.
+ * The token is valid for 10 minutes.
+ *
+ * @param options - Configuration options for the realtime session
+ * @returns A RealtimeTokenAdapter for use with realtimeToken()
+ *
+ * @example
+ * ```typescript
+ * import { realtimeToken } from '@tanstack/ai'
+ * import { openaiRealtimeToken } from '@tanstack/ai-openai'
+ *
+ * const token = await realtimeToken({
+ * adapter: openaiRealtimeToken({
+ * // Note: only the model can be configured when minting the
+ * // token. Voice, instructions, and turn detection (VAD) are
+ * // applied client-side by the realtime adapter via
+ * // session.update once the connection is established —
+ * // OpenAIRealtimeTokenOptions intentionally exposes only
+ * // the model field.
+ * model: 'gpt-4o-realtime-preview',
+ * }),
+ * })
+ * ```
+ */
+export function openaiRealtimeToken(
+ options: OpenAIRealtimeTokenOptions = {},
+): RealtimeTokenAdapter {
+ const apiKey = getOpenAIApiKeyFromEnv()
+
+ return {
+ provider: 'openai',
+
+ async generateToken(): Promise<RealtimeToken> {
+ const model: OpenAIRealtimeModel =
+ options.model ?? 'gpt-4o-realtime-preview'
+
+ // Call OpenAI API to create session and get ephemeral token.
+ // Only the model is sent server-side; all other session config
+ // (instructions, voice, tools, VAD) is applied client-side via session.update.
+ const response = await fetch(OPENAI_REALTIME_SESSIONS_URL, {
+ method: 'POST',
+ headers: {
+ Authorization: `Bearer ${apiKey}`,
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({ model }),
+ })
+
+ if (!response.ok) {
+ const errorText = await response.text()
+ throw new Error(
+ `OpenAI realtime session creation failed: ${response.status} ${errorText}`,
+ )
+ }
+
+ const sessionData: OpenAIRealtimeSessionResponse = await response.json()
+
+ return {
+ provider: 'openai',
+ token: sessionData.client_secret.value,
+ expiresAt: sessionData.client_secret.expires_at * 1000,
+ config: {
+ model: sessionData.model,
+ },
+ }
+ },
+ }
+}
diff --git a/packages/typescript/ai-openai/src/realtime/types.ts b/packages/typescript/ai-openai/src/realtime/types.ts
new file mode 100644
index 000000000..f4d36d9cc
--- /dev/null
+++ b/packages/typescript/ai-openai/src/realtime/types.ts
@@ -0,0 +1,104 @@
+import type { VADConfig } from '@tanstack/ai'
+
+/**
+ * OpenAI realtime voice options
+ */
+export type OpenAIRealtimeVoice =
+ | 'alloy'
+ | 'ash'
+ | 'ballad'
+ | 'coral'
+ | 'echo'
+ | 'sage'
+ | 'shimmer'
+ | 'verse'
+ | 'marin'
+ | 'cedar'
+
+/**
+ * OpenAI realtime model options
+ */
+export type OpenAIRealtimeModel =
+ | 'gpt-4o-realtime-preview'
+ | 'gpt-4o-realtime-preview-2024-10-01'
+ | 'gpt-4o-mini-realtime-preview'
+ | 'gpt-4o-mini-realtime-preview-2024-12-17'
+ | 'gpt-realtime'
+ | 'gpt-realtime-mini'
+
+/**
+ * OpenAI semantic VAD configuration
+ */
+export interface OpenAISemanticVADConfig {
+ type: 'semantic_vad'
+ /** Eagerness level for turn detection */
+ eagerness?: 'low' | 'medium' | 'high'
+}
+
+/**
+ * OpenAI server VAD configuration
+ */
+export interface OpenAIServerVADConfig extends VADConfig {
+ type: 'server_vad'
+}
+
+/**
+ * OpenAI turn detection configuration
+ */
+export type OpenAITurnDetection =
+ | OpenAISemanticVADConfig
+ | OpenAIServerVADConfig
+ | null
+
+/**
+ * Options for the OpenAI realtime token adapter
+ */
+export interface OpenAIRealtimeTokenOptions {
+ /** Model to use (default: 'gpt-4o-realtime-preview') */
+ model?: OpenAIRealtimeModel
+}
+
+/**
+ * Options for the OpenAI realtime client adapter
+ */
+export interface OpenAIRealtimeOptions {
+ /** Connection mode (default: 'webrtc' in browser) */
+ connectionMode?: 'webrtc' | 'websocket'
+}
+
+/**
+ * OpenAI realtime session response from the API
+ */
+export interface OpenAIRealtimeSessionResponse {
+ id: string
+ object: 'realtime.session'
+ model: string
+ modalities: Array<string>
+ instructions: string
+ voice: string
+ input_audio_format: string
+ output_audio_format: string
+ input_audio_transcription: {
+ model: string
+ } | null
+ turn_detection: {
+ type: string
+ threshold?: number
+ prefix_padding_ms?: number
+ silence_duration_ms?: number
+ eagerness?: string
+ } | null
+ tools: Array<{
+ type: string
+ name: string
+ description: string
+ parameters: Record<string, unknown>
+ }>
+ tool_choice: string
+ temperature: number
+ max_response_output_tokens: number | string
+ client_secret: {
+ value: string
+ expires_at: number
+ }
+}
diff --git a/packages/typescript/ai-react/src/index.ts b/packages/typescript/ai-react/src/index.ts
index 24b460cdf..b261e803b 100644
--- a/packages/typescript/ai-react/src/index.ts
+++ b/packages/typescript/ai-react/src/index.ts
@@ -1,10 +1,15 @@
export { useChat } from './use-chat'
+export { useRealtimeChat } from './use-realtime-chat'
export type {
UseChatOptions,
UseChatReturn,
UIMessage,
ChatRequestBody,
} from './types'
+export type {
+ UseRealtimeChatOptions,
+ UseRealtimeChatReturn,
+} from './realtime-types'
// Generation hooks
export { useGeneration } from './use-generation'
diff --git a/packages/typescript/ai-react/src/realtime-types.ts b/packages/typescript/ai-react/src/realtime-types.ts
new file mode 100644
index 000000000..bad512d29
--- /dev/null
+++ b/packages/typescript/ai-react/src/realtime-types.ts
@@ -0,0 +1,143 @@
+import type {
+ AnyClientTool,
+ RealtimeMessage,
+ RealtimeMode,
+ RealtimeStatus,
+ RealtimeToken,
+} from '@tanstack/ai'
+import type { RealtimeAdapter } from '@tanstack/ai-client'
+
+/**
+ * Options for the useRealtimeChat hook.
+ */
+export interface UseRealtimeChatOptions {
+ /**
+ * Function to fetch a realtime token from the server.
+ * Called on connect and when token needs refresh.
+ */
+ getToken: () => Promise<RealtimeToken>
+
+ /**
+ * The realtime adapter to use (e.g., openaiRealtime())
+ */
+ adapter: RealtimeAdapter
+
+ /**
+ * Client-side tools with execution logic
+ */
+ tools?: ReadonlyArray<AnyClientTool>
+
+ /**
+ * Auto-play assistant audio (default: true)
+ */
+ autoPlayback?: boolean
+
+ /**
+ * Request microphone access on connect (default: true)
+ */
+ autoCapture?: boolean
+
+ /**
+ * System instructions for the assistant
+ */
+ instructions?: string
+
+ /**
+ * Voice to use for audio output
+ */
+ voice?: string
+
+ /**
+ * Voice activity detection mode (default: 'server')
+ */
+ vadMode?: 'server' | 'semantic' | 'manual'
+
+ /**
+ * Output modalities for responses (e.g., ['audio', 'text'])
+ */
+ outputModalities?: Array<'audio' | 'text'>
+
+ /**
+ * Temperature for generation (provider-specific range)
+ */
+ temperature?: number
+
+ /**
+ * Maximum number of tokens in a response
+ */
+ maxOutputTokens?: number | 'inf'
+
+ /**
+ * Eagerness level for semantic VAD ('low', 'medium', 'high')
+ */
+ semanticEagerness?: 'low' | 'medium' | 'high'
+
+ // Callbacks
+ onConnect?: () => void
+ onDisconnect?: () => void
+ onError?: (error: Error) => void
+ onMessage?: (message: RealtimeMessage) => void
+ onModeChange?: (mode: RealtimeMode) => void
+ onInterrupted?: () => void
+}
+
+/**
+ * Return type for the useRealtimeChat hook.
+ */
+export interface UseRealtimeChatReturn {
+ // Connection state
+ /** Current connection status */
+ status: RealtimeStatus
+ /** Current error, if any */
+ error: Error | null
+ /** Connect to the realtime session */
+ connect: () => Promise<void>
+ /** Disconnect from the realtime session */
+ disconnect: () => Promise<void>
+
+ // Conversation state
+ /** Current mode (idle, listening, thinking, speaking) */
+ mode: RealtimeMode
+ /** Conversation messages */
+ messages: Array<RealtimeMessage>
+ /** User transcript while speaking (before finalized) */
+ pendingUserTranscript: string | null
+ /** Assistant transcript while speaking (before finalized) */
+ pendingAssistantTranscript: string | null
+
+ // Voice control
+ /** Start listening for voice input (manual VAD mode) */
+ startListening: () => void
+ /** Stop listening for voice input (manual VAD mode) */
+ stopListening: () => void
+ /** Interrupt the current assistant response */
+ interrupt: () => void
+
+ // Text input
+ /** Send a text message instead of voice */
+ sendText: (text: string) => void
+
+ // Image input
+ /** Send an image to the conversation */
+ sendImage: (imageData: string, mimeType: string) => void
+
+ // Audio visualization (0-1 normalized)
+ /** Current input (microphone) volume level */
+ inputLevel: number
+ /** Current output (speaker) volume level */
+ outputLevel: number
+ /** Get frequency data for input audio visualization */
+ getInputFrequencyData: () => Uint8Array
+ /** Get frequency data for output audio visualization */
+ getOutputFrequencyData: () => Uint8Array
+ /** Get time domain data for input waveform */
+ getInputTimeDomainData: () => Uint8Array
+ /** Get time domain data for output waveform */
+ getOutputTimeDomainData: () => Uint8Array
+
+ // VAD control
+ /** Current VAD mode */
+ vadMode: 'server' | 'semantic' | 'manual'
+ /** Change VAD mode at runtime */
+ setVADMode: (mode: 'server' | 'semantic' | 'manual') => void
+}
diff --git a/packages/typescript/ai-react/src/use-realtime-chat.ts b/packages/typescript/ai-react/src/use-realtime-chat.ts
new file mode 100644
index 000000000..63272821f
--- /dev/null
+++ b/packages/typescript/ai-react/src/use-realtime-chat.ts
@@ -0,0 +1,277 @@
+import { useCallback, useEffect, useRef, useState } from 'react'
+import { RealtimeClient } from '@tanstack/ai-client'
+import type {
+ RealtimeMessage,
+ RealtimeMode,
+ RealtimeStatus,
+} from '@tanstack/ai'
+import type {
+ UseRealtimeChatOptions,
+ UseRealtimeChatReturn,
+} from './realtime-types'
+
+// Empty frequency data for when client is not connected
+const emptyFrequencyData = new Uint8Array(128)
+const emptyTimeDomainData = new Uint8Array(128).fill(128)
+
+/**
+ * React hook for realtime voice conversations.
+ *
+ * Provides a simple interface for voice-to-voice AI interactions
+ * with support for multiple providers (OpenAI, ElevenLabs, etc.).
+ *
+ * @param options - Configuration options including adapter and callbacks
+ * @returns Hook return value with state and control methods
+ *
+ * @example
+ * ```typescript
+ * import { useRealtimeChat } from '@tanstack/ai-react'
+ * import { openaiRealtime } from '@tanstack/ai-openai'
+ *
+ * function VoiceChat() {
+ * const {
+ * status,
+ * mode,
+ * messages,
+ * connect,
+ * disconnect,
+ * inputLevel,
+ * outputLevel,
+ * } = useRealtimeChat({
+ * getToken: () => fetch('/api/realtime-token').then(r => r.json()),
+ * adapter: openaiRealtime(),
+ * })
+ *
+ * return (
+ * <div>
+ * <p>Status: {status}</p>
+ * <p>Mode: {mode}</p>
+ * <button onClick={status === 'connected' ? disconnect : connect}>
+ * {status === 'connected' ? 'Disconnect' : 'Connect'}
+ * </button>
+ * </div>
+ * )
+ * }
+ * ```
+ */
+export function useRealtimeChat(
+ options: UseRealtimeChatOptions,
+): UseRealtimeChatReturn {
+ // State
+ const [status, setStatus] = useState<RealtimeStatus>('idle')
+ const [mode, setMode] = useState<RealtimeMode>('idle')
+ const [messages, setMessages] = useState<Array<RealtimeMessage>>([])
+ const [pendingUserTranscript, setPendingUserTranscript] = useState<
+ string | null
+ >(null)
+ const [pendingAssistantTranscript, setPendingAssistantTranscript] = useState<
+ string | null
+ >(null)
+ const [error, setError] = useState<Error | null>(null)
+ const [inputLevel, setInputLevel] = useState(0)
+ const [outputLevel, setOutputLevel] = useState(0)
+ const [vadMode, setVADModeState] = useState<'server' | 'semantic' | 'manual'>(
+ options.vadMode ?? 'server',
+ )
+
+ // Refs
+ const clientRef = useRef<RealtimeClient | null>(null)
+ const optionsRef = useRef(options)
+ optionsRef.current = options
+ const animationFrameRef = useRef<number | null>(null)
+
+ // Create client instance - use ref to ensure we reuse the same instance
+ // This handles React StrictMode double-rendering
+ if (!clientRef.current) {
+ clientRef.current = new RealtimeClient({
+ getToken: optionsRef.current.getToken,
+ adapter: optionsRef.current.adapter,
+ tools: optionsRef.current.tools,
+ instructions: optionsRef.current.instructions,
+ voice: optionsRef.current.voice,
+ autoPlayback: optionsRef.current.autoPlayback,
+ autoCapture: optionsRef.current.autoCapture,
+ vadMode: optionsRef.current.vadMode,
+ outputModalities: optionsRef.current.outputModalities,
+ temperature: optionsRef.current.temperature,
+ maxOutputTokens: optionsRef.current.maxOutputTokens,
+ semanticEagerness: optionsRef.current.semanticEagerness,
+ onStatusChange: (newStatus) => {
+ setStatus(newStatus)
+ },
+ onModeChange: (newMode) => {
+ setMode(newMode)
+ optionsRef.current.onModeChange?.(newMode)
+ },
+ onMessage: (message) => {
+ setMessages((prev) => [...prev, message])
+ optionsRef.current.onMessage?.(message)
+ },
+ onError: (err) => {
+ setError(err)
+ optionsRef.current.onError?.(err)
+ },
+ onConnect: () => {
+ setError(null)
+ optionsRef.current.onConnect?.()
+ },
+ onDisconnect: () => {
+ optionsRef.current.onDisconnect?.()
+ },
+ onInterrupted: () => {
+ setPendingAssistantTranscript(null)
+ optionsRef.current.onInterrupted?.()
+ },
+ })
+
+ // Subscribe to state changes for transcripts
+ clientRef.current.onStateChange((state) => {
+ setPendingUserTranscript(state.pendingUserTranscript)
+ setPendingAssistantTranscript(state.pendingAssistantTranscript)
+ })
+ }
+
+ const client = clientRef.current
+
+ // Audio level animation loop
+ useEffect(() => {
+ function updateLevels() {
+ if (clientRef.current?.audio) {
+ setInputLevel(clientRef.current.audio.inputLevel)
+ setOutputLevel(clientRef.current.audio.outputLevel)
+ }
+ animationFrameRef.current = requestAnimationFrame(updateLevels)
+ }
+
+ if (status === 'connected') {
+ updateLevels()
+ }
+
+ return () => {
+ if (animationFrameRef.current) {
+ cancelAnimationFrame(animationFrameRef.current)
+ animationFrameRef.current = null
+ }
+ }
+ }, [status])
+
+ // Cleanup on unmount
+ useEffect(() => {
+ return () => {
+ clientRef.current?.destroy()
+ }
+ }, [])
+
+ // Connection methods
+ const connect = useCallback(async () => {
+ setError(null)
+ setMessages([])
+ setPendingUserTranscript(null)
+ setPendingAssistantTranscript(null)
+ await client.connect()
+ }, [client])
+
+ const disconnect = useCallback(async () => {
+ await client.disconnect()
+ }, [client])
+
+ // Voice control methods
+ const startListening = useCallback(() => {
+ client.startListening()
+ }, [client])
+
+ const stopListening = useCallback(() => {
+ client.stopListening()
+ }, [client])
+
+ const interrupt = useCallback(() => {
+ client.interrupt()
+ }, [client])
+
+ // Text input
+ const sendText = useCallback(
+ (text: string) => {
+ client.sendText(text)
+ },
+ [client],
+ )
+
+ // Image input
+ const sendImage = useCallback(
+ (imageData: string, mimeType: string) => {
+ client.sendImage(imageData, mimeType)
+ },
+ [client],
+ )
+
+ // Audio visualization
+ const getInputFrequencyData = useCallback(() => {
+ return (
+ clientRef.current?.audio?.getInputFrequencyData() ?? emptyFrequencyData
+ )
+ }, [])
+
+ const getOutputFrequencyData = useCallback(() => {
+ return (
+ clientRef.current?.audio?.getOutputFrequencyData() ?? emptyFrequencyData
+ )
+ }, [])
+
+ const getInputTimeDomainData = useCallback(() => {
+ return (
+ clientRef.current?.audio?.getInputTimeDomainData() ?? emptyTimeDomainData
+ )
+ }, [])
+
+ const getOutputTimeDomainData = useCallback(() => {
+ return (
+ clientRef.current?.audio?.getOutputTimeDomainData() ?? emptyTimeDomainData
+ )
+ }, [])
+
+ // VAD mode control
+ const setVADMode = useCallback(
+ (newMode: 'server' | 'semantic' | 'manual') => {
+ setVADModeState(newMode)
+ // TODO: Update session config if connected
+ },
+ [],
+ )
+
+ return {
+ // Connection state
+ status,
+ error,
+ connect,
+ disconnect,
+
+ // Conversation state
+ mode,
+ messages,
+ pendingUserTranscript,
+ pendingAssistantTranscript,
+
+ // Voice control
+ startListening,
+ stopListening,
+ interrupt,
+
+ // Text input
+ sendText,
+
+ // Image input
+ sendImage,
+
+ // Audio visualization
+ inputLevel,
+ outputLevel,
+ getInputFrequencyData,
+ getOutputFrequencyData,
+ getInputTimeDomainData,
+ getOutputTimeDomainData,
+
+ // VAD control
+ vadMode,
+ setVADMode,
+ }
+}
diff --git a/packages/typescript/ai/src/index.ts b/packages/typescript/ai/src/index.ts
index 7f0d4fece..8bf6d0a83 100644
--- a/packages/typescript/ai/src/index.ts
+++ b/packages/typescript/ai/src/index.ts
@@ -79,6 +79,31 @@ export { detectImageMimeType } from './utils'
// Event client + event types
export * from './event-client'
+// Realtime
+export { realtimeToken } from './realtime/index'
+export type {
+ RealtimeToken,
+ RealtimeTokenAdapter,
+ RealtimeTokenOptions,
+ RealtimeSessionConfig,
+ VADConfig,
+ RealtimeMessage,
+ RealtimeMessagePart,
+ RealtimeTextPart,
+ RealtimeAudioPart,
+ RealtimeToolCallPart,
+ RealtimeToolResultPart,
+ RealtimeImagePart,
+ RealtimeStatus,
+ RealtimeMode,
+ AudioVisualization,
+ RealtimeEvent,
+ RealtimeEventPayloads,
+ RealtimeEventHandler,
+ RealtimeErrorCode,
+ RealtimeError,
+} from './realtime/index'
+
// Message converters
export {
convertMessagesToModelMessages,
diff --git a/packages/typescript/ai/src/realtime/index.ts b/packages/typescript/ai/src/realtime/index.ts
new file mode 100644
index 000000000..74c450c1d
--- /dev/null
+++ b/packages/typescript/ai/src/realtime/index.ts
@@ -0,0 +1,38 @@
+import type { RealtimeToken, RealtimeTokenOptions } from './types'
+
+// Re-export all types
+export * from './types'
+
+/**
+ * Generate a realtime token using the provided adapter.
+ *
+ * This function is used on the server to generate ephemeral tokens
+ * that clients can use to establish realtime connections.
+ *
+ * @param options - Token generation options including the adapter
+ * @returns Promise resolving to a RealtimeToken
+ *
+ * @example
+ * ```typescript
+ * import { realtimeToken } from '@tanstack/ai'
+ * import { openaiRealtimeToken } from '@tanstack/ai-openai'
+ *
+ * // Server function (TanStack Start example)
+ * export const getRealtimeToken = createServerFn()
+ * .handler(async () => {
+ * return realtimeToken({
+ * adapter: openaiRealtimeToken({
+ * // Only the model is set when minting the token; voice and
+ * // instructions are applied client-side after connecting.
+ * model: 'gpt-4o-realtime-preview',
+ * }),
+ * })
+ * })
+ * ```
+ */
+export async function realtimeToken(
+ options: RealtimeTokenOptions,
+): Promise<RealtimeToken> {
+ const { adapter } = options
+ return adapter.generateToken()
+}
diff --git a/packages/typescript/ai/src/realtime/types.ts b/packages/typescript/ai/src/realtime/types.ts
new file mode 100644
index 000000000..daaf6f57c
--- /dev/null
+++ b/packages/typescript/ai/src/realtime/types.ts
@@ -0,0 +1,294 @@
+// ============================================================================
+// Token Types
+// ============================================================================
+
+/**
+ * Voice activity detection configuration
+ */
+export interface VADConfig {
+ /** Sensitivity threshold (0.0-1.0) */
+ threshold?: number
+ /** Audio to include before speech detection (ms) */
+ prefixPaddingMs?: number
+ /** Silence duration to end turn (ms) */
+ silenceDurationMs?: number
+}
+
+/**
+ * Serializable tool descriptor for realtime session configuration.
+ * Contains only the metadata needed by providers, not Zod schemas or execute functions.
+ */
+export interface RealtimeToolConfig {
+ name: string
+ description: string
+ inputSchema?: Record<string, unknown>
+}
+
+/**
+ * Configuration for a realtime session
+ */
+export interface RealtimeSessionConfig {
+ /** Model to use for the session */
+ model?: string
+ /** Voice to use for audio output */
+ voice?: string
+ /** System instructions for the assistant */
+ instructions?: string
+ /** Tools available in the session */
+ tools?: Array<RealtimeToolConfig>
+ /** VAD mode */
+ vadMode?: 'server' | 'semantic' | 'manual'
+ /** VAD configuration */
+ vadConfig?: VADConfig
+ /** Output modalities for responses (e.g., ['audio', 'text'], ['text']) */
+ outputModalities?: Array<'audio' | 'text'>
+ /** Temperature for generation (provider-specific range, e.g., 0.6-1.2 for OpenAI) */
+ temperature?: number
+ /** Maximum number of tokens in a response */
+ maxOutputTokens?: number | 'inf'
+ /** Eagerness level for semantic VAD ('low', 'medium', 'high') */
+ semanticEagerness?: 'low' | 'medium' | 'high'
+ /** Provider-specific options */
+ providerOptions?: Record<string, unknown>
+}
+
+/**
+ * Token returned by the server for client authentication
+ */
+export interface RealtimeToken {
+ /** Provider identifier */
+ provider: string
+ /** The ephemeral token value */
+ token: string
+ /** Token expiration timestamp (ms since epoch) */
+ expiresAt: number
+ /** Session configuration embedded in the token */
+ config: RealtimeSessionConfig
+}
+
+/**
+ * Adapter interface for generating provider-specific tokens
+ */
+export interface RealtimeTokenAdapter {
+ /** Provider identifier */
+ provider: string
+ /** Generate an ephemeral token for client use */
+ generateToken: () => Promise<RealtimeToken>
+}
+
+/**
+ * Options for the realtimeToken function
+ */
+export interface RealtimeTokenOptions {
+ /** The token adapter to use */
+ adapter: RealtimeTokenAdapter
+}
+
+// ============================================================================
+// Message Types
+// ============================================================================
+
+/**
+ * Text content part in a realtime message
+ */
+export interface RealtimeTextPart {
+ type: 'text'
+ content: string
+}
+
+/**
+ * Audio content part in a realtime message
+ */
+export interface RealtimeAudioPart {
+ type: 'audio'
+ /** Transcription of the audio */
+ transcript: string
+ /** Raw audio data (optional, if stored) */
+ audioData?: ArrayBuffer
+ /** Duration of the audio in milliseconds */
+ durationMs?: number
+}
+
+/**
+ * Tool call part in a realtime message
+ */
+export interface RealtimeToolCallPart {
+ type: 'tool-call'
+ id: string
+ name: string
+ arguments: string
+ input?: unknown
+ output?: unknown
+}
+
+/**
+ * Tool result part in a realtime message
+ */
+export interface RealtimeToolResultPart {
+ type: 'tool-result'
+ toolCallId: string
+ content: string
+}
+
+/**
+ * Image content part in a realtime message
+ */
+export interface RealtimeImagePart {
+ type: 'image'
+ /** Base64-encoded image data or a URL */
+ data: string
+ /** MIME type of the image (e.g., 'image/png', 'image/jpeg') */
+ mimeType: string
+}
+
+/**
+ * Union of all realtime message parts
+ */
+export type RealtimeMessagePart =
+ | RealtimeTextPart
+ | RealtimeAudioPart
+ | RealtimeToolCallPart
+ | RealtimeToolResultPart
+ | RealtimeImagePart
+
+/**
+ * A message in a realtime conversation
+ */
+export interface RealtimeMessage {
+ /** Unique message identifier */
+ id: string
+ /** Message role */
+ role: 'user' | 'assistant'
+ /** Timestamp when the message was created */
+ timestamp: number
+ /** Content parts of the message */
+ parts: Array<RealtimeMessagePart>
+ /** Whether this message was interrupted */
+ interrupted?: boolean
+ /** Reference to audio buffer if stored */
+ audioId?: string
+ /** Duration of the audio in milliseconds */
+ durationMs?: number
+}
+
+// ============================================================================
+// Status Types
+// ============================================================================
+
+/**
+ * Connection status of the realtime client
+ */
+export type RealtimeStatus =
+ | 'idle'
+ | 'connecting'
+ | 'connected'
+ | 'reconnecting'
+ | 'error'
+
+/**
+ * Current mode of the realtime session
+ */
+export type RealtimeMode = 'idle' | 'listening' | 'thinking' | 'speaking'
+
+// ============================================================================
+// Audio Visualization Types
+// ============================================================================
+
+/**
+ * Interface for accessing audio visualization data
+ */
+export interface AudioVisualization {
+ /** Input volume level (0-1 normalized) */
+ readonly inputLevel: number
+ /** Output volume level (0-1 normalized) */
+ readonly outputLevel: number
+
+ /** Get frequency data for input audio visualization */
+ getInputFrequencyData: () => Uint8Array
+ /** Get frequency data for output audio visualization */
+ getOutputFrequencyData: () => Uint8Array
+
+ /** Get time domain data for input waveform */
+ getInputTimeDomainData: () => Uint8Array
+ /** Get time domain data for output waveform */
+ getOutputTimeDomainData: () => Uint8Array
+
+ /** Input sample rate */
+ readonly inputSampleRate: number
+ /** Output sample rate */
+ readonly outputSampleRate: number
+
+ /** Subscribe to raw input audio samples */
+ onInputAudio?: (
+ callback: (samples: Float32Array, sampleRate: number) => void,
+ ) => () => void
+ /** Subscribe to raw output audio samples */
+ onOutputAudio?: (
+ callback: (samples: Float32Array, sampleRate: number) => void,
+ ) => () => void
+}
+
+// ============================================================================
+// Event Types
+// ============================================================================
+
+/**
+ * Events emitted by the realtime connection
+ */
+export type RealtimeEvent =
+ | 'status_change'
+ | 'mode_change'
+ | 'transcript'
+ | 'audio_chunk'
+ | 'tool_call'
+ | 'message_complete'
+ | 'interrupted'
+ | 'error'
+
+/**
+ * Event payloads for realtime events
+ */
+export interface RealtimeEventPayloads {
+ status_change: { status: RealtimeStatus }
+ mode_change: { mode: RealtimeMode }
+ transcript: {
+ role: 'user' | 'assistant'
+ transcript: string
+ isFinal: boolean
+ }
+ audio_chunk: { data: ArrayBuffer; sampleRate: number }
+ tool_call: { toolCallId: string; toolName: string; input: unknown }
+ message_complete: { message: RealtimeMessage }
+ interrupted: { messageId?: string }
+ error: { error: Error }
+}
+
+/**
+ * Handler type for realtime events
+ */
+export type RealtimeEventHandler<TEvent extends RealtimeEvent> = (
+ payload: RealtimeEventPayloads[TEvent],
+) => void
+
+// ============================================================================
+// Error Types
+// ============================================================================
+
+/**
+ * Error codes for realtime errors
+ */
+export type RealtimeErrorCode =
+ | 'TOKEN_EXPIRED'
+ | 'CONNECTION_FAILED'
+ | 'PERMISSION_DENIED'
+ | 'PROVIDER_ERROR'
+ | 'UNKNOWN'
+
+/**
+ * Extended error with realtime-specific information
+ */
+export interface RealtimeError extends Error {
+ code: RealtimeErrorCode
+ provider?: string
+ details?: unknown
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index e6c1ee218..5b9787208 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -201,6 +201,9 @@ importers:
'@tanstack/ai-client':
specifier: workspace:*
version: link:../../packages/typescript/ai-client
+ '@tanstack/ai-elevenlabs':
+ specifier: workspace:*
+ version: link:../../packages/typescript/ai-elevenlabs
'@tanstack/ai-gemini':
specifier: workspace:*
version: link:../../packages/typescript/ai-gemini
@@ -755,6 +758,22 @@ importers:
specifier: ^2.11.10
version: 2.11.10(solid-js@1.9.10)(vite@7.2.7(@types/node@25.0.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))
+ packages/typescript/ai-elevenlabs:
+ dependencies:
+ '@11labs/client':
+ specifier: ^0.2.0
+ version: 0.2.0(@types/dom-mediacapture-record@1.0.22)
+ devDependencies:
+ '@tanstack/ai':
+ specifier: workspace:*
+ version: link:../ai
+ '@tanstack/ai-client':
+ specifier: workspace:*
+ version: link:../ai-client
+ '@vitest/coverage-v8':
+ specifier: 4.0.14
+ version: 4.0.14(vitest@4.0.18(@types/node@25.0.1)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.6))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))
+
packages/typescript/ai-fal:
dependencies:
'@fal-ai/client':
@@ -850,6 +869,9 @@ importers:
'@tanstack/ai':
specifier: workspace:*
version: link:../ai
+ '@tanstack/ai-client':
+ specifier: workspace:*
+ version: link:../ai-client
'@vitest/coverage-v8':
specifier: 4.0.14
version: 4.0.14(vitest@4.0.18(@types/node@25.0.1)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.6))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))
@@ -1440,6 +1462,10 @@ importers:
packages:
+ '@11labs/client@0.2.0':
+ resolution: {integrity: sha512-GBplAV4WDbcoThsIzdSDPN3xbcitK0ZZ4iJfJZKfltqvgvS6Uw8GZxHwVgiPwnQoA3uosYyY3L9TuPwmel18xQ==}
+ deprecated: This package is no longer maintained. Please use @elevenlabs/client for the latest version
+
'@acemir/cssom@0.9.29':
resolution: {integrity: sha512-G90x0VW+9nW4dFajtjCoT+NM0scAfH9Mb08IcjgFHYbfiL/lU04dTF9JuVOi3/OH+DJCQdcIseSXkdCB9Ky6JA==}
@@ -1632,6 +1658,9 @@ packages:
resolution: {integrity: sha512-6zABk/ECA/QYSCQ1NGiVwwbQerUCZ+TQbp64Q3AgmfNvurHH0j8TtXa1qbShXA6qqkpAj4V5W8pP6mLe1mcMqA==}
engines: {node: '>=18'}
+ '@bufbuild/protobuf@1.10.1':
+ resolution: {integrity: sha512-wJ8ReQbHxsAfXhrf9ixl0aYbZorRuOWpBNzm8pL8ftmSxQx/wnJD5Eg861NwJU/czy2VXFIebCeZnZrI9rktIQ==}
+
'@changesets/apply-release-plan@7.0.14':
resolution: {integrity: sha512-ddBvf9PHdy2YY0OUiEl3TV78mH9sckndJR14QAt87KLEbIov81XO0q0QAmvooBxXlqRRP8I9B7XOzZwQG7JkWA==}
@@ -2505,6 +2534,12 @@ packages:
'@jridgewell/trace-mapping@0.3.31':
resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==}
+ '@livekit/mutex@1.1.1':
+ resolution: {integrity: sha512-EsshAucklmpuUAfkABPxJNhzj9v2sG7JuzFDL4ML1oJQSV14sqrpTYnsaOudMAw9yOaW53NU3QQTlUQoRs4czw==}
+
+ '@livekit/protocol@1.44.0':
+ resolution: {integrity: sha512-/vfhDUGcUKO8Q43r6i+5FrDhl5oZjm/X3U4x2Iciqvgn5C8qbj+57YPcWSJ1kyIZm5Cm6AV2nAPjMm3ETD/iyg==}
+
'@manypkg/find-root@1.1.0':
resolution: {integrity: sha512-mki5uBvhHzO8kYYix/WRy2WX8S3B5wdVSc9D6KcU5lQNglP2yt58/VfLuAK49glRXChosY8ap2oJ1qgma3GUVA==}
@@ -4452,6 +4487,9 @@ packages:
'@types/deep-eql@4.0.2':
resolution: {integrity: sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==}
+ '@types/dom-mediacapture-record@1.0.22':
+ resolution: {integrity: sha512-mUMZLK3NvwRLcAAT9qmcK+9p7tpU2FHdDsntR3YI4+GY88XrgG4XiE7u1Q2LAN2/FZOz/tdMDC3GQCR4T8nFuw==}
+
'@types/estree-jsx@1.0.5':
resolution: {integrity: sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==}
@@ -6688,6 +6726,9 @@ packages:
jju@1.4.0:
resolution: {integrity: sha512-8wb9Yw966OSxApiCt0K3yNJL8pnNeIv+OEq2YMidz4FKP6nonSRoOXc80iXY4JaN2FC11B9qsNmDsm+ZOfMROA==}
+ jose@6.2.0:
+ resolution: {integrity: sha512-xsfE1TcSCbUdo6U07tR0mvhg0flGxU8tPLbF03mirl2ukGQENhUg4ubGYQnhVH0b5stLlPM+WOqDkEl1R1y5sQ==}
+
joycon@3.1.1:
resolution: {integrity: sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==}
engines: {node: '>=10'}
@@ -6902,6 +6943,11 @@ packages:
resolution: {integrity: sha512-I8oW2+QL5KJo8zXNWX046M134WchxsXC7SawLPvRQpogCbkyQIaFxPE89A2HiwR7vAK2Dm2ERBAmyjTYGYEpBg==}
hasBin: true
+ livekit-client@2.17.2:
+ resolution: {integrity: sha512-+67y2EtAWZabARlY7kANl/VT1Uu1EJYR5a8qwpT2ub/uBCltsEgEDOxCIMwE9HFR5w+z41HR6GL9hyEvW/y6CQ==}
+ peerDependencies:
+ '@types/dom-mediacapture-record': ^1
+
load-tsconfig@0.2.5:
resolution: {integrity: sha512-IXO6OCs9yg8tMKzfPZ1YmheJbZCiEsnBdcB03l0OcfK9prKnJb96siuHCr5Fl37/yo9DnKU+TLpxzTUspw9shg==}
engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
@@ -6944,6 +6990,10 @@ packages:
resolution: {integrity: sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==}
engines: {node: '>=10'}
+ loglevel@1.9.2:
+ resolution: {integrity: sha512-HgMmCqIJSAKqo68l0rS2AanEWfkxaZ5wNiEFb5ggm08lDs9Xl2KxBlX3PTcaD2chBM1gXAYf491/M2Rv8Jwayg==}
+ engines: {node: '>= 0.6.0'}
+
long@5.3.2:
resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==}
@@ -8053,6 +8103,13 @@ packages:
scule@1.3.0:
resolution: {integrity: sha512-6FtHJEvt+pVMIB9IBY+IcCJ6Z5f1iQnytgyfKMhDKgmzYG+TeH/wx1y3l27rshSbLiSanrR9ffZDrEsmjlQF2g==}
+ sdp-transform@2.15.0:
+ resolution: {integrity: sha512-KrOH82c/W+GYQ0LHqtr3caRpM3ITglq3ljGUIb8LTki7ByacJZ9z+piSGiwZDsRyhQbYBOBJgr2k6X4BZXi3Kw==}
+ hasBin: true
+
+ sdp@3.2.1:
+ resolution: {integrity: sha512-lwsAIzOPlH8/7IIjjz3K0zYBk7aBVVcvjMwt3M4fLxpjMYyy7i3I97SLHebgn4YBjirkzfp3RvRDWSKsh/+WFw==}
+
semver@6.3.1:
resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==}
hasBin: true
@@ -8546,6 +8603,9 @@ packages:
peerDependencies:
typescript: '>=4.8.4'
+ ts-debounce@4.0.0:
+ resolution: {integrity: sha512-+1iDGY6NmOGidq7i7xZGA4cm8DAa6fqdYcvO5Z6yBevH++Bdo9Qt/mN0TzHUgcCcKv1gmh9+W5dHqz8pMWbCbg==}
+
ts-declaration-location@1.0.7:
resolution: {integrity: sha512-EDyGAwH1gO0Ausm9gV6T2nUvBgXT5kGoCMJPllOaooZ+4VvJiKBdZE7wK18N1deEowhcUptS+5GXZK8U/fvpwA==}
peerDependencies:
@@ -8642,6 +8702,9 @@ packages:
resolution: {integrity: sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==}
engines: {node: '>= 0.6'}
+ typed-emitter@2.1.0:
+ resolution: {integrity: sha512-g/KzbYKbH5C2vPkaXGu8DJlHrGKHLsM25Zg9WuC9pMGfuvT+X25tZQWo5fK1BjBm8+UrVE9LDCvaY0CQk+fXDA==}
+
typedoc-plugin-frontmatter@1.3.0:
resolution: {integrity: sha512-xYQFMAecMlsRUjmf9oM/Sq2FVz4zlgcbIeVFNLdO118CHTN06gIKJNSlyExh9+Xl8sK0YhIvoQwViUURxritWA==}
peerDependencies:
@@ -9284,6 +9347,10 @@ packages:
webpack-virtual-modules@0.6.2:
resolution: {integrity: sha512-66/V2i5hQanC51vBQKPH4aI8NMAcBW59FVBs+rC7eGHupMyfn34q7rZIE+ETlJ+XTevqfUhVVBgSUNSW2flEUQ==}
+ webrtc-adapter@9.0.4:
+ resolution: {integrity: sha512-5ZZY1+lGq8LEKuDlg9M2RPJHlH3R7OVwyHqMcUsLKCgd9Wvf+QrFTCItkXXYPmrJn8H6gRLXbSgxLLdexiqHxw==}
+ engines: {node: '>=6.0.0', npm: '>=3.10.0'}
+
whatwg-encoding@3.1.1:
resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==}
engines: {node: '>=18'}
@@ -9438,6 +9505,12 @@ packages:
snapshots:
+ '@11labs/client@0.2.0(@types/dom-mediacapture-record@1.0.22)':
+ dependencies:
+ livekit-client: 2.17.2(@types/dom-mediacapture-record@1.0.22)
+ transitivePeerDependencies:
+ - '@types/dom-mediacapture-record'
+
'@acemir/cssom@0.9.29': {}
'@alcyone-labs/zod-to-json-schema@4.0.10(zod@4.2.1)':
@@ -9687,6 +9760,8 @@ snapshots:
'@bcoe/v8-coverage@1.0.2': {}
+ '@bufbuild/protobuf@1.10.1': {}
+
'@changesets/apply-release-plan@7.0.14':
dependencies:
'@changesets/config': 3.1.2
@@ -10361,6 +10436,12 @@ snapshots:
'@jridgewell/resolve-uri': 3.1.2
'@jridgewell/sourcemap-codec': 1.5.5
+ '@livekit/mutex@1.1.1': {}
+
+ '@livekit/protocol@1.44.0':
+ dependencies:
+ '@bufbuild/protobuf': 1.10.1
+
'@manypkg/find-root@1.1.0':
dependencies:
'@babel/runtime': 7.28.4
@@ -12903,6 +12984,8 @@ snapshots:
'@types/deep-eql@4.0.2': {}
+ '@types/dom-mediacapture-record@1.0.22': {}
+
'@types/estree-jsx@1.0.5':
dependencies:
'@types/estree': 1.0.8
@@ -15599,6 +15682,8 @@ snapshots:
jju@1.4.0: {}
+ jose@6.2.0: {}
+
joycon@3.1.1: {}
js-beautify@1.15.4:
@@ -15824,6 +15909,20 @@ snapshots:
untun: 0.1.3
uqr: 0.1.2
+ livekit-client@2.17.2(@types/dom-mediacapture-record@1.0.22):
+ dependencies:
+ '@livekit/mutex': 1.1.1
+ '@livekit/protocol': 1.44.0
+ '@types/dom-mediacapture-record': 1.0.22
+ events: 3.3.0
+ jose: 6.2.0
+ loglevel: 1.9.2
+ sdp-transform: 2.15.0
+ ts-debounce: 4.0.0
+ tslib: 2.8.1
+ typed-emitter: 2.1.0
+ webrtc-adapter: 9.0.4
+
load-tsconfig@0.2.5: {}
local-pkg@0.5.1:
@@ -15862,6 +15961,8 @@ snapshots:
chalk: 4.1.2
is-unicode-supported: 0.1.0
+ loglevel@1.9.2: {}
+
long@5.3.2: {}
longest-streak@3.1.0: {}
@@ -17515,6 +17616,10 @@ snapshots:
scule@1.3.0: {}
+ sdp-transform@2.15.0: {}
+
+ sdp@3.2.1: {}
+
semver@6.3.1: {}
semver@7.5.4:
@@ -18036,6 +18141,8 @@ snapshots:
dependencies:
typescript: 5.9.3
+ ts-debounce@4.0.0: {}
+
ts-declaration-location@1.0.7(typescript@5.9.3):
dependencies:
picomatch: 4.0.3
@@ -18143,6 +18250,10 @@ snapshots:
media-typer: 1.1.0
mime-types: 3.0.2
+ typed-emitter@2.1.0:
+ optionalDependencies:
+ rxjs: 7.8.2
+
typedoc-plugin-frontmatter@1.3.0(typedoc-plugin-markdown@4.9.0(typedoc@0.28.14(typescript@5.9.3))):
dependencies:
typedoc-plugin-markdown: 4.9.0(typedoc@0.28.14(typescript@5.9.3))
@@ -18908,6 +19019,10 @@ snapshots:
webpack-virtual-modules@0.6.2: {}
+ webrtc-adapter@9.0.4:
+ dependencies:
+ sdp: 3.2.1
+
whatwg-encoding@3.1.1:
dependencies:
iconv-lite: 0.6.3