Defined in: realtime/types.ts
Configuration for a realtime session. Passed to the provider to configure model behavior, voice, tools, and voice activity detection (VAD) settings.
optional model: string;
Model to use for the session.
optional voice: string;
Voice to use for audio output.
optional instructions: string;
System instructions for the assistant.
optional tools: Array<RealtimeToolConfig>;
Tools available in the session.
optional vadMode: 'server' | 'semantic' | 'manual';
Voice activity detection mode.
optional vadConfig: VADConfig;
Detailed VAD configuration (threshold, padding, silence duration).
optional outputModalities: Array<'audio' | 'text'>;
Output modalities for responses (e.g., ['audio', 'text']).
optional temperature: number;
Temperature for generation (provider-specific range, e.g., 0.6-1.2 for OpenAI).
optional maxOutputTokens: number | 'inf';
Maximum number of tokens in a response. Use 'inf' to set no explicit cap, leaving the response bounded only by the model's own limit.
optional semanticEagerness: 'low' | 'medium' | 'high';
Eagerness level for semantic VAD (relevant when vadMode is 'semantic').
optional providerOptions: Record<string, any>;
Provider-specific options.