Skip to content

Commit 8fe0ab8

Browse files
feat(api): realtime 2
1 parent 13c639c commit 8fe0ab8

24 files changed

Lines changed: 198 additions & 53 deletions

.stats.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 233
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai/openai-08cb8ed18dfe4a9fa518e278576d3cfe5710cb5c22789cf80826c900569bcf56.yml
3-
openapi_spec_hash: 20f820c94f54741b75d719f6a7371c12
4-
config_hash: f291a449469edfe61a28424e548899b2
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai/openai-371f497afe4d6070f6e252e5febbe8f453c7058a8dff0c26a01b4d88442a4ac2.yml
3+
openapi_spec_hash: d39f46e8fda45f77096448105efd175a
4+
config_hash: b64135fff1fe9cf4069b9ecf59ae8b07

src/openai/resources/realtime/api.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,8 @@ from openai.types.realtime import (
5858
RealtimeMcpToolCall,
5959
RealtimeMcpToolExecutionError,
6060
RealtimeMcphttpError,
61+
RealtimeReasoning,
62+
RealtimeReasoningEffort,
6163
RealtimeResponse,
6264
RealtimeResponseCreateAudioOutput,
6365
RealtimeResponseCreateMcpTool,
@@ -130,7 +132,6 @@ Types:
130132

131133
```python
132134
from openai.types.realtime import (
133-
RealtimeSessionClientSecret,
134135
RealtimeSessionCreateResponse,
135136
RealtimeTranscriptionSessionCreateResponse,
136137
RealtimeTranscriptionSessionTurnDetection,

src/openai/resources/realtime/calls.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
call_reject_params,
2929
)
3030
from ...types.responses.response_prompt_param import ResponsePromptParam
31+
from ...types.realtime.realtime_reasoning_param import RealtimeReasoningParam
3132
from ...types.realtime.realtime_truncation_param import RealtimeTruncationParam
3233
from ...types.realtime.realtime_audio_config_param import RealtimeAudioConfigParam
3334
from ...types.realtime.realtime_tools_config_param import RealtimeToolsConfigParam
@@ -121,6 +122,7 @@ def accept(
121122
Literal[
122123
"gpt-realtime",
123124
"gpt-realtime-1.5",
125+
"gpt-realtime-2",
124126
"gpt-realtime-2025-08-28",
125127
"gpt-4o-realtime-preview",
126128
"gpt-4o-realtime-preview-2024-10-01",
@@ -139,7 +141,9 @@ def accept(
139141
]
140142
| Omit = omit,
141143
output_modalities: List[Literal["text", "audio"]] | Omit = omit,
144+
parallel_tool_calls: bool | Omit = omit,
142145
prompt: Optional[ResponsePromptParam] | Omit = omit,
146+
reasoning: RealtimeReasoningParam | Omit = omit,
143147
tool_choice: RealtimeToolChoiceConfigParam | Omit = omit,
144148
tools: RealtimeToolsConfigParam | Omit = omit,
145149
tracing: Optional[RealtimeTracingConfigParam] | Omit = omit,
@@ -188,9 +192,14 @@ def accept(
188192
can be used to make the model respond with text only. It is not possible to
189193
request both `text` and `audio` at the same time.
190194
195+
parallel_tool_calls: Whether the model may call multiple tools in parallel. Only supported by
196+
reasoning Realtime models such as `gpt-realtime-2`.
197+
191198
prompt: Reference to a prompt template and its variables.
192199
[Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
193200
201+
reasoning: Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
202+
194203
tool_choice: How the model chooses tools. Provide one of the string modes or force a specific
195204
function/MCP tool.
196205
@@ -245,7 +254,9 @@ def accept(
245254
"max_output_tokens": max_output_tokens,
246255
"model": model,
247256
"output_modalities": output_modalities,
257+
"parallel_tool_calls": parallel_tool_calls,
248258
"prompt": prompt,
259+
"reasoning": reasoning,
249260
"tool_choice": tool_choice,
250261
"tools": tools,
251262
"tracing": tracing,
@@ -471,6 +482,7 @@ async def accept(
471482
Literal[
472483
"gpt-realtime",
473484
"gpt-realtime-1.5",
485+
"gpt-realtime-2",
474486
"gpt-realtime-2025-08-28",
475487
"gpt-4o-realtime-preview",
476488
"gpt-4o-realtime-preview-2024-10-01",
@@ -489,7 +501,9 @@ async def accept(
489501
]
490502
| Omit = omit,
491503
output_modalities: List[Literal["text", "audio"]] | Omit = omit,
504+
parallel_tool_calls: bool | Omit = omit,
492505
prompt: Optional[ResponsePromptParam] | Omit = omit,
506+
reasoning: RealtimeReasoningParam | Omit = omit,
493507
tool_choice: RealtimeToolChoiceConfigParam | Omit = omit,
494508
tools: RealtimeToolsConfigParam | Omit = omit,
495509
tracing: Optional[RealtimeTracingConfigParam] | Omit = omit,
@@ -538,9 +552,14 @@ async def accept(
538552
can be used to make the model respond with text only. It is not possible to
539553
request both `text` and `audio` at the same time.
540554
555+
parallel_tool_calls: Whether the model may call multiple tools in parallel. Only supported by
556+
reasoning Realtime models such as `gpt-realtime-2`.
557+
541558
prompt: Reference to a prompt template and its variables.
542559
[Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
543560
561+
reasoning: Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
562+
544563
tool_choice: How the model chooses tools. Provide one of the string modes or force a specific
545564
function/MCP tool.
546565
@@ -595,7 +614,9 @@ async def accept(
595614
"max_output_tokens": max_output_tokens,
596615
"model": model,
597616
"output_modalities": output_modalities,
617+
"parallel_tool_calls": parallel_tool_calls,
598618
"prompt": prompt,
619+
"reasoning": reasoning,
599620
"tool_choice": tool_choice,
600621
"tools": tools,
601622
"tracing": tracing,

src/openai/types/realtime/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from .call_accept_params import CallAcceptParams as CallAcceptParams
1010
from .call_create_params import CallCreateParams as CallCreateParams
1111
from .call_reject_params import CallRejectParams as CallRejectParams
12+
from .realtime_reasoning import RealtimeReasoning as RealtimeReasoning
1213
from .audio_transcription import AudioTranscription as AudioTranscription
1314
from .log_prob_properties import LogProbProperties as LogProbProperties
1415
from .realtime_truncation import RealtimeTruncation as RealtimeTruncation
@@ -38,11 +39,13 @@
3839
from .realtime_response_usage import RealtimeResponseUsage as RealtimeResponseUsage
3940
from .realtime_tracing_config import RealtimeTracingConfig as RealtimeTracingConfig
4041
from .mcp_list_tools_completed import McpListToolsCompleted as McpListToolsCompleted
42+
from .realtime_reasoning_param import RealtimeReasoningParam as RealtimeReasoningParam
4143
from .realtime_response_status import RealtimeResponseStatus as RealtimeResponseStatus
4244
from .response_mcp_call_failed import ResponseMcpCallFailed as ResponseMcpCallFailed
4345
from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent
4446
from .audio_transcription_param import AudioTranscriptionParam as AudioTranscriptionParam
4547
from .rate_limits_updated_event import RateLimitsUpdatedEvent as RateLimitsUpdatedEvent
48+
from .realtime_reasoning_effort import RealtimeReasoningEffort as RealtimeReasoningEffort
4649
from .realtime_truncation_param import RealtimeTruncationParam as RealtimeTruncationParam
4750
from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent
4851
from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent
@@ -75,7 +78,6 @@
7578
from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent
7679
from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent
7780
from .realtime_mcp_approval_response import RealtimeMcpApprovalResponse as RealtimeMcpApprovalResponse
78-
from .realtime_session_client_secret import RealtimeSessionClientSecret as RealtimeSessionClientSecret
7981
from .conversation_item_created_event import ConversationItemCreatedEvent as ConversationItemCreatedEvent
8082
from .conversation_item_deleted_event import ConversationItemDeletedEvent as ConversationItemDeletedEvent
8183
from .input_audio_buffer_append_event import InputAudioBufferAppendEvent as InputAudioBufferAppendEvent

src/openai/types/realtime/audio_transcription.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,13 @@
99

1010

1111
class AudioTranscription(BaseModel):
12+
delay: Optional[Literal["minimal", "low", "medium", "high", "xhigh"]] = None
13+
"""
14+
Controls how long the model waits before emitting transcription text. Higher
15+
values can improve transcription accuracy at the cost of latency. Only supported
16+
with `gpt-realtime-whisper` in GA Realtime sessions.
17+
"""
18+
1219
language: Optional[str] = None
1320
"""The language of the input audio.
1421
@@ -25,15 +32,16 @@ class AudioTranscription(BaseModel):
2532
"gpt-4o-mini-transcribe-2025-12-15",
2633
"gpt-4o-transcribe",
2734
"gpt-4o-transcribe-diarize",
35+
"gpt-realtime-whisper",
2836
],
2937
None,
3038
] = None
3139
"""The model to use for transcription.
3240
3341
Current options are `whisper-1`, `gpt-4o-mini-transcribe`,
34-
`gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and
35-
`gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need
36-
diarization with speaker labels.
42+
`gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
43+
`gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use
44+
`gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
3745
"""
3846

3947
prompt: Optional[str] = None
@@ -43,4 +51,5 @@ class AudioTranscription(BaseModel):
4351
[prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
4452
For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the
4553
prompt is a free text string, for example "expect words related to technology".
54+
Prompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.
4655
"""

src/openai/types/realtime/audio_transcription_param.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,13 @@
99

1010

1111
class AudioTranscriptionParam(TypedDict, total=False):
12+
delay: Literal["minimal", "low", "medium", "high", "xhigh"]
13+
"""
14+
Controls how long the model waits before emitting transcription text. Higher
15+
values can improve transcription accuracy at the cost of latency. Only supported
16+
with `gpt-realtime-whisper` in GA Realtime sessions.
17+
"""
18+
1219
language: str
1320
"""The language of the input audio.
1421
@@ -25,14 +32,15 @@ class AudioTranscriptionParam(TypedDict, total=False):
2532
"gpt-4o-mini-transcribe-2025-12-15",
2633
"gpt-4o-transcribe",
2734
"gpt-4o-transcribe-diarize",
35+
"gpt-realtime-whisper",
2836
],
2937
]
3038
"""The model to use for transcription.
3139
3240
Current options are `whisper-1`, `gpt-4o-mini-transcribe`,
33-
`gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and
34-
`gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need
35-
diarization with speaker labels.
41+
`gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
42+
`gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use
43+
`gpt-4o-transcribe-diarize` when you need diarization with speaker labels.
3644
"""
3745

3846
prompt: str
@@ -42,4 +50,5 @@ class AudioTranscriptionParam(TypedDict, total=False):
4250
[prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
4351
For `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the
4452
prompt is a free text string, for example "expect words related to technology".
53+
Prompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.
4554
"""

src/openai/types/realtime/call_accept_params.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from typing import List, Union, Optional
66
from typing_extensions import Literal, Required, TypedDict
77

8+
from .realtime_reasoning_param import RealtimeReasoningParam
89
from .realtime_truncation_param import RealtimeTruncationParam
910
from .realtime_audio_config_param import RealtimeAudioConfigParam
1011
from .realtime_tools_config_param import RealtimeToolsConfigParam
@@ -57,6 +58,7 @@ class CallAcceptParams(TypedDict, total=False):
5758
Literal[
5859
"gpt-realtime",
5960
"gpt-realtime-1.5",
61+
"gpt-realtime-2",
6062
"gpt-realtime-2025-08-28",
6163
"gpt-4o-realtime-preview",
6264
"gpt-4o-realtime-preview-2024-10-01",
@@ -83,12 +85,21 @@ class CallAcceptParams(TypedDict, total=False):
8385
only. It is not possible to request both `text` and `audio` at the same time.
8486
"""
8587

88+
parallel_tool_calls: bool
89+
"""Whether the model may call multiple tools in parallel.
90+
91+
Only supported by reasoning Realtime models such as `gpt-realtime-2`.
92+
"""
93+
8694
prompt: Optional[ResponsePromptParam]
8795
"""
8896
Reference to a prompt template and its variables.
8997
[Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
9098
"""
9199

100+
reasoning: RealtimeReasoningParam
101+
"""Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`."""
102+
92103
tool_choice: RealtimeToolChoiceConfigParam
93104
"""How the model chooses tools.
94105

src/openai/types/realtime/realtime_audio_config_input.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,4 +67,7 @@ class RealtimeAudioConfigInput(BaseModel):
6767
trails off with "uhhm", the model will score a low probability of turn end and
6868
wait longer for the user to continue speaking. This can be useful for more
6969
natural conversations, but may have a higher latency.
70+
71+
For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
72+
`null`; VAD is not supported.
7073
"""

src/openai/types/realtime/realtime_audio_config_input_param.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,4 +69,7 @@ class RealtimeAudioConfigInputParam(TypedDict, total=False):
6969
trails off with "uhhm", the model will score a low probability of turn end and
7070
wait longer for the user to continue speaking. This can be useful for more
7171
natural conversations, but may have a higher latency.
72+
73+
For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
74+
`null`; VAD is not supported.
7275
"""

src/openai/types/realtime/realtime_reasoning.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2+
3+
from typing import Optional
4+
5+
from ..._models import BaseModel
6+
from .realtime_reasoning_effort import RealtimeReasoningEffort
7+
8+
__all__ = ["RealtimeReasoning"]
9+
10+
11+
class RealtimeReasoning(BaseModel):
12+
"""Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`."""
13+
14+
effort: Optional[RealtimeReasoningEffort] = None
15+
"""
16+
Constrains effort on reasoning for reasoning-capable Realtime models such as
17+
`gpt-realtime-2`.
18+
"""

0 commit comments

Comments
 (0)