Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/models/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ If you use a custom OpenAI-compatible endpoint or proxy, websocket transport als
- Install the `websockets` package if it is not already available in your environment.
- You can use [`Runner.run_streamed()`][agents.run.Runner.run_streamed] directly after enabling websocket transport. For multi-turn workflows where you want to reuse the same websocket connection across turns (and nested agent-as-tool calls), the [`responses_websocket_session()`][agents.responses_websocket_session] helper is recommended. See the [Running agents](../running_agents.md) guide and [`examples/basic/stream_ws.py`](https://github.com/openai/openai-agents-python/tree/main/examples/basic/stream_ws.py).
- For long reasoning turns or networks with latency spikes, customize websocket keepalive behavior with `responses_websocket_options`. Increase `ping_timeout` to tolerate delayed pong frames, or set `ping_timeout=None` to disable heartbeat timeouts while keeping pings enabled. Prefer HTTP/SSE transport when reliability is more important than websocket latency.
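- By default the SDK disables the incoming message-size limit (`max_size=None`). For long-lived agent processes behind proxies or in memory-constrained containers, set `responses_websocket_options={"max_size": 8 * 1024 * 1024}` to bound per-message memory usage. An incoming message larger than the limit causes the websocket connection to be closed, so choose a value comfortably above the largest event you expect to receive.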

## Non-OpenAI models

Expand Down
9 changes: 9 additions & 0 deletions src/agents/models/openai_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,13 @@ class OpenAIResponsesWebSocketOptions(TypedDict):
spikes.
"""

max_size: NotRequired[int | None]
"""Maximum size in bytes of an incoming websocket message.

The SDK defaults to ``None`` (no limit). Set an explicit byte limit to bound memory usage
for long-lived agent processes running behind proxies or in memory-constrained containers.
"""


class _ResponseStreamWithRequestId:
"""Wrap an SDK event stream and retain the originating request ID."""
Expand Down Expand Up @@ -1585,6 +1592,8 @@ async def _open_websocket_connection(
connect_kwargs["ping_interval"] = self._websocket_options["ping_interval"]
if "ping_timeout" in self._websocket_options:
connect_kwargs["ping_timeout"] = self._websocket_options["ping_timeout"]
if "max_size" in self._websocket_options:
connect_kwargs["max_size"] = self._websocket_options["max_size"]

return await connect(
ws_url,
Expand Down
7 changes: 7 additions & 0 deletions src/agents/realtime/openai_realtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,11 @@ class TransportConfig(TypedDict):
handshake_timeout: NotRequired[float]
"""Time in seconds to wait for the connection handshake to complete."""

max_size: NotRequired[int | None]
"""Maximum size in bytes of an incoming websocket message.
Defaults to None (no limit). Set an explicit byte limit to bound memory usage for
long-lived connections behind proxies or in memory-constrained containers."""


class OpenAIRealtimeWebSocketModel(RealtimeModel):
"""A model that uses OpenAI's WebSocket API."""
Expand Down Expand Up @@ -589,6 +594,8 @@ async def _create_websocket_connection(
connect_kwargs["ping_timeout"] = transport_config["ping_timeout"]
if "handshake_timeout" in transport_config:
connect_kwargs["open_timeout"] = transport_config["handshake_timeout"]
if "max_size" in transport_config:
connect_kwargs["max_size"] = transport_config["max_size"]

return await websockets.connect(url, **connect_kwargs)

Expand Down
30 changes: 30 additions & 0 deletions tests/models/test_openai_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -1770,6 +1770,36 @@ async def fake_connect(ws_url: str, **kwargs: Any) -> DummyWSConnection:
assert captured_kwargs["ping_timeout"] is None


@pytest.mark.asyncio
async def test_websocket_model_passes_max_size_to_connect(monkeypatch):
    """Check that a ``max_size`` websocket option reaches the ``connect`` call."""
    import websockets.asyncio.client as websockets_client

    expected_limit = 8 * 1024 * 1024
    model = OpenAIResponsesWSModel(
        model="gpt-4",
        openai_client=DummyWSClient(),  # type: ignore[arg-type]
        websocket_options={"max_size": expected_limit},
    )
    dummy_connection = DummyWSConnection([])
    seen: dict[str, Any] = {}

    async def fake_connect(ws_url: str, **kwargs: Any) -> DummyWSConnection:
        # Record the URL and every keyword the model forwards, then hand back the stub.
        seen.update({"ws_url": ws_url, **kwargs})
        return dummy_connection

    monkeypatch.setattr(websockets_client, "connect", fake_connect)

    result = await model._open_websocket_connection(
        "wss://example.test/v1/responses",
        {"Authorization": "Bearer test-key"},
        connect_timeout=10.0,
    )

    assert result is dummy_connection
    assert seen["max_size"] == expected_limit


@pytest.mark.allow_call_model_methods
def test_websocket_model_reconnects_when_reused_from_different_event_loop(monkeypatch):
client = DummyWSClient()
Expand Down
34 changes: 34 additions & 0 deletions tests/realtime/test_openai_realtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2139,6 +2139,40 @@ def mock_create_task_func(coro):

assert captured_kwargs.get("open_timeout") == 0.75

@pytest.mark.asyncio
async def test_max_size_config_is_applied(self):
    """Ensure the transport ``max_size`` setting is forwarded to websockets.connect."""
    size_limit = 8 * 1024 * 1024
    seen_kwargs: dict[str, Any] = {}

    async def capture_connect(*args, **kwargs):
        # Remember the keyword arguments and return a socket stub that looks open.
        seen_kwargs.update(kwargs)
        fake_socket = AsyncMock()
        fake_socket.close_code = None
        return fake_socket

    transport: TransportConfig = {"max_size": size_limit}
    model = OpenAIRealtimeWebSocketModel(transport_config=transport)
    config: RealtimeModelConfig = {
        "api_key": "test-key",
        "url": "ws://localhost:8080/v1/realtime",
        "initial_model_settings": {"model_name": "gpt-4o-realtime-preview"},
    }

    with patch("websockets.connect", side_effect=capture_connect):
        with patch("asyncio.create_task") as mock_create_task:
            mock_task = AsyncMock()

            def mock_create_task_func(coro):
                # Close the coroutine so it is never scheduled (and never warns).
                coro.close()
                return mock_task

            mock_create_task.side_effect = mock_create_task_func
            await model.connect(config)

    assert seen_kwargs.get("max_size") == size_limit

@pytest.mark.asyncio
async def test_ping_timeout_disabled_vs_enabled(self):
"""Test that ping timeout can be disabled (None) vs enabled with a value."""
Expand Down