From 08e6bd8dc715d50106512345a1ed63e5627651fb Mon Sep 17 00:00:00 2001 From: Charles Cheng Date: Mon, 15 Jun 2026 16:40:20 +0800 Subject: [PATCH] feat: expose configurable websocket max_size limit Both the Responses websocket transport (OpenAIResponsesWebSocketOptions) and the Realtime websocket transport (TransportConfig) hard-coded max_size=None, disabling the websockets library's incoming message-size limit with no way to override it short of forking or monkeypatching. Add an optional max_size field to both option types and pass it through to websockets.connect when set, mirroring the existing ping_interval / ping_timeout knobs. The default behavior is unchanged (no limit) so this is purely opt-in for long-lived agent processes that need to bound per-message memory usage behind proxies or in constrained containers. --- docs/models/index.md | 1 + src/agents/models/openai_responses.py | 9 +++++++ src/agents/realtime/openai_realtime.py | 7 ++++++ tests/models/test_openai_responses.py | 30 +++++++++++++++++++++++ tests/realtime/test_openai_realtime.py | 34 ++++++++++++++++++++++++++ 5 files changed, 81 insertions(+) diff --git a/docs/models/index.md b/docs/models/index.md index 30c3b18f4f..2ec41aedf6 100644 --- a/docs/models/index.md +++ b/docs/models/index.md @@ -206,6 +206,7 @@ If you use a custom OpenAI-compatible endpoint or proxy, websocket transport als - Install the `websockets` package if it is not already available in your environment. - You can use [`Runner.run_streamed()`][agents.run.Runner.run_streamed] directly after enabling websocket transport. For multi-turn workflows where you want to reuse the same websocket connection across turns (and nested agent-as-tool calls), the [`responses_websocket_session()`][agents.responses_websocket_session] helper is recommended. See the [Running agents](../running_agents.md) guide and [`examples/basic/stream_ws.py`](https://github.com/openai/openai-agents-python/tree/main/examples/basic/stream_ws.py). 
- For long reasoning turns or networks with latency spikes, customize websocket keepalive behavior with `responses_websocket_options`. Increase `ping_timeout` to tolerate delayed pong frames, or set `ping_timeout=None` to disable heartbeat timeouts while keeping pings enabled. Prefer HTTP/SSE transport when reliability is more important than websocket latency. +- By default the SDK disables the incoming message-size limit (`max_size=None`). For long-lived agent processes behind proxies or in memory-constrained containers, set `responses_websocket_options={"max_size": 8 * 1024 * 1024}` to bound per-message memory usage. A message larger than the limit makes the `websockets` library close the connection (close code 1009, "message too big"), so pick a value comfortably above the largest event you expect to receive. ## Non-OpenAI models diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py index 3af75481bf..f66c6afb7b 100644 --- a/src/agents/models/openai_responses.py +++ b/src/agents/models/openai_responses.py @@ -210,6 +210,13 @@ class OpenAIResponsesWebSocketOptions(TypedDict): spikes. """ + max_size: NotRequired[int | None] + """Maximum size in bytes of an incoming websocket message. + + The SDK defaults to ``None`` (no limit). Set an explicit byte limit to bound memory usage + for long-lived agent processes running behind proxies or in memory-constrained containers. 
+ """ + class _ResponseStreamWithRequestId: """Wrap an SDK event stream and retain the originating request ID.""" @@ -1585,6 +1592,8 @@ async def _open_websocket_connection( connect_kwargs["ping_interval"] = self._websocket_options["ping_interval"] if "ping_timeout" in self._websocket_options: connect_kwargs["ping_timeout"] = self._websocket_options["ping_timeout"] + if "max_size" in self._websocket_options: + connect_kwargs["max_size"] = self._websocket_options["max_size"] return await connect( ws_url, diff --git a/src/agents/realtime/openai_realtime.py b/src/agents/realtime/openai_realtime.py index 9bf7ea1308..d38f535e47 100644 --- a/src/agents/realtime/openai_realtime.py +++ b/src/agents/realtime/openai_realtime.py @@ -471,6 +471,11 @@ class TransportConfig(TypedDict): handshake_timeout: NotRequired[float] """Time in seconds to wait for the connection handshake to complete.""" + max_size: NotRequired[int | None] + """Maximum size in bytes of an incoming websocket message. + Defaults to None (no limit). 
Set an explicit byte limit to bound memory usage for + long-lived connections behind proxies or in memory-constrained containers.""" + class OpenAIRealtimeWebSocketModel(RealtimeModel): """A model that uses OpenAI's WebSocket API.""" @@ -589,6 +594,8 @@ async def _create_websocket_connection( connect_kwargs["ping_timeout"] = transport_config["ping_timeout"] if "handshake_timeout" in transport_config: connect_kwargs["open_timeout"] = transport_config["handshake_timeout"] + if "max_size" in transport_config: + connect_kwargs["max_size"] = transport_config["max_size"] return await websockets.connect(url, **connect_kwargs) diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index 7d329da6f8..d86d435167 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -1770,6 +1770,36 @@ async def fake_connect(ws_url: str, **kwargs: Any) -> DummyWSConnection: assert captured_kwargs["ping_timeout"] is None +@pytest.mark.asyncio +async def test_websocket_model_passes_max_size_to_connect(monkeypatch): + import websockets.asyncio.client as websockets_client + + client = DummyWSClient() + model = OpenAIResponsesWSModel( + model="gpt-4", + openai_client=client, # type: ignore[arg-type] + websocket_options={"max_size": 8 * 1024 * 1024}, + ) + ws = DummyWSConnection([]) + captured_kwargs: dict[str, Any] = {} + + async def fake_connect(ws_url: str, **kwargs: Any) -> DummyWSConnection: + captured_kwargs["ws_url"] = ws_url + captured_kwargs.update(kwargs) + return ws + + monkeypatch.setattr(websockets_client, "connect", fake_connect) + + opened = await model._open_websocket_connection( + "wss://example.test/v1/responses", + {"Authorization": "Bearer test-key"}, + connect_timeout=10.0, + ) + + assert opened is ws + assert captured_kwargs["max_size"] == 8 * 1024 * 1024 + + @pytest.mark.allow_call_model_methods def test_websocket_model_reconnects_when_reused_from_different_event_loop(monkeypatch): client = 
DummyWSClient() diff --git a/tests/realtime/test_openai_realtime.py b/tests/realtime/test_openai_realtime.py index 87207e3160..89e41b7b11 100644 --- a/tests/realtime/test_openai_realtime.py +++ b/tests/realtime/test_openai_realtime.py @@ -2139,6 +2139,40 @@ def mock_create_task_func(coro): assert captured_kwargs.get("open_timeout") == 0.75 + @pytest.mark.asyncio + async def test_max_size_config_is_applied(self): + """Test that max_size is passed through to websockets.connect.""" + captured_kwargs: dict[str, Any] = {} + + async def capture_connect(*args, **kwargs): + captured_kwargs.update(kwargs) + mock_ws = AsyncMock() + mock_ws.close_code = None + return mock_ws + + transport: TransportConfig = { + "max_size": 8 * 1024 * 1024, + } + model = OpenAIRealtimeWebSocketModel(transport_config=transport) + with patch("websockets.connect", side_effect=capture_connect): + with patch("asyncio.create_task") as mock_create_task: + mock_task = AsyncMock() + + def mock_create_task_func(coro): + coro.close() + return mock_task + + mock_create_task.side_effect = mock_create_task_func + + config: RealtimeModelConfig = { + "api_key": "test-key", + "url": "ws://localhost:8080/v1/realtime", + "initial_model_settings": {"model_name": "gpt-4o-realtime-preview"}, + } + await model.connect(config) + + assert captured_kwargs.get("max_size") == 8 * 1024 * 1024 + @pytest.mark.asyncio async def test_ping_timeout_disabled_vs_enabled(self): """Test that ping timeout can be disabled (None) vs enabled with a value."""