From 08e6bd8dc715d50106512345a1ed63e5627651fb Mon Sep 17 00:00:00 2001
From: Charles Cheng <charlescheng@rezona.ai>
Date: Mon, 15 Jun 2026 16:40:20 +0800
Subject: [PATCH] feat: expose configurable websocket max_size limit

Both the Responses websocket transport (OpenAIResponsesWebSocketOptions)
and the Realtime websocket transport (TransportConfig) hard-code
max_size=None. That disables the websockets library's incoming
message-size limit, and there is no way to override it short of forking
or monkeypatching.

Add an optional max_size field to both option types and pass it through
to websockets.connect when the key is present, mirroring the existing
ping_interval / ping_timeout knobs. The default behavior is unchanged
(no limit), so this is purely opt-in for long-lived agent processes that
need to bound per-message memory usage behind proxies or in constrained
containers.
---
 docs/models/index.md                   |  1 +
 src/agents/models/openai_responses.py  |  9 +++++++
 src/agents/realtime/openai_realtime.py |  7 ++++++
 tests/models/test_openai_responses.py  | 30 +++++++++++++++++++++++
 tests/realtime/test_openai_realtime.py | 34 ++++++++++++++++++++++++++
 5 files changed, 81 insertions(+)

diff --git a/docs/models/index.md b/docs/models/index.md
index 30c3b18f4f..2ec41aedf6 100644
--- a/docs/models/index.md
+++ b/docs/models/index.md
@@ -206,6 +206,7 @@ If you use a custom OpenAI-compatible endpoint or proxy, websocket transport als
 -   Install the `websockets` package if it is not already available in your environment.
 -   You can use [`Runner.run_streamed()`][agents.run.Runner.run_streamed] directly after enabling websocket transport. For multi-turn workflows where you want to reuse the same websocket connection across turns (and nested agent-as-tool calls), the [`responses_websocket_session()`][agents.responses_websocket_session] helper is recommended. See the [Running agents](../running_agents.md) guide and [`examples/basic/stream_ws.py`](https://github.com/openai/openai-agents-python/tree/main/examples/basic/stream_ws.py).
 -   For long reasoning turns or networks with latency spikes, customize websocket keepalive behavior with `responses_websocket_options`. Increase `ping_timeout` to tolerate delayed pong frames, or set `ping_timeout=None` to disable heartbeat timeouts while keeping pings enabled. Prefer HTTP/SSE transport when reliability is more important than websocket latency.
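+-   By default, the SDK disables the incoming message-size limit (`max_size=None`). For long-lived agent processes behind proxies or in memory-constrained containers, set `responses_websocket_options={"max_size": 8 * 1024 * 1024}` (8 MiB) to bound per-message memory usage. Choose a limit comfortably above the largest event you expect, because the `websockets` library closes the connection when an incoming message exceeds `max_size`.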
 
 ## Non-OpenAI models
 
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index 3af75481bf..f66c6afb7b 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -210,6 +210,13 @@ class OpenAIResponsesWebSocketOptions(TypedDict):
     spikes.
     """
 
+    max_size: NotRequired[int | None]
+    """Maximum size in bytes of an incoming websocket message.
+
+    The SDK defaults to ``None`` (no limit). Set an explicit byte limit to bound memory usage
+    for long-lived agent processes running behind proxies or in memory-constrained containers.
+    """
+
 
 class _ResponseStreamWithRequestId:
     """Wrap an SDK event stream and retain the originating request ID."""
@@ -1585,6 +1592,8 @@ async def _open_websocket_connection(
             connect_kwargs["ping_interval"] = self._websocket_options["ping_interval"]
         if "ping_timeout" in self._websocket_options:
             connect_kwargs["ping_timeout"] = self._websocket_options["ping_timeout"]
+        if "max_size" in self._websocket_options:
+            connect_kwargs["max_size"] = self._websocket_options["max_size"]
 
         return await connect(
             ws_url,
diff --git a/src/agents/realtime/openai_realtime.py b/src/agents/realtime/openai_realtime.py
index 9bf7ea1308..d38f535e47 100644
--- a/src/agents/realtime/openai_realtime.py
+++ b/src/agents/realtime/openai_realtime.py
@@ -471,6 +471,11 @@ class TransportConfig(TypedDict):
     handshake_timeout: NotRequired[float]
     """Time in seconds to wait for the connection handshake to complete."""
 
+    max_size: NotRequired[int | None]
+    """Maximum size in bytes of an incoming websocket message.
+    Defaults to None (no limit). Set an explicit byte limit to bound memory usage for
+    long-lived connections behind proxies or in memory-constrained containers."""
+
 
 class OpenAIRealtimeWebSocketModel(RealtimeModel):
     """A model that uses OpenAI's WebSocket API."""
@@ -589,6 +594,8 @@ async def _create_websocket_connection(
                 connect_kwargs["ping_timeout"] = transport_config["ping_timeout"]
             if "handshake_timeout" in transport_config:
                 connect_kwargs["open_timeout"] = transport_config["handshake_timeout"]
+            if "max_size" in transport_config:
+                connect_kwargs["max_size"] = transport_config["max_size"]
 
         return await websockets.connect(url, **connect_kwargs)
 
diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py
index 7d329da6f8..d86d435167 100644
--- a/tests/models/test_openai_responses.py
+++ b/tests/models/test_openai_responses.py
@@ -1770,6 +1770,36 @@ async def fake_connect(ws_url: str, **kwargs: Any) -> DummyWSConnection:
     assert captured_kwargs["ping_timeout"] is None
 
 
+@pytest.mark.asyncio
+async def test_websocket_model_passes_max_size_to_connect(monkeypatch):
+    import websockets.asyncio.client as websockets_client
+
+    client = DummyWSClient()
+    model = OpenAIResponsesWSModel(
+        model="gpt-4",
+        openai_client=client,  # type: ignore[arg-type]
+        websocket_options={"max_size": 8 * 1024 * 1024},
+    )
+    ws = DummyWSConnection([])
+    captured_kwargs: dict[str, Any] = {}
+
+    async def fake_connect(ws_url: str, **kwargs: Any) -> DummyWSConnection:
+        captured_kwargs["ws_url"] = ws_url
+        captured_kwargs.update(kwargs)
+        return ws
+
+    monkeypatch.setattr(websockets_client, "connect", fake_connect)
+
+    opened = await model._open_websocket_connection(
+        "wss://example.test/v1/responses",
+        {"Authorization": "Bearer test-key"},
+        connect_timeout=10.0,
+    )
+
+    assert opened is ws
+    assert captured_kwargs["max_size"] == 8 * 1024 * 1024
+
+
 @pytest.mark.allow_call_model_methods
 def test_websocket_model_reconnects_when_reused_from_different_event_loop(monkeypatch):
     client = DummyWSClient()
diff --git a/tests/realtime/test_openai_realtime.py b/tests/realtime/test_openai_realtime.py
index 87207e3160..89e41b7b11 100644
--- a/tests/realtime/test_openai_realtime.py
+++ b/tests/realtime/test_openai_realtime.py
@@ -2139,6 +2139,40 @@ def mock_create_task_func(coro):
 
         assert captured_kwargs.get("open_timeout") == 0.75
 
+    @pytest.mark.asyncio
+    async def test_max_size_config_is_applied(self):
+        """Test that max_size is passed through to websockets.connect."""
+        captured_kwargs: dict[str, Any] = {}
+
+        async def capture_connect(*args, **kwargs):
+            captured_kwargs.update(kwargs)
+            mock_ws = AsyncMock()
+            mock_ws.close_code = None
+            return mock_ws
+
+        transport: TransportConfig = {
+            "max_size": 8 * 1024 * 1024,
+        }
+        model = OpenAIRealtimeWebSocketModel(transport_config=transport)
+        with patch("websockets.connect", side_effect=capture_connect):
+            with patch("asyncio.create_task") as mock_create_task:
+                mock_task = AsyncMock()
+
+                def mock_create_task_func(coro):
+                    coro.close()
+                    return mock_task
+
+                mock_create_task.side_effect = mock_create_task_func
+
+                config: RealtimeModelConfig = {
+                    "api_key": "test-key",
+                    "url": "ws://localhost:8080/v1/realtime",
+                    "initial_model_settings": {"model_name": "gpt-4o-realtime-preview"},
+                }
+                await model.connect(config)
+
+        assert captured_kwargs.get("max_size") == 8 * 1024 * 1024
+
     @pytest.mark.asyncio
     async def test_ping_timeout_disabled_vs_enabled(self):
         """Test that ping timeout can be disabled (None) vs enabled with a value."""