34 changes: 31 additions & 3 deletions src/agent_runtime.py
@@ -56,7 +56,7 @@
ToolExecutionResult,
UsageStats,
)
from .openai_compat import OpenAICompatClient, OpenAICompatError
from .openai_compat import OpenAICompatClient, OpenAICompatError, extract_tool_calls_from_content
from .plan_runtime import PlanRuntime
from .plugin_runtime import PluginRuntime
from .remote_runtime import RemoteRuntime
@@ -1212,9 +1212,37 @@ def _query_model(
usage=usage,
)
assistant_message = session.messages[assistant_index]
turn_content = assistant_message.content
turn_tool_calls = self._tool_calls_from_message(assistant_message.tool_calls)

# Fallback: some models (e.g. Qwen without native function-calling
# support) embed tool calls as <tool_call> blocks in the content.
if not turn_tool_calls and turn_content:
extracted, cleaned_content = extract_tool_calls_from_content(turn_content)
if extracted:
turn_tool_calls = tuple(extracted)
turn_content = cleaned_content
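# Mirror the extracted calls in the OpenAI tool_calls wire format so the
# stored message reads as if the model had used native function calling.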
openai_tool_calls = tuple(
{
'id': tc.id,
'type': 'function',
'function': {
'name': tc.name,
'arguments': json.dumps(tc.arguments, ensure_ascii=True),
},
}
for tc in extracted
)
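# Rewrite the stored assistant message so later turns see structured
# tool_calls rather than the raw <tool_call> text.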
session.messages[assistant_index] = replace(
assistant_message,
content=cleaned_content,
tool_calls=openai_tool_calls,
)
assistant_message = session.messages[assistant_index]

turn = AssistantTurn(
content=assistant_message.content,
tool_calls=self._tool_calls_from_message(assistant_message.tool_calls),
content=turn_content,
tool_calls=turn_tool_calls,
finish_reason=finish_reason,
raw_message=assistant_message.to_openai_message(),
usage=usage,
47 changes: 47 additions & 0 deletions src/openai_compat.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import json
import re
from typing import Any, Iterator
from urllib import error, request

@@ -131,6 +132,46 @@ def _build_response_format(
}


_TOOL_CALL_RE = re.compile(r'<tool_call>\s*(.*?)\s*</tool_call>', re.DOTALL)


def extract_tool_calls_from_content(content: str) -> tuple[list[ToolCall], str]:
"""Extract embedded <tool_call> blocks from content text (Qwen/Hermes format).

Some models (e.g. Qwen served without native function-calling support) emit
tool invocations as ``<tool_call>{...}</tool_call>`` blocks inside the
assistant content instead of the structured ``tool_calls`` field. This
helper parses those blocks and returns the tool calls together with the
content string stripped of the ``<tool_call>`` blocks.

Returns ``(tool_calls, cleaned_content)``.
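
Example (illustrative)::

>>> calls, text = extract_tool_calls_from_content(
...     '<tool_call>{"name": "read_file", "arguments": {"path": "a.txt"}}</tool_call>'
... )
>>> calls[0].name, calls[0].arguments, text
('read_file', {'path': 'a.txt'}, '')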
"""
tool_calls: list[ToolCall] = []

def _replace(match: re.Match) -> str: # type: ignore[type-arg]
raw_json = match.group(1)
try:
payload = json.loads(raw_json)
except json.JSONDecodeError:
return match.group(0)
if not isinstance(payload, dict):
return match.group(0)
name = payload.get('name')
if not isinstance(name, str) or not name:
return match.group(0)
arguments = payload.get('arguments') or payload.get('parameters') or {}
if not isinstance(arguments, dict):
arguments = {}
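# The content format carries no call id, so synthesize a sequential one.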
call_id = f'call_{len(tool_calls)}'
tool_calls.append(ToolCall(id=call_id, name=name, arguments=arguments))
return ''

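# Well-formed blocks are stripped from the content; malformed ones are
# left in place untouched.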
cleaned = _TOOL_CALL_RE.sub(_replace, content)
if tool_calls:
cleaned = cleaned.strip()
return tool_calls, cleaned


class OpenAICompatClient:
"""Minimal OpenAI-compatible chat client for local model servers."""

@@ -166,6 +207,12 @@ def complete(
content = _normalize_content(message.get('content'))
tool_calls = self._parse_tool_calls_from_message(message)

# Fallback: some models (e.g. Qwen without native function-calling
# support) embed tool calls as <tool_call> blocks in the content.
if not tool_calls and content:
extracted, content = extract_tool_calls_from_content(content)
tool_calls = extracted

finish_reason = first_choice.get('finish_reason')
if finish_reason is not None and not isinstance(finish_reason, str):
finish_reason = str(finish_reason)
84 changes: 83 additions & 1 deletion tests/test_agent_runtime.py
@@ -184,7 +184,89 @@ def test_openai_client_parses_tool_calls(self) -> None:
self.assertEqual(turn.tool_calls[0].name, 'read_file')
self.assertEqual(turn.tool_calls[0].arguments['path'], 'hello.txt')

def test_openai_client_streams_content_and_usage(self) -> None:
def test_openai_client_parses_tool_calls_from_content(self) -> None:
"""Qwen/Hermes models may embed tool calls as <tool_call> blocks in content."""
tool_call_json = '{"name": "write_file", "arguments": {"path": "out.txt", "content": "hi"}}'
responses = [
{
'choices': [
{
'message': {
'role': 'assistant',
'content': f'<tool_call>\n{tool_call_json}\n</tool_call>',
},
'finish_reason': 'stop',
}
],
'usage': {'prompt_tokens': 10, 'completion_tokens': 20},
}
]
with patch('src.openai_compat.request.urlopen', side_effect=make_urlopen_side_effect(responses)):
client = OpenAICompatClient(
ModelConfig(
model='Qwen/Qwen3-Coder-30B-A3B-Instruct',
base_url='http://127.0.0.1:8000/v1',
)
)
turn = client.complete(
messages=[{'role': 'user', 'content': 'write out.txt'}],
tools=[],
)
self.assertEqual(turn.content, '')
self.assertEqual(len(turn.tool_calls), 1)
self.assertEqual(turn.tool_calls[0].name, 'write_file')
self.assertEqual(turn.tool_calls[0].arguments['path'], 'out.txt')
self.assertEqual(turn.tool_calls[0].arguments['content'], 'hi')

def test_agent_executes_embedded_tool_calls_from_content(self) -> None:
"""End-to-end: agent creates a file when the model uses <tool_call> content format."""
tool_call_json = '{"name": "write_file", "arguments": {"path": "out.txt", "content": "hello"}}'
responses = [
{
'choices': [
{
'message': {
'role': 'assistant',
'content': f'<tool_call>\n{tool_call_json}\n</tool_call>',
},
'finish_reason': 'stop',
}
],
'usage': {'prompt_tokens': 10, 'completion_tokens': 20},
},
{
'choices': [
{
'message': {
'role': 'assistant',
'content': 'Done.',
},
'finish_reason': 'stop',
}
],
'usage': {'prompt_tokens': 5, 'completion_tokens': 2},
},
]
with tempfile.TemporaryDirectory() as tmp_dir:
workspace = Path(tmp_dir)
with patch('src.openai_compat.request.urlopen', side_effect=make_urlopen_side_effect(responses)):
agent = LocalCodingAgent(
model_config=ModelConfig(
model='Qwen/Qwen3-Coder-30B-A3B-Instruct',
base_url='http://127.0.0.1:8000/v1',
),
runtime_config=AgentRuntimeConfig(
cwd=workspace,
permissions=AgentPermissions(allow_file_write=True),
),
)
result = agent.run('Create out.txt')
self.assertTrue((workspace / 'out.txt').exists())
self.assertEqual((workspace / 'out.txt').read_text(), 'hello')
self.assertEqual(result.final_output, 'Done.')
self.assertEqual(result.tool_calls, 1)


def test_openai_client_streams_content_and_usage(self) -> None:
responses = [
[
{'choices': [{'delta': {'content': 'Hello '}, 'finish_reason': None}]},