34 changes: 31 additions & 3 deletions src/agent_runtime.py
@@ -56,7 +56,7 @@
ToolExecutionResult,
UsageStats,
)
from .openai_compat import OpenAICompatClient, OpenAICompatError
from .openai_compat import OpenAICompatClient, OpenAICompatError, extract_tool_calls_from_content
from .plan_runtime import PlanRuntime
from .plugin_runtime import PluginRuntime
from .remote_runtime import RemoteRuntime
@@ -1212,9 +1212,37 @@ def _query_model(
usage=usage,
)
assistant_message = session.messages[assistant_index]
turn_content = assistant_message.content
turn_tool_calls = self._tool_calls_from_message(assistant_message.tool_calls)

# Fallback: some models (e.g. Qwen without native function-calling
# support) embed tool calls as <tool_call> blocks in the content.
if not turn_tool_calls and turn_content:
extracted, cleaned_content = extract_tool_calls_from_content(turn_content)
if extracted:
turn_tool_calls = tuple(extracted)
turn_content = cleaned_content
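# Mirror the extracted calls in the OpenAI tool_calls wire format so the
# stored message reads as if the model had used native function calling.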
openai_tool_calls = tuple(
{
'id': tc.id,
'type': 'function',
'function': {
'name': tc.name,
'arguments': json.dumps(tc.arguments, ensure_ascii=True),
},
}
for tc in extracted
)
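# Rewrite the stored assistant message so later turns see structured
# tool_calls rather than the raw <tool_call> text.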
session.messages[assistant_index] = replace(
assistant_message,
content=cleaned_content,
tool_calls=openai_tool_calls,
)
assistant_message = session.messages[assistant_index]

turn = AssistantTurn(
content=assistant_message.content,
tool_calls=self._tool_calls_from_message(assistant_message.tool_calls),
content=turn_content,
tool_calls=turn_tool_calls,
finish_reason=finish_reason,
raw_message=assistant_message.to_openai_message(),
usage=usage,
47 changes: 47 additions & 0 deletions src/openai_compat.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import json
import re
from typing import Any, Iterator
from urllib import error, request

@@ -131,6 +132,46 @@ def _build_response_format(
}


_TOOL_CALL_RE = re.compile(r'<tool_call>\s*(.*?)\s*</tool_call>', re.DOTALL)


def extract_tool_calls_from_content(content: str) -> tuple[list[ToolCall], str]:
"""Extract embedded <tool_call> blocks from content text (Qwen/Hermes format).

Some models (e.g. Qwen served without native function-calling support) emit
tool invocations as ``<tool_call>{...}</tool_call>`` blocks inside the
assistant content instead of the structured ``tool_calls`` field. This
helper parses those blocks and returns the tool calls together with the
content string stripped of the ``<tool_call>`` blocks.

Returns ``(tool_calls, cleaned_content)``.
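
Example (illustrative)::

>>> calls, text = extract_tool_calls_from_content(
...     '<tool_call>{"name": "read_file", "arguments": {"path": "a.txt"}}</tool_call>'
... )
>>> calls[0].name, calls[0].arguments, text
('read_file', {'path': 'a.txt'}, '')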
"""
tool_calls: list[ToolCall] = []

def _replace(match: re.Match) -> str: # type: ignore[type-arg]
raw_json = match.group(1)
try:
payload = json.loads(raw_json)
except json.JSONDecodeError:
return match.group(0)
if not isinstance(payload, dict):
return match.group(0)
name = payload.get('name')
if not isinstance(name, str) or not name:
return match.group(0)
arguments = payload.get('arguments') or payload.get('parameters') or {}
if not isinstance(arguments, dict):
arguments = {}
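# The content format carries no call id, so synthesize a sequential one.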
call_id = f'call_{len(tool_calls)}'
tool_calls.append(ToolCall(id=call_id, name=name, arguments=arguments))
return ''

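# Well-formed blocks are stripped from the content; malformed ones are
# left in place untouched.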
cleaned = _TOOL_CALL_RE.sub(_replace, content)
if tool_calls:
cleaned = cleaned.strip()
return tool_calls, cleaned


class OpenAICompatClient:
"""Minimal OpenAI-compatible chat client for local model servers."""

@@ -166,6 +207,12 @@ def complete(
content = _normalize_content(message.get('content'))
tool_calls = self._parse_tool_calls_from_message(message)

# Fallback: some models (e.g. Qwen without native function-calling
# support) embed tool calls as <tool_call> blocks in the content.
if not tool_calls and content:
extracted, content = extract_tool_calls_from_content(content)
tool_calls = extracted

finish_reason = first_choice.get('finish_reason')
if finish_reason is not None and not isinstance(finish_reason, str):
finish_reason = str(finish_reason)
84 changes: 83 additions & 1 deletion tests/test_agent_runtime.py
@@ -184,7 +184,89 @@ def test_openai_client_parses_tool_calls(self) -> None:
self.assertEqual(turn.tool_calls[0].name, 'read_file')
self.assertEqual(turn.tool_calls[0].arguments['path'], 'hello.txt')

def test_openai_client_streams_content_and_usage(self) -> None:
def test_openai_client_parses_tool_calls_from_content(self) -> None:
"""Qwen/Hermes models may embed tool calls as <tool_call> blocks in content."""
tool_call_json = '{"name": "write_file", "arguments": {"path": "out.txt", "content": "hi"}}'
responses = [
{
'choices': [
{
'message': {
'role': 'assistant',
'content': f'<tool_call>\n{tool_call_json}\n</tool_call>',
},
'finish_reason': 'stop',
}
],
'usage': {'prompt_tokens': 10, 'completion_tokens': 20},
}
]
with patch('src.openai_compat.request.urlopen', side_effect=make_urlopen_side_effect(responses)):
client = OpenAICompatClient(
ModelConfig(
model='Qwen/Qwen3-Coder-30B-A3B-Instruct',
base_url='http://127.0.0.1:8000/v1',
)
)
turn = client.complete(
messages=[{'role': 'user', 'content': 'write out.txt'}],
tools=[],
)
self.assertEqual(turn.content, '')
self.assertEqual(len(turn.tool_calls), 1)
self.assertEqual(turn.tool_calls[0].name, 'write_file')
self.assertEqual(turn.tool_calls[0].arguments['path'], 'out.txt')
self.assertEqual(turn.tool_calls[0].arguments['content'], 'hi')

def test_agent_executes_embedded_tool_calls_from_content(self) -> None:
"""End-to-end: agent creates a file when the model uses <tool_call> content format."""
tool_call_json = '{"name": "write_file", "arguments": {"path": "out.txt", "content": "hello"}}'
responses = [
{
'choices': [
{
'message': {
'role': 'assistant',
'content': f'<tool_call>\n{tool_call_json}\n</tool_call>',
},
'finish_reason': 'stop',
}
],
'usage': {'prompt_tokens': 10, 'completion_tokens': 20},
},
{
'choices': [
{
'message': {
'role': 'assistant',
'content': 'Done.',
},
'finish_reason': 'stop',
}
],
'usage': {'prompt_tokens': 5, 'completion_tokens': 2},
},
]
with tempfile.TemporaryDirectory() as tmp_dir:
workspace = Path(tmp_dir)
with patch('src.openai_compat.request.urlopen', side_effect=make_urlopen_side_effect(responses)):
agent = LocalCodingAgent(
model_config=ModelConfig(
model='Qwen/Qwen3-Coder-30B-A3B-Instruct',
base_url='http://127.0.0.1:8000/v1',
),
runtime_config=AgentRuntimeConfig(
cwd=workspace,
permissions=AgentPermissions(allow_file_write=True),
),
)
result = agent.run('Create out.txt')
self.assertTrue((workspace / 'out.txt').exists())
self.assertEqual((workspace / 'out.txt').read_text(), 'hello')
self.assertEqual(result.final_output, 'Done.')
self.assertEqual(result.tool_calls, 1)


def test_openai_client_streams_content_and_usage(self) -> None:
responses = [
[
{'choices': [{'delta': {'content': 'Hello '}, 'finish_reason': None}]},