diff --git a/src/agent_runtime.py b/src/agent_runtime.py
index 60c6ec3..fef5f88 100644
--- a/src/agent_runtime.py
+++ b/src/agent_runtime.py
@@ -56,7 +56,7 @@
     ToolExecutionResult,
     UsageStats,
 )
-from .openai_compat import OpenAICompatClient, OpenAICompatError
+from .openai_compat import OpenAICompatClient, OpenAICompatError, extract_tool_calls_from_content
 from .plan_runtime import PlanRuntime
 from .plugin_runtime import PluginRuntime
 from .remote_runtime import RemoteRuntime
@@ -1212,9 +1212,37 @@ def _query_model(
             usage=usage,
         )
         assistant_message = session.messages[assistant_index]
+        turn_content = assistant_message.content
+        turn_tool_calls = self._tool_calls_from_message(assistant_message.tool_calls)
+
+        # Fallback: some models (e.g. Qwen without native function-calling
+        # support) embed tool calls as <tool_call> blocks in the content.
+        if not turn_tool_calls and turn_content:
+            extracted, cleaned_content = extract_tool_calls_from_content(turn_content)
+            if extracted:
+                turn_tool_calls = tuple(extracted)
+                turn_content = cleaned_content
+                openai_tool_calls = tuple(
+                    {
+                        'id': tc.id,
+                        'type': 'function',
+                        'function': {
+                            'name': tc.name,
+                            'arguments': json.dumps(tc.arguments, ensure_ascii=True),
+                        },
+                    }
+                    for tc in extracted
+                )
+                session.messages[assistant_index] = replace(
+                    assistant_message,
+                    content=cleaned_content,
+                    tool_calls=openai_tool_calls,
+                )
+                assistant_message = session.messages[assistant_index]
+
         turn = AssistantTurn(
-            content=assistant_message.content,
-            tool_calls=self._tool_calls_from_message(assistant_message.tool_calls),
+            content=turn_content,
+            tool_calls=turn_tool_calls,
             finish_reason=finish_reason,
             raw_message=assistant_message.to_openai_message(),
             usage=usage,
diff --git a/src/openai_compat.py b/src/openai_compat.py
index c30981f..abcc2d5 100644
--- a/src/openai_compat.py
+++ b/src/openai_compat.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+import re
 from typing import Any, Iterator
 from urllib import error, request
 
@@ -131,6 +132,53 @@ def _build_response_format(
     }
 
 
+_TOOL_CALL_RE = re.compile(r'<tool_call>\s*(.*?)\s*</tool_call>', re.DOTALL)
+
+
+def extract_tool_calls_from_content(content: str) -> tuple[list[ToolCall], str]:
+    """Extract embedded ``<tool_call>`` blocks from content text (Qwen/Hermes format).
+
+    Some models (e.g. Qwen served without native function-calling support) emit
+    tool invocations as ``<tool_call>{...}</tool_call>`` blocks inside the
+    assistant content instead of the structured ``tool_calls`` field. This
+    helper parses those blocks and returns the tool calls together with the
+    content string stripped of the ``<tool_call>`` blocks.
+
+    Returns ``(tool_calls, cleaned_content)``.
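+
+    Illustrative doctest (hypothetical input)::
+
+        >>> calls, rest = extract_tool_calls_from_content(
+        ...     'ok <tool_call>{"name": "ls", "arguments": {}}</tool_call>')
+        >>> (calls[0].name, rest)
+        ('ls', 'ok')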
+ """ + tool_calls: list[ToolCall] = [] + + def _replace(match: re.Match) -> str: # type: ignore[type-arg] + raw_json = match.group(1) + try: + payload = json.loads(raw_json) + except json.JSONDecodeError: + return match.group(0) + if not isinstance(payload, dict): + return match.group(0) + name = payload.get('name') + if not isinstance(name, str) or not name: + return match.group(0) + arguments = payload.get('arguments') or payload.get('parameters') or {} + if not isinstance(arguments, dict): + arguments = {} + call_id = f'call_{len(tool_calls)}' + tool_calls.append(ToolCall(id=call_id, name=name, arguments=arguments)) + return '' + + cleaned = _TOOL_CALL_RE.sub(_replace, content) + if tool_calls: + cleaned = cleaned.strip() + return tool_calls, cleaned + + class OpenAICompatClient: """Minimal OpenAI-compatible chat client for local model servers.""" @@ -166,6 +207,12 @@ def complete( content = _normalize_content(message.get('content')) tool_calls = self._parse_tool_calls_from_message(message) + # Fallback: some models (e.g. Qwen without native function-calling + # support) embed tool calls as blocks in the content. + if not tool_calls and content: + extracted, content = extract_tool_calls_from_content(content) + tool_calls = extracted + finish_reason = first_choice.get('finish_reason') if finish_reason is not None and not isinstance(finish_reason, str): finish_reason = str(finish_reason) diff --git a/tests/test_agent_runtime.py b/tests/test_agent_runtime.py index ecdbc0d..aa20a03 100644 --- a/tests/test_agent_runtime.py +++ b/tests/test_agent_runtime.py @@ -184,7 +184,89 @@ def test_openai_client_parses_tool_calls(self) -> None: self.assertEqual(turn.tool_calls[0].name, 'read_file') self.assertEqual(turn.tool_calls[0].arguments['path'], 'hello.txt') - def test_openai_client_streams_content_and_usage(self) -> None: + def test_openai_client_parses_tool_calls_from_content(self) -> None: + """Qwen/Hermes models may embed tool calls as blocks in content.""" + tool_call_json = '{"name": "write_file", "arguments": {"path": "out.txt", "content": "hi"}}' + responses = [ + { + 'choices': [ + { + 'message': { + 'role': 'assistant', + 'content': f'\n{tool_call_json}\n', + }, + 'finish_reason': 'stop', + } + ], + 'usage': {'prompt_tokens': 10, 'completion_tokens': 20}, + } + ] + with patch('src.openai_compat.request.urlopen', side_effect=make_urlopen_side_effect(responses)): + client = OpenAICompatClient( + ModelConfig( + model='Qwen/Qwen3-Coder-30B-A3B-Instruct', + base_url='http://127.0.0.1:8000/v1', + ) + ) + turn = client.complete( + messages=[{'role': 'user', 'content': 'write out.txt'}], + tools=[], + ) + self.assertEqual(turn.content, '') + self.assertEqual(len(turn.tool_calls), 1) + self.assertEqual(turn.tool_calls[0].name, 'write_file') + self.assertEqual(turn.tool_calls[0].arguments['path'], 'out.txt') + self.assertEqual(turn.tool_calls[0].arguments['content'], 'hi') + + def test_agent_executes_embedded_tool_calls_from_content(self) -> None: + """End-to-end: agent creates a file when the model uses content format.""" + tool_call_json = '{"name": "write_file", "arguments": {"path": "out.txt", "content": "hello"}}' + responses = [ + { + 'choices': [ + { + 'message': { + 'role': 'assistant', + 'content': f'\n{tool_call_json}\n', + }, + 'finish_reason': 'stop', + } + ], + 'usage': {'prompt_tokens': 10, 'completion_tokens': 20}, + }, + { + 'choices': [ + { + 'message': { + 'role': 'assistant', + 'content': 'Done.', + }, + 'finish_reason': 'stop', + } + ], + 'usage': {'prompt_tokens': 5, 
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            workspace = Path(tmp_dir)
+            with patch('src.openai_compat.request.urlopen', side_effect=make_urlopen_side_effect(responses)):
+                agent = LocalCodingAgent(
+                    model_config=ModelConfig(
+                        model='Qwen/Qwen3-Coder-30B-A3B-Instruct',
+                        base_url='http://127.0.0.1:8000/v1',
+                    ),
+                    runtime_config=AgentRuntimeConfig(
+                        cwd=workspace,
+                        permissions=AgentPermissions(allow_file_write=True),
+                    ),
+                )
+                result = agent.run('Create out.txt')
+            self.assertTrue((workspace / 'out.txt').exists())
+            self.assertEqual((workspace / 'out.txt').read_text(), 'hello')
+            self.assertEqual(result.final_output, 'Done.')
+            self.assertEqual(result.tool_calls, 1)
+
+    def test_openai_client_streams_content_and_usage(self) -> None:
         responses = [
             [
                 {'choices': [{'delta': {'content': 'Hello '}, 'finish_reason': None}]},