From 3d7832a750b589424d8ee06bb76bbfa037b1965a Mon Sep 17 00:00:00 2001 From: Liang Jiaqing Date: Thu, 16 Apr 2026 12:56:17 +0800 Subject: [PATCH] feat: multi code_run support, inline_eval logging & cwd fix, tool reinject upgrade --- assets/tool_usable_history.json | 1 + assets/tools_schema.json | 18 ++++++++++-------- assets/tools_schema_cn.json | 18 ++++++++++-------- frontends/stapp.py | 17 ++++++++++++----- ga.py | 14 ++++++++------ memory/plan_sop.md | 2 +- 6 files changed, 42 insertions(+), 28 deletions(-) create mode 100644 assets/tool_usable_history.json diff --git a/assets/tool_usable_history.json b/assets/tool_usable_history.json new file mode 100644 index 0000000..1539e94 --- /dev/null +++ b/assets/tool_usable_history.json @@ -0,0 +1 @@ +[{"role":"user","content":[{"type":"text","text":"code_run个helloworld,根据结果简单评价你的工具配置"}]},{"role":"assistant","content":[{"type":"text","text":"\n\n用户要求code_run helloworld并评价工具配置\n\n```python\nprint(\"Hello, World!\")\n```"},{"type":"tool_use","id":"tooluse_GlmTNw190WGgK4sBBOiDHK","name":"code_run","input":{"script":"print(\"Hello, World!\")"}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tooluse_GlmTNw190WGgK4sBBOiDHK","content":"{\"status\": \"success\", \"stdout\": \"Hello, World!\\r\\n\", \"exit_code\": 0}"},{"type":"text","text":"\n### [WORKING MEMORY]\n\n\n[USER]: code_run个helloworld,根据结果简单评价你的工具配置\n\nCurrent turn: 1\n"}]},{"role":"assistant","content":[{"type":"text","text":"\n\ncode_run helloworld成功,评价工具配置\n\n工具配置没问题。`code_run` 返回了结构化的 JSON(`status`、`stdout`、`exit_code`),执行链路通畅,cwd 默认落在 temp 目录。\n\n刚才还验证了 `inline_eval` 能正确访问 agent 内部状态并导出 history,`file_patch` 也能精准改文件。整套工具链从代码执行、文件读写、到 agent 内省都是通的。"}]}] \ No newline at end of file diff --git a/assets/tools_schema.json b/assets/tools_schema.json index 42e1fb7..e0b281c 100644 --- a/assets/tools_schema.json +++ b/assets/tools_schema.json @@ -1,19 +1,20 @@ [ {"type": "function", "function": { "name": "code_run", - "description": "Code executor. Prefer python. No concurrent calls. Prefer code in ```python/powershell blocks in reply body to avoid escaping. No hardcoding bulk data", + "description": "Code executor. Prefer python. Multi-call OK, use script param. Reply code block is executed if no script arg; prefer for single call to avoid escaping. No hardcoding bulk data", "parameters": {"type": "object", "properties": { "script": {"type": "string", "description": "[Mutually exclusive] NEVER use this param when use reply code block."}, "type": {"type": "string", "enum": ["python", "powershell"], "description": "Code type", "default": "python"}, "timeout": {"type": "integer", "description": "in seconds", "default": 60}, - "cwd": {"type": "string", "description": "Working directory, defaults to cwd"}}} + "cwd": {"type": "string", "description": "Working directory, defaults to cwd"}, + "inline_eval": {"type": "boolean", "description": "Only when usage is explicitly specified."}}} }}, {"type": "function", "function": { "name": "file_read", "description": "Read file. Read before modify for latest context and line numbers", "parameters": {"type": "object", "properties": { "path": {"type": "string", "description": "Relative or absolute"}, - "start": {"type": "integer", "description": "Start line number (1-based)", "default": 1}, + "start": {"type": "integer", "description": "Start line number (1-based)"}, "count": {"type": "integer", "description": "Number of lines to read", "default": 200}, "keyword": {"type": "string", "description": "[Optional] If provided, returns first match (case-insensitive) with context"}, "show_linenos": {"type": "boolean", "description": "Show line numbers", "default": true}}} @@ -37,17 +38,18 @@ "name": "web_scan", "description": "Get simplified HTML and tab list. Removes hidden/floating/covered elements. Call after switching pages", "parameters": {"type": "object", "properties": { - "tabs_only": {"type": "boolean", "description": "Show tab list only, no HTML", "default": false}, + "tabs_only": {"type": "boolean", "description": "Show tab list only, no HTML"}, "switch_tab_id": {"type": "string", "description": "[Optional] Tab ID to switch to"}, - "text_only": {"type": "boolean", "description": "Plain text only, no HTML", "default": false}}} + "text_only": {"type": "boolean", "description": "Plain text only, no HTML"}}} }}, {"type": "function", "function": { "name": "web_execute_js", - "description": "Execute JS to control browser. No guessing. Act accurately to reduce web_scan calls. Put code in ```javascript blocks in reply body to avoid escaping", + "description": "Execute JS. Multi-call OK with different switch_tab_id. No guessing. Act accurately to reduce web_scan calls. Execute JS in ```javascript blocks if no script arg, prefer to avoid escaping", "parameters": {"type": "object", "properties": { "script": {"type": "string", "description": "[Mutually exclusive] JS code or script path. NEVER use this param when use reply code block"}, - "save_to_file": {"type": "string", "description": "file path; **only** for long result", "default": ""}, - "no_monitor": {"type": "boolean", "description": "Skip page change monitoring, saves 2-3s. Only for reads, not for page actions", "default": false}}} + "save_to_file": {"type": "string", "description": "file path; **only** for long result"}, + "no_monitor": {"type": "boolean", "description": "Skip page change monitoring, saves 2-3s. Only for reads, not for page actions"}, + "switch_tab_id": {"type": "string", "description": "[Optional] Tab ID to switch to before executing"}}} }}, {"type": "function", "function": { "name": "update_working_checkpoint", diff --git a/assets/tools_schema_cn.json b/assets/tools_schema_cn.json index ad7a6b6..fbf729d 100644 --- a/assets/tools_schema_cn.json +++ b/assets/tools_schema_cn.json @@ -1,19 +1,20 @@ [ {"type": "function", "function": { "name": "code_run", - "description": "代码执行器。优先使用python。禁同时调用多个。为免转义问题,代码放正文 ```python/powershell 块中。禁硬编码大量数据", + "description": "代码执行器。优先使用python。支持Multi-call,并行时用script参数。无script参数时正文代码块会被执行,单次调用优先使用以免转义。禁硬编码大量数据", "parameters": {"type": "object", "properties": { "script": {"type": "string", "description": "[Optional] 要执行的代码。为免转义建议留空,改用正文代码块(与此参数互斥)"}, "type": {"type": "string", "enum": ["python", "powershell"], "description": "代码类型", "default": "python"}, "timeout": {"type": "integer", "description": "执行超时时间(秒)", "default": 60}, - "cwd": {"type": "string", "description": "工作目录,默认为当前工作目录"}}} + "cwd": {"type": "string", "description": "工作目录,默认为当前工作目录"}, + "inline_eval": {"type": "boolean", "description": "仅在明确要求时使用"}}} }}, {"type": "function", "function": { "name": "file_read", "description": "读取文件内容。建议在修改文件前先读取,以确保获取最新的上下文和行号。支持分页读取或关键字搜索", "parameters": {"type": "object", "properties": { "path": {"type": "string", "description": "文件相对或绝对路径"}, - "start": {"type": "integer", "description": "起始行号(从 1 开始)", "default": 1}, + "start": {"type": "integer", "description": "起始行号(从 1 开始)"}, "count": {"type": "integer", "description": "读取的行数", "default": 200}, "keyword": {"type": "string", "description": "可选搜索关键字。如果提供,将返回第一个匹配项(忽略大小写)及其周边的内容"}, "show_linenos": {"type": "boolean", "description": "是否显示行号,建议开启以辅助 file_patch 定位", "default": true}}} @@ -37,17 +38,18 @@ "name": "web_scan", "description": "获取当前页面的简化HTML内容和标签页列表。会移除隐藏/浮动/被遮盖的元素。切换页面后一般应先调用查看", "parameters": {"type": "object", "properties": { - "tabs_only": {"type": "boolean", "description": "仅返回标签页列表和当前标签信息,不获取HTML内容", "default": false}, + "tabs_only": {"type": "boolean", "description": "仅返回标签页列表和当前标签信息,不获取HTML内容"}, "switch_tab_id": {"type": "string", "description": "可选的标签页 ID。如果提供,系统将在扫描前切换到该标签页"}, - "text_only": {"type": "boolean", "description": "只要纯文本不要HTML信息", "default": false}}} + "text_only": {"type": "boolean", "description": "只要纯文本不要HTML"}}} }}, {"type": "function", "function": { "name": "web_execute_js", - "description": "执行 JS 控制浏览器。禁止猜测,准确操作以减少 web_scan 调用。为免转义问题,代码优先考虑放回复正文 ```javascript 块", + "description": "执行JS。支持Multi-call,用不同switch_tab_id并行操作多标签页。禁止猜测,准确操作以减少 web_scan 调用。无script参数时执行正文 ```javascript 块,以免转义", "parameters": {"type": "object", "properties": { "script": {"type": "string", "description": "[Optional] JS代码或路径。为免转义建议留空,改用正文代码块(与此参数互斥)"}, - "save_to_file": {"type": "string", "description": "结果存文件,适合返回值较长时", "default": ""}, - "no_monitor": {"type": "boolean", "description": "跳过页面变更监控,省2-3秒。仅在纯读取信息时设置,页面操作时不要设置", "default": false}}} + "save_to_file": {"type": "string", "description": "结果存文件,适合返回值较长时"}, + "no_monitor": {"type": "boolean", "description": "跳过页面变更监控,省2-3秒。仅在纯读取信息时设置,页面操作时不要设置"}, + "switch_tab_id": {"type": "string", "description": "可选的标签页 ID,切换到该标签页执行"}}} }}, {"type": "function", "function": { "name": "update_working_checkpoint", diff --git a/frontends/stapp.py b/frontends/stapp.py index a43574f..6eb9787 100644 --- a/frontends/stapp.py +++ b/frontends/stapp.py @@ -7,7 +7,8 @@ try: sys.stdout.reconfigure(errors='replace') except: pass try: sys.stderr.reconfigure(errors='replace') except: pass -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +script_dir = os.path.dirname(__file__) +sys.path.append(os.path.abspath(os.path.join(script_dir, '..'))) import streamlit as st import time, json, re, threading, queue @@ -41,12 +42,18 @@ def render_sidebar(): agent.next_llm(); st.rerun(scope="fragment") if st.button("强行停止任务"): agent.abort(); st.toast("已发送停止信号"); st.rerun() - if st.button("重新注入System Prompt"): - agent.llmclient.last_tools = ''; st.toast("下次将重新注入System Prompt") + if st.button("重新注入工具"): + agent.llmclient.last_tools = '' + try: + hist_path = os.path.join(script_dir, '..', 'assets', 'tool_usable_history.json') + with open(hist_path, 'r', encoding='utf-8') as f: tool_hist = json.load(f) + agent.llmclient.backend.history.extend(tool_hist) + st.toast(f"已重新注入工具,追加了 {len(tool_hist)} 条示范记录") + except Exception as e: st.toast(f"注入工具示范失败: {e}") if st.button("🐱 桌面宠物"): kwargs = {'creationflags': 0x08} if sys.platform == 'win32' else {} - pet_script = os.path.join(os.path.dirname(__file__), 'desktop_pet_v2.pyw') - if not os.path.exists(pet_script): pet_script = os.path.join(os.path.dirname(__file__), 'desktop_pet.pyw') + pet_script = os.path.join(script_dir, 'desktop_pet_v2.pyw') + if not os.path.exists(pet_script): pet_script = os.path.join(script_dir, 'desktop_pet.pyw') subprocess.Popen([sys.executable, pet_script], **kwargs) def _pet_req(q): def _do(): diff --git a/ga.py b/ga.py index 9cd9fbe..3eec2b7 100644 --- a/ga.py +++ b/ga.py @@ -295,8 +295,6 @@ class GenericAgentHandler(BaseHandler): def do_code_run(self, args, response): '''执行代码片段,有长度限制,不允许代码中放大量数据,如有需要应当通过文件读取进行。''' - if response.tool_calls and sum(1 for tc in response.tool_calls[:args.get('_index', 0)] if tc.function.name == 'code_run') > 0: - return StepOutcome("[ERROR] no multi code_run in one round!", next_prompt="\n") code_type = args.get("type", "python") code = args.get("code") or args.get("script") if not code: @@ -306,11 +304,15 @@ class GenericAgentHandler(BaseHandler): raw_path = os.path.join(self.cwd, args.get("cwd", './')) cwd = os.path.normpath(os.path.abspath(raw_path)) code_cwd = os.path.normpath(self.cwd) - if args.get("_inline_eval"): + if code_type == 'python' and args.get("inline_eval"): ns = {'handler': self, 'parent': self.parent} - try: result = repr(eval(code, ns)) - except SyntaxError: exec(code, ns); result = ns.get('_r', 'OK') - except Exception as e: result = f'Error: {e}' + old_cwd = os.getcwd() + try: + os.chdir(cwd) + try: result = repr(eval(code, ns)) + except SyntaxError: exec(code, ns); result = ns.get('_r', 'OK') + except Exception as e: result = f'Error: {e}' + finally: os.chdir(old_cwd) else: result = yield from code_run(code, code_type, timeout, cwd, code_cwd=code_cwd, stop_signal=self.code_stop_signal) next_prompt = self._get_anchor_prompt(skip=args.get('_index', 0) > 0) return StepOutcome(result, next_prompt=next_prompt) diff --git a/memory/plan_sop.md b/memory/plan_sop.md index 44909f4..a597e7c 100644 --- a/memory/plan_sop.md +++ b/memory/plan_sop.md @@ -2,7 +2,7 @@ **触发**:3步以上有依赖/多文件协同/条件分支/需并行 | **禁用**:1-2步简单任务直接做 任务开始前必须先创建工作目录 `./plan_XXX/`(XXX=任务英文短名) -使用code_run({'_inline_eval':True, 'script':'handler.enter_plan_mode("./plan_XXX/plan.md")'})进入plan模式(使用未在schema中描述的秘密参数_inline_eval) +单独使用一个code_run({'inline_eval':True, 'script':'handler.enter_plan_mode("./plan_XXX/plan.md")'})进入plan模式 ---