feat: multi code_run support, inline_eval logging & cwd fix, tool reinject upgrade

2026-04-16 12:56:17 +08:00
parent a3c15c54af
commit 3d7832a750
6 changed files with 42 additions and 28 deletions
--- a/assets/tool_usable_history.json
+++ b/assets/tool_usable_history.json
@@ -0,0 +1 @@
+[{"role":"user","content":[{"type":"text","text":"code_run个helloworld，根据结果简单评价你的工具配置"}]},{"role":"assistant","content":[{"type":"text","text":"\n\n<summary>用户要求code_run helloworld并评价工具配置</summary>\n\n```python\nprint(\"Hello, World!\")\n```"},{"type":"tool_use","id":"tooluse_GlmTNw190WGgK4sBBOiDHK","name":"code_run","input":{"script":"print(\"Hello, World!\")"}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tooluse_GlmTNw190WGgK4sBBOiDHK","content":"{\"status\": \"success\", \"stdout\": \"Hello, World!\\r\\n\", \"exit_code\": 0}"},{"type":"text","text":"\n### [WORKING MEMORY]\n<history>\n\n[USER]: code_run个helloworld，根据结果简单评价你的工具配置\n</history>\nCurrent turn: 1\n"}]},{"role":"assistant","content":[{"type":"text","text":"\n\n<summary>code_run helloworld成功，评价工具配置</summary>\n\n工具配置没问题。`code_run` 返回了结构化的 JSON（`status`、`stdout`、`exit_code`），执行链路通畅，cwd 默认落在 temp 目录。\n\n刚才还验证了 `inline_eval` 能正确访问 agent 内部状态并导出 history，`file_patch` 也能精准改文件。整套工具链从代码执行、文件读写、到 agent 内省都是通的。"}]}]
--- a/assets/tools_schema.json
+++ b/assets/tools_schema.json
@@ -1,19 +1,20 @@
 [
  {"type": "function", "function": {
    "name": "code_run",
-    "description": "Code executor. Prefer python. No concurrent calls. Prefer code in ```python/powershell blocks in reply body to avoid escaping. No hardcoding bulk data",
+    "description": "Code executor. Prefer python. Multi-call OK; for parallel calls pass code via the script param. If no script arg is given, the reply's code block is executed; prefer that for a single call to avoid escaping. No hardcoding bulk data",
    "parameters": {"type": "object", "properties": {
      "script": {"type": "string", "description": "[Mutually exclusive] NEVER use this param when use reply code block."},
      "type": {"type": "string", "enum": ["python", "powershell"], "description": "Code type", "default": "python"},
      "timeout": {"type": "integer", "description": "in seconds", "default": 60},
-      "cwd": {"type": "string", "description": "Working directory, defaults to cwd"}}}
+      "cwd": {"type": "string", "description": "Working directory, defaults to cwd"},
+      "inline_eval": {"type": "boolean", "description": "Use only when explicitly instructed to."}}}
  }},
  {"type": "function", "function": {
    "name": "file_read",
    "description": "Read file. Read before modify for latest context and line numbers",
    "parameters": {"type": "object", "properties": {
      "path": {"type": "string", "description": "Relative or absolute"},
-      "start": {"type": "integer", "description": "Start line number (1-based)", "default": 1},
+      "start": {"type": "integer", "description": "Start line number (1-based)"},
      "count": {"type": "integer", "description": "Number of lines to read", "default": 200},
      "keyword": {"type": "string", "description": "[Optional] If provided, returns first match (case-insensitive) with context"},
      "show_linenos": {"type": "boolean", "description": "Show line numbers", "default": true}}}
@@ -37,17 +38,18 @@
    "name": "web_scan",
    "description": "Get simplified HTML and tab list. Removes hidden/floating/covered elements. Call after switching pages",
    "parameters": {"type": "object", "properties": {
-      "tabs_only": {"type": "boolean", "description": "Show tab list only, no HTML", "default": false},
+      "tabs_only": {"type": "boolean", "description": "Show tab list only, no HTML"},
      "switch_tab_id": {"type": "string", "description": "[Optional] Tab ID to switch to"},
-      "text_only": {"type": "boolean", "description": "Plain text only, no HTML", "default": false}}}
+      "text_only": {"type": "boolean", "description": "Plain text only, no HTML"}}}
  }},
  {"type": "function", "function": {
    "name": "web_execute_js",
-    "description": "Execute JS to control browser. No guessing. Act accurately to reduce web_scan calls. Put code in ```javascript blocks in reply body to avoid escaping",
+    "description": "Execute JS. Multi-call OK with different switch_tab_id. No guessing. Act accurately to reduce web_scan calls. If no script arg is given, the reply's ```javascript block is executed; prefer that to avoid escaping",
    "parameters": {"type": "object", "properties": {
      "script": {"type": "string", "description": "[Mutually exclusive] JS code or script path. NEVER use this param when use reply code block"},
-      "save_to_file": {"type": "string", "description": "file path; **only** for long result", "default": ""},
-      "no_monitor": {"type": "boolean", "description": "Skip page change monitoring, saves 2-3s. Only for reads, not for page actions", "default": false}}}
+      "save_to_file": {"type": "string", "description": "file path; **only** for long result"},
+      "no_monitor": {"type": "boolean", "description": "Skip page change monitoring, saves 2-3s. Only for reads, not for page actions"},
+      "switch_tab_id": {"type": "string", "description": "[Optional] Tab ID to switch to before executing"}}}
  }},
  {"type": "function", "function": {
    "name": "update_working_checkpoint",
--- a/assets/tools_schema_cn.json
+++ b/assets/tools_schema_cn.json
@@ -1,19 +1,20 @@
 [
  {"type": "function", "function": {
    "name": "code_run",
-    "description": "代码执行器。优先使用python。禁同时调用多个。为免转义问题，代码放正文 ```python/powershell 块中。禁硬编码大量数据",
+    "description": "代码执行器。优先使用python。支持Multi-call，并行时用script参数。无script参数时正文代码块会被执行，单次调用优先使用以免转义。禁硬编码大量数据",
    "parameters": {"type": "object", "properties": {
      "script": {"type": "string", "description": "[Optional] 要执行的代码。为免转义建议留空，改用正文代码块（与此参数互斥）"},
      "type": {"type": "string", "enum": ["python", "powershell"], "description": "代码类型", "default": "python"},
      "timeout": {"type": "integer", "description": "执行超时时间（秒）", "default": 60},
-      "cwd": {"type": "string", "description": "工作目录，默认为当前工作目录"}}}
+      "cwd": {"type": "string", "description": "工作目录，默认为当前工作目录"},
+      "inline_eval": {"type": "boolean", "description": "仅在明确要求时使用"}}}
  }},
  {"type": "function", "function": {
    "name": "file_read",
    "description": "读取文件内容。建议在修改文件前先读取，以确保获取最新的上下文和行号。支持分页读取或关键字搜索",
    "parameters": {"type": "object", "properties": {
      "path": {"type": "string", "description": "文件相对或绝对路径"},
-      "start": {"type": "integer", "description": "起始行号（从 1 开始）", "default": 1},
+      "start": {"type": "integer", "description": "起始行号（从 1 开始）"},
      "count": {"type": "integer", "description": "读取的行数", "default": 200},
      "keyword": {"type": "string", "description": "可选搜索关键字。如果提供，将返回第一个匹配项（忽略大小写）及其周边的内容"},
      "show_linenos": {"type": "boolean", "description": "是否显示行号，建议开启以辅助 file_patch 定位", "default": true}}}
@@ -37,17 +38,18 @@
    "name": "web_scan",
    "description": "获取当前页面的简化HTML内容和标签页列表。会移除隐藏/浮动/被遮盖的元素。切换页面后一般应先调用查看",
    "parameters": {"type": "object", "properties": {
-      "tabs_only": {"type": "boolean", "description": "仅返回标签页列表和当前标签信息，不获取HTML内容", "default": false},
+      "tabs_only": {"type": "boolean", "description": "仅返回标签页列表和当前标签信息，不获取HTML内容"},
      "switch_tab_id": {"type": "string", "description": "可选的标签页 ID。如果提供，系统将在扫描前切换到该标签页"},
-      "text_only": {"type": "boolean", "description": "只要纯文本不要HTML信息", "default": false}}}
+      "text_only": {"type": "boolean", "description": "只要纯文本不要HTML"}}}
  }},
  {"type": "function", "function": {
    "name": "web_execute_js",
-    "description": "执行 JS 控制浏览器。禁止猜测，准确操作以减少 web_scan 调用。为免转义问题，代码优先考虑放回复正文 ```javascript 块",
+    "description": "执行JS。支持Multi-call，用不同switch_tab_id并行操作多标签页。禁止猜测，准确操作以减少 web_scan 调用。无script参数时执行正文 ```javascript 块，以免转义",
    "parameters": {"type": "object", "properties": {
      "script": {"type": "string", "description": "[Optional] JS代码或路径。为免转义建议留空，改用正文代码块（与此参数互斥）"},
-      "save_to_file": {"type": "string", "description": "结果存文件，适合返回值较长时", "default": ""},
-      "no_monitor": {"type": "boolean", "description": "跳过页面变更监控，省2-3秒。仅在纯读取信息时设置，页面操作时不要设置", "default": false}}}
+      "save_to_file": {"type": "string", "description": "结果存文件，适合返回值较长时"},
+      "no_monitor": {"type": "boolean", "description": "跳过页面变更监控，省2-3秒。仅在纯读取信息时设置，页面操作时不要设置"},
+      "switch_tab_id": {"type": "string", "description": "可选的标签页 ID，切换到该标签页执行"}}}
  }},
  {"type": "function", "function": {
    "name": "update_working_checkpoint",
--- a/frontends/stapp.py
+++ b/frontends/stapp.py
@@ -7,7 +7,8 @@ try: sys.stdout.reconfigure(errors='replace')
 except: pass
 try: sys.stderr.reconfigure(errors='replace')
 except: pass
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+script_dir = os.path.dirname(__file__)
+sys.path.append(os.path.abspath(os.path.join(script_dir, '..')))

 import streamlit as st
 import time, json, re, threading, queue
@@ -41,12 +42,18 @@ def render_sidebar():
        agent.next_llm(); st.rerun(scope="fragment")
    if st.button("强行停止任务"):
        agent.abort(); st.toast("已发送停止信号"); st.rerun()
-    if st.button("重新注入System Prompt"):
-        agent.llmclient.last_tools = ''; st.toast("下次将重新注入System Prompt")
+    if st.button("重新注入工具"):
+        agent.llmclient.last_tools = ''
+        try:
+            hist_path = os.path.join(script_dir, '..', 'assets', 'tool_usable_history.json')
+            with open(hist_path, 'r', encoding='utf-8') as f: tool_hist = json.load(f)
+            agent.llmclient.backend.history.extend(tool_hist)
+            st.toast(f"已重新注入工具，追加了 {len(tool_hist)} 条示范记录")
+        except Exception as e: st.toast(f"注入工具示范失败: {e}")
    if st.button("🐱 桌面宠物"):
        kwargs = {'creationflags': 0x08} if sys.platform == 'win32' else {}
-        pet_script = os.path.join(os.path.dirname(__file__), 'desktop_pet_v2.pyw')
-        if not os.path.exists(pet_script): pet_script = os.path.join(os.path.dirname(__file__), 'desktop_pet.pyw')
+        pet_script = os.path.join(script_dir, 'desktop_pet_v2.pyw')
+        if not os.path.exists(pet_script): pet_script = os.path.join(script_dir, 'desktop_pet.pyw')
        subprocess.Popen([sys.executable, pet_script], **kwargs)
        def _pet_req(q):
            def _do():
--- a/ga.py
+++ b/ga.py
@@ -295,8 +295,6 @@ class GenericAgentHandler(BaseHandler):

    def do_code_run(self, args, response):
        '''执行代码片段，有长度限制，不允许代码中放大量数据，如有需要应当通过文件读取进行。'''
-        if response.tool_calls and sum(1 for tc in response.tool_calls[:args.get('_index', 0)] if tc.function.name == 'code_run') > 0:
-            return StepOutcome("[ERROR] no multi code_run in one round!", next_prompt="\n") 
        code_type = args.get("type", "python")
        code = args.get("code") or args.get("script")
        if not code:
@@ -306,11 +304,15 @@ class GenericAgentHandler(BaseHandler):
        raw_path = os.path.join(self.cwd, args.get("cwd", './'))
        cwd = os.path.normpath(os.path.abspath(raw_path))
        code_cwd = os.path.normpath(self.cwd)
-        if args.get("_inline_eval"):
+        if code_type == 'python' and args.get("inline_eval"):
            ns = {'handler': self, 'parent': self.parent}
+            old_cwd = os.getcwd()
+            try:
+                os.chdir(cwd)
                try: result = repr(eval(code, ns))
                except SyntaxError: exec(code, ns); result = ns.get('_r', 'OK')
                except Exception as e: result = f'Error: {e}'
+            finally: os.chdir(old_cwd)
        else: result = yield from code_run(code, code_type, timeout, cwd, code_cwd=code_cwd, stop_signal=self.code_stop_signal)
        next_prompt = self._get_anchor_prompt(skip=args.get('_index', 0) > 0)
        return StepOutcome(result, next_prompt=next_prompt)
--- a/memory/plan_sop.md
+++ b/memory/plan_sop.md
@@ -2,7 +2,7 @@

 **触发**：3步以上有依赖/多文件协同/条件分支/需并行 | **禁用**：1-2步简单任务直接做
 任务开始前必须先创建工作目录 `./plan_XXX/`（XXX=任务英文短名）
-使用code_run({'_inline_eval':True, 'script':'handler.enter_plan_mode("./plan_XXX/plan.md")'})进入plan模式（使用未在schema中描述的秘密参数_inline_eval）
+单独使用一个code_run({'inline_eval':True, 'script':'handler.enter_plan_mode("./plan_XXX/plan.md")'})进入plan模式

 ---
				`@@ -0,0 +1 @@`
				[{"role":"user","content":[{"type":"text","text":"code_run个helloworld，根据结果简单评价你的工具配置"}]},{"role":"assistant","content":[{"type":"text","text":"\n\n<summary>用户要求code_run helloworld并评价工具配置</summary>\n\n```python\nprint(\"Hello, World!\")\n```"},{"type":"tool_use","id":"tooluse_GlmTNw190WGgK4sBBOiDHK","name":"code_run","input":{"script":"print(\"Hello, World!\")"}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tooluse_GlmTNw190WGgK4sBBOiDHK","content":"{\"status\": \"success\", \"stdout\": \"Hello, World!\\r\\n\", \"exit_code\": 0}"},{"type":"text","text":"\n### [WORKING MEMORY]\n<history>\n\n[USER]: code_run个helloworld，根据结果简单评价你的工具配置\n</history>\nCurrent turn: 1\n"}]},{"role":"assistant","content":[{"type":"text","text":"\n\n<summary>code_run helloworld成功，评价工具配置</summary>\n\n工具配置没问题。`code_run` 返回了结构化的 JSON（`status`、`stdout`、`exit_code`），执行链路通畅，cwd 默认落在 temp 目录。\n\n刚才还验证了 `inline_eval` 能正确访问 agent 内部状态并导出 history，`file_patch` 也能精准改文件。整套工具链从代码执行、文件读写、到 agent 内省都是通的。"}]}]