feat: multi code_run support, inline_eval logging & cwd fix, tool reinject upgrade

This commit is contained in:
Liang Jiaqing
2026-04-16 12:56:17 +08:00
parent a3c15c54af
commit 3d7832a750
6 changed files with 42 additions and 28 deletions

View File

@@ -0,0 +1 @@
[{"role":"user","content":[{"type":"text","text":"code_run个helloworld根据结果简单评价你的工具配置"}]},{"role":"assistant","content":[{"type":"text","text":"\n\n<summary>用户要求code_run helloworld并评价工具配置</summary>\n\n```python\nprint(\"Hello, World!\")\n```"},{"type":"tool_use","id":"tooluse_GlmTNw190WGgK4sBBOiDHK","name":"code_run","input":{"script":"print(\"Hello, World!\")"}}]},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tooluse_GlmTNw190WGgK4sBBOiDHK","content":"{\"status\": \"success\", \"stdout\": \"Hello, World!\\r\\n\", \"exit_code\": 0}"},{"type":"text","text":"\n### [WORKING MEMORY]\n<history>\n\n[USER]: code_run个helloworld根据结果简单评价你的工具配置\n</history>\nCurrent turn: 1\n"}]},{"role":"assistant","content":[{"type":"text","text":"\n\n<summary>code_run helloworld成功评价工具配置</summary>\n\n工具配置没问题。`code_run` 返回了结构化的 JSON`status`、`stdout`、`exit_code`执行链路通畅cwd 默认落在 temp 目录。\n\n刚才还验证了 `inline_eval` 能正确访问 agent 内部状态并导出 history`file_patch` 也能精准改文件。整套工具链从代码执行、文件读写、到 agent 内省都是通的。"}]}]

View File

@@ -1,19 +1,20 @@
[
{"type": "function", "function": {
"name": "code_run",
"description": "Code executor. Prefer python. No concurrent calls. Prefer code in ```python/powershell blocks in reply body to avoid escaping. No hardcoding bulk data",
"description": "Code executor. Prefer python. Multi-call OK, use script param. Reply code block is executed if no script arg; prefer for single call to avoid escaping. No hardcoding bulk data",
"parameters": {"type": "object", "properties": {
"script": {"type": "string", "description": "[Mutually exclusive] NEVER use this param when using a reply code block."},
"type": {"type": "string", "enum": ["python", "powershell"], "description": "Code type", "default": "python"},
"timeout": {"type": "integer", "description": "in seconds", "default": 60},
"cwd": {"type": "string", "description": "Working directory, defaults to cwd"}}}
"cwd": {"type": "string", "description": "Working directory, defaults to cwd"},
"inline_eval": {"type": "boolean", "description": "Only when usage is explicitly specified."}}}
}},
{"type": "function", "function": {
"name": "file_read",
"description": "Read file. Read before modify for latest context and line numbers",
"parameters": {"type": "object", "properties": {
"path": {"type": "string", "description": "Relative or absolute"},
"start": {"type": "integer", "description": "Start line number (1-based)", "default": 1},
"start": {"type": "integer", "description": "Start line number (1-based)"},
"count": {"type": "integer", "description": "Number of lines to read", "default": 200},
"keyword": {"type": "string", "description": "[Optional] If provided, returns first match (case-insensitive) with context"},
"show_linenos": {"type": "boolean", "description": "Show line numbers", "default": true}}}
@@ -37,17 +38,18 @@
"name": "web_scan",
"description": "Get simplified HTML and tab list. Removes hidden/floating/covered elements. Call after switching pages",
"parameters": {"type": "object", "properties": {
"tabs_only": {"type": "boolean", "description": "Show tab list only, no HTML", "default": false},
"tabs_only": {"type": "boolean", "description": "Show tab list only, no HTML"},
"switch_tab_id": {"type": "string", "description": "[Optional] Tab ID to switch to"},
"text_only": {"type": "boolean", "description": "Plain text only, no HTML", "default": false}}}
"text_only": {"type": "boolean", "description": "Plain text only, no HTML"}}}
}},
{"type": "function", "function": {
"name": "web_execute_js",
"description": "Execute JS to control browser. No guessing. Act accurately to reduce web_scan calls. Put code in ```javascript blocks in reply body to avoid escaping",
"description": "Execute JS. Multi-call OK with different switch_tab_id. No guessing. Act accurately to reduce web_scan calls. If no script arg is given, the ```javascript block in the reply body is executed; prefer this to avoid escaping",
"parameters": {"type": "object", "properties": {
"script": {"type": "string", "description": "[Mutually exclusive] JS code or script path. NEVER use this param when using a reply code block"},
"save_to_file": {"type": "string", "description": "file path; **only** for long result", "default": ""},
"no_monitor": {"type": "boolean", "description": "Skip page change monitoring, saves 2-3s. Only for reads, not for page actions", "default": false}}}
"save_to_file": {"type": "string", "description": "file path; **only** for long result"},
"no_monitor": {"type": "boolean", "description": "Skip page change monitoring, saves 2-3s. Only for reads, not for page actions"},
"switch_tab_id": {"type": "string", "description": "[Optional] Tab ID to switch to before executing"}}}
}},
{"type": "function", "function": {
"name": "update_working_checkpoint",

View File

@@ -1,19 +1,20 @@
[
{"type": "function", "function": {
"name": "code_run",
"description": "代码执行器。优先使用python。禁同时调用多个。为免转义问题,代码放正文 ```python/powershell 块中。禁硬编码大量数据",
"description": "代码执行器。优先使用python。支持Multi-call,并行时用script参数。无script参数时正文代码块会被执行,单次调用优先使用以免转义。禁硬编码大量数据",
"parameters": {"type": "object", "properties": {
"script": {"type": "string", "description": "[Optional] 要执行的代码。为免转义建议留空,改用正文代码块(与此参数互斥)"},
"type": {"type": "string", "enum": ["python", "powershell"], "description": "代码类型", "default": "python"},
"timeout": {"type": "integer", "description": "执行超时时间(秒)", "default": 60},
"cwd": {"type": "string", "description": "工作目录,默认为当前工作目录"}}}
"cwd": {"type": "string", "description": "工作目录,默认为当前工作目录"},
"inline_eval": {"type": "boolean", "description": "仅在明确要求时使用"}}}
}},
{"type": "function", "function": {
"name": "file_read",
"description": "读取文件内容。建议在修改文件前先读取,以确保获取最新的上下文和行号。支持分页读取或关键字搜索",
"parameters": {"type": "object", "properties": {
"path": {"type": "string", "description": "文件相对或绝对路径"},
"start": {"type": "integer", "description": "起始行号(从 1 开始)", "default": 1},
"start": {"type": "integer", "description": "起始行号(从 1 开始)"},
"count": {"type": "integer", "description": "读取的行数", "default": 200},
"keyword": {"type": "string", "description": "可选搜索关键字。如果提供,将返回第一个匹配项(忽略大小写)及其周边的内容"},
"show_linenos": {"type": "boolean", "description": "是否显示行号,建议开启以辅助 file_patch 定位", "default": true}}}
@@ -37,17 +38,18 @@
"name": "web_scan",
"description": "获取当前页面的简化HTML内容和标签页列表。会移除隐藏/浮动/被遮盖的元素。切换页面后一般应先调用查看",
"parameters": {"type": "object", "properties": {
"tabs_only": {"type": "boolean", "description": "仅返回标签页列表和当前标签信息不获取HTML内容", "default": false},
"tabs_only": {"type": "boolean", "description": "仅返回标签页列表和当前标签信息不获取HTML内容"},
"switch_tab_id": {"type": "string", "description": "可选的标签页 ID。如果提供系统将在扫描前切换到该标签页"},
"text_only": {"type": "boolean", "description": "只要纯文本不要HTML信息", "default": false}}}
"text_only": {"type": "boolean", "description": "只要纯文本不要HTML"}}}
}},
{"type": "function", "function": {
"name": "web_execute_js",
"description": "执行 JS 控制浏览器。禁止猜测,准确操作以减少 web_scan 调用。为免转义问题,代码优先考虑放回复正文 ```javascript 块",
"description": "执行JS。支持Multi-call用不同switch_tab_id并行操作多标签页。禁止猜测,准确操作以减少 web_scan 调用。无script参数时执行正文 ```javascript 块,以免转义",
"parameters": {"type": "object", "properties": {
"script": {"type": "string", "description": "[Optional] JS代码或路径。为免转义建议留空,改用正文代码块(与此参数互斥)"},
"save_to_file": {"type": "string", "description": "结果存文件,适合返回值较长时", "default": ""},
"no_monitor": {"type": "boolean", "description": "跳过页面变更监控省2-3秒。仅在纯读取信息时设置页面操作时不要设置", "default": false}}}
"save_to_file": {"type": "string", "description": "结果存文件,适合返回值较长时"},
"no_monitor": {"type": "boolean", "description": "跳过页面变更监控省2-3秒。仅在纯读取信息时设置页面操作时不要设置"},
"switch_tab_id": {"type": "string", "description": "可选的标签页 ID切换到该标签页执行"}}}
}},
{"type": "function", "function": {
"name": "update_working_checkpoint",

View File

@@ -7,7 +7,8 @@ try: sys.stdout.reconfigure(errors='replace')
except: pass
try: sys.stderr.reconfigure(errors='replace')
except: pass
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
script_dir = os.path.dirname(__file__)
sys.path.append(os.path.abspath(os.path.join(script_dir, '..')))
import streamlit as st
import time, json, re, threading, queue
@@ -41,12 +42,18 @@ def render_sidebar():
agent.next_llm(); st.rerun(scope="fragment")
if st.button("强行停止任务"):
agent.abort(); st.toast("已发送停止信号"); st.rerun()
if st.button("重新注入System Prompt"):
agent.llmclient.last_tools = ''; st.toast("下次将重新注入System Prompt")
if st.button("重新注入工具"):
agent.llmclient.last_tools = ''
try:
hist_path = os.path.join(script_dir, '..', 'assets', 'tool_usable_history.json')
with open(hist_path, 'r', encoding='utf-8') as f: tool_hist = json.load(f)
agent.llmclient.backend.history.extend(tool_hist)
st.toast(f"已重新注入工具,追加了 {len(tool_hist)} 条示范记录")
except Exception as e: st.toast(f"注入工具示范失败: {e}")
if st.button("🐱 桌面宠物"):
kwargs = {'creationflags': 0x08} if sys.platform == 'win32' else {}
pet_script = os.path.join(os.path.dirname(__file__), 'desktop_pet_v2.pyw')
if not os.path.exists(pet_script): pet_script = os.path.join(os.path.dirname(__file__), 'desktop_pet.pyw')
pet_script = os.path.join(script_dir, 'desktop_pet_v2.pyw')
if not os.path.exists(pet_script): pet_script = os.path.join(script_dir, 'desktop_pet.pyw')
subprocess.Popen([sys.executable, pet_script], **kwargs)
def _pet_req(q):
def _do():

14
ga.py
View File

@@ -295,8 +295,6 @@ class GenericAgentHandler(BaseHandler):
def do_code_run(self, args, response):
'''执行代码片段,有长度限制,不允许代码中放大量数据,如有需要应当通过文件读取进行。'''
if response.tool_calls and sum(1 for tc in response.tool_calls[:args.get('_index', 0)] if tc.function.name == 'code_run') > 0:
return StepOutcome("[ERROR] no multi code_run in one round!", next_prompt="\n")
code_type = args.get("type", "python")
code = args.get("code") or args.get("script")
if not code:
@@ -306,11 +304,15 @@ class GenericAgentHandler(BaseHandler):
raw_path = os.path.join(self.cwd, args.get("cwd", './'))
cwd = os.path.normpath(os.path.abspath(raw_path))
code_cwd = os.path.normpath(self.cwd)
if args.get("_inline_eval"):
if code_type == 'python' and args.get("inline_eval"):
ns = {'handler': self, 'parent': self.parent}
try: result = repr(eval(code, ns))
except SyntaxError: exec(code, ns); result = ns.get('_r', 'OK')
except Exception as e: result = f'Error: {e}'
old_cwd = os.getcwd()
try:
os.chdir(cwd)
try: result = repr(eval(code, ns))
except SyntaxError: exec(code, ns); result = ns.get('_r', 'OK')
except Exception as e: result = f'Error: {e}'
finally: os.chdir(old_cwd)
else: result = yield from code_run(code, code_type, timeout, cwd, code_cwd=code_cwd, stop_signal=self.code_stop_signal)
next_prompt = self._get_anchor_prompt(skip=args.get('_index', 0) > 0)
return StepOutcome(result, next_prompt=next_prompt)

View File

@@ -2,7 +2,7 @@
**触发**:3步以上有依赖/多文件协同/条件分支/需并行 | **禁用**:1-2步简单任务,直接做
任务开始前必须先创建工作目录 `./plan_XXX/`(XXX=任务英文短名)
使用code_run({'_inline_eval':True, 'script':'handler.enter_plan_mode("./plan_XXX/plan.md")'})进入plan模式使用未在schema中描述的秘密参数_inline_eval
单独使用一个code_run({'inline_eval':True, 'script':'handler.enter_plan_mode("./plan_XXX/plan.md")'})进入plan模式
---