refactor: optimize code extraction priority in tool handlers & update autofill SOP
This commit is contained in:
@@ -1,10 +1,11 @@
|
|||||||
[
|
[
|
||||||
{"type": "function", "function": {
|
{"type": "function", "function": {
|
||||||
"name": "code_run",
|
"name": "code_run",
|
||||||
"description": "代码执行器。优先使用python,仅在必要系统操作时使用 powershell。注意:不能同时调用多个,执行的代码放在回复正文中,以 ```python 或 ```powershell 代码块的形式。严禁在代码中硬编码大量数据,如有需要应通过文件读取",
|
"description": "代码执行器。优先python,系统操作用powershell。禁同时调用多个。为免转义问题,代码放正文 ```python/powershell 块中。禁硬编码大量数据",
|
||||||
"parameters": {"type": "object", "properties": {
|
"parameters": {"type": "object", "properties": {
|
||||||
"type": {"type": "string", "enum": ["python", "powershell"], "description": "执行环境类型,默认为 python", "default": "python"},
|
"script": {"type": "string", "description": "[Optional] 要执行的代码。为免转义建议留空,改用正文代码块(与此参数互斥)"},
|
||||||
"timeout": {"type": "integer", "description": "执行超时时间(秒),默认 60", "default": 60},
|
"type": {"type": "string", "enum": ["python", "powershell"], "description": "代码类型", "default": "python"},
|
||||||
|
"timeout": {"type": "integer", "description": "执行超时时间(秒)", "default": 60},
|
||||||
"cwd": {"type": "string", "description": "工作目录,默认为当前工作目录"}}}
|
"cwd": {"type": "string", "description": "工作目录,默认为当前工作目录"}}}
|
||||||
}},
|
}},
|
||||||
{"type": "function", "function": {
|
{"type": "function", "function": {
|
||||||
@@ -42,11 +43,11 @@
|
|||||||
}},
|
}},
|
||||||
{"type": "function", "function": {
|
{"type": "function", "function": {
|
||||||
"name": "web_execute_js",
|
"name": "web_execute_js",
|
||||||
"description": "万能网页操控工具。通过执行 JavaScript 脚本实现对浏览器的完全控制(如点击、滚动、提取特定数据)。鼓励在有把握情况下(记忆中有selector/做法等)精准使用以减少web_scan调用。执行结果可选择保存到本地文件进行后续分析",
|
"description": "执行 JS 控制浏览器。建议精准使用减少 web_scan。为免转义问题,代码优先考虑放回复正文 ```javascript 块",
|
||||||
"parameters": {"type": "object", "properties": {
|
"parameters": {"type": "object", "properties": {
|
||||||
"script": {"type": "string", "description": "要执行的代码或JS文件路径"},
|
"script": {"type": "string", "description": "[Optional] JS代码或路径。为免转义建议留空,改用正文代码块(与此参数互斥)"},
|
||||||
"save_to_file": {"type": "string", "description": "结果存文件,适合返回值较长时。不支持await", "default": ""},
|
"save_to_file": {"type": "string", "description": "结果存文件,适合返回值较长时", "default": ""},
|
||||||
"no_monitor": {"type": "boolean", "description": "跳过页面变更监控,省2-3秒。仅在纯读取信息时设置,页面操作时不要设置", "default": false}}, "required": ["script"]}
|
"no_monitor": {"type": "boolean", "description": "跳过页面变更监控,省2-3秒。仅在纯读取信息时设置,页面操作时不要设置", "default": false}}}
|
||||||
}},
|
}},
|
||||||
{"type": "function", "function": {
|
{"type": "function", "function": {
|
||||||
"name": "update_working_checkpoint",
|
"name": "update_working_checkpoint",
|
||||||
|
|||||||
27
ga.py
27
ga.py
@@ -282,27 +282,26 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
ret.next_prompt += "\nPROTOCOL_VIOLATION: 上一轮遗漏了<summary>。 已根据物理动作自动补全。请务必在下次回复中记得<summary>协议。"
|
ret.next_prompt += "\nPROTOCOL_VIOLATION: 上一轮遗漏了<summary>。 已根据物理动作自动补全。请务必在下次回复中记得<summary>协议。"
|
||||||
self.history_info.append('[Agent] ' + smart_format(summary, max_str_len=100))
|
self.history_info.append('[Agent] ' + smart_format(summary, max_str_len=100))
|
||||||
|
|
||||||
|
def _extract_code_block(self, response, code_type):
|
||||||
|
matches = re.findall(rf"```{code_type}\n(.*?)\n```", response.content, re.DOTALL)
|
||||||
|
return matches[-1].strip() if matches else None
|
||||||
|
|
||||||
def do_code_run(self, args, response):
|
def do_code_run(self, args, response):
|
||||||
'''执行代码片段,有长度限制,不允许代码中放大量数据,如有需要应当通过文件读取进行。
|
'''执行代码片段,有长度限制,不允许代码中放大量数据,如有需要应当通过文件读取进行。
|
||||||
'''
|
'''
|
||||||
if response.tool_calls and sum(1 for tc in response.tool_calls[:args.get('_index', 0)] if tc.function.name == 'code_run') > 0:
|
if response.tool_calls and sum(1 for tc in response.tool_calls[:args.get('_index', 0)] if tc.function.name == 'code_run') > 0:
|
||||||
return StepOutcome("[BLANK]", next_prompt="no multi code_run in one round!")
|
return StepOutcome("[BLANK]", next_prompt="no multi code_run in one round!")
|
||||||
code_type = args.get("type", "python")
|
code_type = args.get("type", "python")
|
||||||
# 从 response.content 中提取代码块, 匹配 ```python ... ``` 或 ```powershell ... ```
|
code = args.get("code") or args.get("script")
|
||||||
pattern = rf"```{code_type}\n(.*?)\n```"
|
if not code:
|
||||||
matches = re.findall(pattern, response.content, re.DOTALL)
|
code = self._extract_code_block(response, code_type)
|
||||||
warning = ""
|
if not code: return StepOutcome(None, next_prompt=f"[Error] Code missing. Use ```{code_type} block or 'script' arg.")
|
||||||
if not matches:
|
|
||||||
code = args.get("code") or args.get("script")
|
|
||||||
if not code: return StepOutcome(None, next_prompt=f"【系统错误】:你调用了 code_run,但未在先在回复正文中提供 ```{code_type} 代码块。请重新输出代码并附带工具调用。")
|
|
||||||
warning = "\n下次要记得先在回复正文中提供代码块,而不是放在参数中"
|
|
||||||
else: code = matches[-1].strip() # 提取最后一个代码块(通常是模型修正后的最终逻辑)
|
|
||||||
timeout = args.get("timeout", 60)
|
timeout = args.get("timeout", 60)
|
||||||
raw_path = os.path.join(self.cwd, args.get("cwd", './'))
|
raw_path = os.path.join(self.cwd, args.get("cwd", './'))
|
||||||
cwd = os.path.normpath(os.path.abspath(raw_path))
|
cwd = os.path.normpath(os.path.abspath(raw_path))
|
||||||
code_cwd = os.path.normpath(self.cwd)
|
code_cwd = os.path.normpath(self.cwd)
|
||||||
result = yield from code_run(code, code_type, timeout, cwd, code_cwd=code_cwd, stop_signal=self.code_stop_signal)
|
result = yield from code_run(code, code_type, timeout, cwd, code_cwd=code_cwd, stop_signal=self.code_stop_signal)
|
||||||
next_prompt = self._get_anchor_prompt() + warning
|
next_prompt = self._get_anchor_prompt()
|
||||||
return StepOutcome(result, next_prompt=next_prompt)
|
return StepOutcome(result, next_prompt=next_prompt)
|
||||||
|
|
||||||
def do_ask_user(self, args, response):
|
def do_ask_user(self, args, response):
|
||||||
@@ -328,11 +327,9 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
return StepOutcome(result, next_prompt=next_prompt)
|
return StepOutcome(result, next_prompt=next_prompt)
|
||||||
|
|
||||||
def do_web_execute_js(self, args, response):
|
def do_web_execute_js(self, args, response):
|
||||||
'''web情况下的优先使用工具,执行任何js达成对浏览器的*完全*控制。
|
'''web情况下的优先使用工具,执行任何js达成对浏览器的*完全*控制。支持将结果保存到文件供后续读取分析。'''
|
||||||
支持将结果保存到文件供后续读取分析,但保存功能仅限即时读取,与await等异步操作不兼容。
|
script = args.get("script", "") or self._extract_code_block(response, "javascript")
|
||||||
'''
|
if not script: return StepOutcome(None, next_prompt="[Error] Script missing. Use ```javascript block or 'script' arg.")
|
||||||
script = args.get("script", "")
|
|
||||||
if not script: return StepOutcome(None, next_prompt="[Error] Empty script param. Check your tool call arguments.")
|
|
||||||
abs_path = self._get_abs_path(script.strip())
|
abs_path = self._get_abs_path(script.strip())
|
||||||
if os.path.isfile(abs_path):
|
if os.path.isfile(abs_path):
|
||||||
with open(abs_path, 'r', encoding='utf-8') as f: script = f.read()
|
with open(abs_path, 'r', encoding='utf-8') as f: script = f.read()
|
||||||
|
|||||||
@@ -126,17 +126,16 @@ document.body.appendChild(el); // 响应写回el.textContent
|
|||||||
- 渲染检查:`DOM.resolveNode` → `Runtime.callFunctionOn` 检查offsetHeight>0
|
- 渲染检查:`DOM.resolveNode` → `Runtime.callFunctionOn` 检查offsetHeight>0
|
||||||
- 完整pipeline: getDocument(pierce) → querySelector → getBoxModel → 四点平均坐标 → Input三事件点击
|
- 完整pipeline: getDocument(pierce) → querySelector → getBoxModel → 四点平均坐标 → Input三事件点击
|
||||||
|
|
||||||
## autofill获取
|
## autofill获取与登录 (需 v0.4+ 脚本支持 await)
|
||||||
检测:web_scan输出input带`data-autofilled="true"`,value显示为受保护提示(非真实值,Chrome安全保护需点击释放)
|
检测:web_scan输出input带`data-autofilled="true"`,value显示为受保护提示(非真实值,Chrome安全保护需点击释放)
|
||||||
- ⭐首选CDP单次点击:JS取任一autofill输入框坐标→CDP `Input.dispatchMouseEvent` mousePressed一次即可释放→JS读`.value`
|
- ⭐**一键释放与登录**:利用 v0.4 脚本的顶层 `await`,在单次 `web_execute_js` 中连贯完成:
|
||||||
- ⚠点击一个autofill字段会释放页面上**所有**autofill字段的值,无需逐个点击
|
1. JS获取输入框坐标。
|
||||||
- ⚠只需mousePressed,不需要mouseReleased配对
|
2. CDP发送 `Input.dispatchMouseEvent` (mousePressed) 物理点击释放autofill。
|
||||||
- ⚠tabId:当前注入页无需指定(默认sender.tab.id),跨tab才需显式tabId(整数)
|
3. `await new Promise(r => setTimeout(r, 500))` 等待释放。
|
||||||
- 示例(当前页):`{cmd:'cdp',method:'Input.dispatchMouseEvent',params:{type:'mousePressed',x:X,y:Y,button:'left',clickCount:1}}`
|
4. 派发 `input`/`change` 事件唤醒前端框架(解禁登录按钮)。
|
||||||
- 示例(跨tab):先`{cmd:'tabs'}`获取tabId(整数),再`{cmd:'cdp',tabId:N,method:'Input.dispatchMouseEvent',params:{...}}`
|
5. 触发登录点击。
|
||||||
- ⚠batch的`$N.path`引用会将整数tabId转为字符串导致类型错误,跨tab时建议分两次命令而非batch
|
- ⚠只需 `mousePressed`,无需 `mouseReleased`。点击一个字段即释放全页。
|
||||||
- 备选PostMessage物理点击(仅Windows/需前台):枚举Chrome窗口标题匹配→rect*dpr→WM_LBUTTONDOWN/UP到Chrome_RenderWidgetHostHWND子窗口
|
- ⚠已淘汰旧版跨 tab 查 tabId 或 Python 轮询的繁琐流程,直接在当前页异步完成。
|
||||||
- 坑:多RenderWidgetHostHWND共存,必须按父窗口标题匹配再取子窗口
|
|
||||||
|
|
||||||
## 验证码/页面视觉截图
|
## 验证码/页面视觉截图
|
||||||
- ⭐首选CDP截图:`Page.captureScreenshot`(format:'png')→返回base64,无需前台/后台tab也行,全页高清
|
- ⭐首选CDP截图:`Page.captureScreenshot`(format:'png')→返回base64,无需前台/后台tab也行,全页高清
|
||||||
|
|||||||
Reference in New Issue
Block a user