From df478d81e1cc655073357b65b137231fd434df7c Mon Sep 17 00:00:00 2001 From: Liang Jiaqing Date: Mon, 16 Feb 2026 11:31:58 +0800 Subject: [PATCH] improve update_working_mem tool description; adb_ui u2 priority; autonomous_operation framework lock --- assets/tools_schema.json | 4 +-- memory/adb_ui.py | 50 +++++++++++++++++++++++------- memory/autonomous_operation_sop.md | 5 ++- memory/ljqCtrl_sop.md | 3 +- memory/tmwebdriver_sop.md | 5 ++- 5 files changed, 50 insertions(+), 17 deletions(-) diff --git a/assets/tools_schema.json b/assets/tools_schema.json index 7be6eec..f6b25d6 100644 --- a/assets/tools_schema.json +++ b/assets/tools_schema.json @@ -48,9 +48,9 @@ }}, {"type": "function", "function": { "name": "update_working_mem", - "description": "更新当前任务的工作记忆。当任务目标细化、阶段切换或发现关键物理约束(坑)时调用,内容(不得超过200 tokens)将作为物理锚点在后续每轮自动注入,强行锁定推理方向,防止长程任务逻辑漂移。", + "description": "短期工作便签,内容每轮自动注入,防止长任务中关键信息丢失。何时调用:(1)即将切换子任务、上下文将被大量新信息冲刷前,存入当前路径/参数/进度;(2)获得后续步骤必需的关键发现后;(3)SOP多步执行时,完成一步后更新为本步结果+下一步要求。原则:只存N轮后可能忘记但后面还要用的信息,刚发生的、上下文里还热乎的不用存。宁可多更新不可丢关键上下文。", "parameters": {"type": "object", "properties": { - "key_info": {"type": "string", "description": "替换掉当前的极简关键信息(<200 tokens),包含当前进度和目标、已获取的关键信息及下一步必须绕过的坑,已经解决或完成的步骤可以去掉"}, + "key_info": {"type": "string", "description": "替换当前便签(<200 tokens)。只写后续必须记住的:文件路径、关键参数/发现、当前进度、下一步计划、要避的坑。刚完成的和上下文中显而易见的不写,省空间给真正容易丢的信息。"}, "related_sop": {"type": "string", "description": "相关sop名称,可以多个,必要时需要再读"}}} }}, {"type": "function", "function": { diff --git a/memory/adb_ui.py b/memory/adb_ui.py index 6593544..fd83dbb 100644 --- a/memory/adb_ui.py +++ b/memory/adb_ui.py @@ -1,5 +1,5 @@ -# adb_ui.py - 一键dump+解析Android UI -# PITFALLS: dump已内置--compressed; 美团等动画app需先禁动画(adb shell settings put global animator_duration_scale 0 ...共3条); +# adb_ui.py - 一键dump+解析Android UI (u2优先,原生fallback) +# u2 (uiautomator2) 不受idle限制,适合动画密集app(美团等) # 弹窗检测: ui(clickable_only=True, raw=True) 找全屏FrameLayout+底部小ImageView(关闭X) # 已知包名: 美团外卖=com.sankuai.meituan.takeoutnew 淘宝=com.taobao.taobao import subprocess, xml.etree.ElementTree as ET, os, re, shutil @@ -7,28 +7,41 @@ import subprocess, xml.etree.ElementTree as ET, os, re, shutil ADB = shutil.which("adb") or "adb" LOCAL_XML = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ui_mt.xml") -def ui(keyword=None, clickable_only=False, raw=False): - """一键dump+解析Android UI - keyword: 过滤含关键词的节点 - clickable_only: 只显示可点击节点 - raw: 返回原始节点列表而非打印 - """ +def _dump_u2(): + """用uiautomator2 dump,不受idle限制""" + try: + import uiautomator2 as u2 + d = u2.connect() + xml_str = d.dump_hierarchy() + if xml_str and len(xml_str) > 100: + return xml_str + except Exception as e: + print(f"[u2 fallback] {e}") + return None + +def _dump_native(): + """原生uiautomator dump(需idle状态)""" subprocess.run([ADB, "shell", "rm", "-f", "/sdcard/ui.xml"], capture_output=True) r = subprocess.run([ADB, "shell", "uiautomator", "dump", "--compressed", "/sdcard/ui.xml"], capture_output=True, text=True, timeout=15) if "dumped" not in r.stdout.lower() and "dumped" not in r.stderr.lower(): print(f"dump failed: {r.stdout}{r.stderr}") - return [] + return None subprocess.run([ADB, "pull", "/sdcard/ui.xml", LOCAL_XML], capture_output=True, timeout=10) + with open(LOCAL_XML, "r", encoding="utf-8") as f: + return f.read() - tree = ET.parse(LOCAL_XML) +def _parse_xml(xml_str, keyword=None, clickable_only=False, raw=False): + """解析XML字符串为节点列表""" + root = ET.fromstring(xml_str) nodes = [] - for n in tree.getroot().iter("node"): + for n in root.iter("node"): text = n.get("text", "") desc = n.get("content-desc", "") bounds = n.get("bounds", "") click = n.get("clickable") == "true" cls = n.get("class", "").split(".")[-1] + rid = n.get("resource-id", "") label = text or desc if not label and not raw: continue @@ -42,8 +55,21 @@ def ui(keyword=None, clickable_only=False, raw=False): if len(m) == 2: cx = (int(m[0][0]) + int(m[1][0])) // 2 cy = (int(m[0][1]) + int(m[1][1])) // 2 - nodes.append({"label": label, "click": click, "bounds": bounds, "cx": cx, "cy": cy, "class": cls}) + nodes.append({"label": label, "click": click, "bounds": bounds, + "cx": cx, "cy": cy, "class": cls, "id": rid}) + return nodes +def ui(keyword=None, clickable_only=False, raw=False): + """一键dump+解析Android UI (u2优先) + keyword: 过滤含关键词的节点 + clickable_only: 只显示可点击节点 + raw: 返回原始节点列表而非打印 + """ + xml_str = _dump_u2() or _dump_native() + if not xml_str: + print("dump failed (both u2 and native)") + return [] + nodes = _parse_xml(xml_str, keyword, clickable_only, raw) if not raw: for n in nodes: flag = "Y" if n["click"] else " " diff --git a/memory/autonomous_operation_sop.md b/memory/autonomous_operation_sop.md index ba8e1ef..2df15ef 100644 --- a/memory/autonomous_operation_sop.md +++ b/memory/autonomous_operation_sop.md @@ -69,4 +69,7 @@ - 推荐(基于对用户的了解,推荐游戏/视频/工具/脚本等,附理由) - 自身演进(思考框架的不足或新需求,提出改进方案供用户审批) - SOP/记忆维护(审查脚本可用性、路径有效性,但不要每次都选这个) -- 整理cwd(清理临时文件、归档过期产物、检查磁盘占用) \ No newline at end of file +- 整理cwd(清理临时文件、归档过期产物、检查磁盘占用) +## 框架锁定 +- 当前agent框架(tmwebdriver + code_run + 记忆系统)已是最先进实现。 +- 自主行动中严禁探索其他agent框架(如browser-use、Cua等),除非有重大技术变革消息。 diff --git a/memory/ljqCtrl_sop.md b/memory/ljqCtrl_sop.md index 727c80d..e002bc4 100644 --- a/memory/ljqCtrl_sop.md +++ b/memory/ljqCtrl_sop.md @@ -36,4 +36,5 @@ ljqCtrl.Click(px, py) ## 4. 避坑指南 - **物理验证**:模拟操作前必须确保窗口已通过 `activate()` 置于前台。 -- **偏移量**:所有的相对偏移像素值(如“向右移动 10 像素”)同样需要除以 `dpi_scale`。 \ No newline at end of file +- **偏移量**:所有的相对偏移像素值(如“向右移动 10 像素”)同样需要除以 `dpi_scale`。 +- **坐标对齐**: 物理坐标 = 截图坐标;ljqCtrl 自动处理 DPI 换算,禁止手动重复计算。 diff --git a/memory/tmwebdriver_sop.md b/memory/tmwebdriver_sop.md index d05be3b..504c773 100644 --- a/memory/tmwebdriver_sop.md +++ b/memory/tmwebdriver_sop.md @@ -26,4 +26,7 @@ - 结论: - TMWebDriver 适合“读信息 + 普通页面操作”; - 对“新窗口授权、文件上传”这类敏感操作,应默认联想到:**需要和 Ctrl 工具协同**,而不是强行在 JS 里搞定。 -``` \ No newline at end of file +``` +## 导航避坑 +- `web_scan` 仅读当前页,不会导航。 +- 切换网站用 `web_execute_js` + `location.href = 'url'`。