improve update_working_mem tool description; adb_ui u2 priority; autonomous_operation framework lock
This commit is contained in:
@@ -48,9 +48,9 @@
|
||||
}},
|
||||
{"type": "function", "function": {
|
||||
"name": "update_working_mem",
|
||||
"description": "更新当前任务的工作记忆。当任务目标细化、阶段切换或发现关键物理约束(坑)时调用,内容(不得超过200 tokens)将作为物理锚点在后续每轮自动注入,强行锁定推理方向,防止长程任务逻辑漂移。",
|
||||
"description": "短期工作便签,内容每轮自动注入,防止长任务中关键信息丢失。何时调用:(1)即将切换子任务、上下文将被大量新信息冲刷前,存入当前路径/参数/进度;(2)获得后续步骤必需的关键发现后;(3)SOP多步执行时,完成一步后更新为本步结果+下一步要求。原则:只存N轮后可能忘记但后面还要用的信息,刚发生的、上下文里还热乎的不用存。宁可多更新不可丢关键上下文。",
|
||||
"parameters": {"type": "object", "properties": {
|
||||
"key_info": {"type": "string", "description": "替换掉当前的极简关键信息(<200 tokens),包含当前进度和目标、已获取的关键信息及下一步必须绕过的坑,已经解决或完成的步骤可以去掉"},
|
||||
"key_info": {"type": "string", "description": "替换当前便签(<200 tokens)。只写后续必须记住的:文件路径、关键参数/发现、当前进度、下一步计划、要避的坑。刚完成的和上下文中显而易见的不写,省空间给真正容易丢的信息。"},
|
||||
"related_sop": {"type": "string", "description": "相关sop名称,可以多个,必要时需要再读"}}}
|
||||
}},
|
||||
{"type": "function", "function": {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# adb_ui.py - 一键dump+解析Android UI
|
||||
# PITFALLS: dump已内置--compressed; 美团等动画app需先禁动画(adb shell settings put global animator_duration_scale 0 ...共3条);
|
||||
# adb_ui.py - 一键dump+解析Android UI (u2优先,原生fallback)
|
||||
# u2 (uiautomator2) 不受idle限制,适合动画密集app(美团等)
|
||||
# 弹窗检测: ui(clickable_only=True, raw=True) 找全屏FrameLayout+底部小ImageView(关闭X)
|
||||
# 已知包名: 美团外卖=com.sankuai.meituan.takeoutnew 淘宝=com.taobao.taobao
|
||||
import subprocess, xml.etree.ElementTree as ET, os, re, shutil
|
||||
@@ -7,28 +7,41 @@ import subprocess, xml.etree.ElementTree as ET, os, re, shutil
|
||||
ADB = shutil.which("adb") or "adb"
|
||||
LOCAL_XML = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ui_mt.xml")
|
||||
|
||||
def ui(keyword=None, clickable_only=False, raw=False):
|
||||
"""一键dump+解析Android UI
|
||||
keyword: 过滤含关键词的节点
|
||||
clickable_only: 只显示可点击节点
|
||||
raw: 返回原始节点列表而非打印
|
||||
"""
|
||||
def _dump_u2():
|
||||
"""用uiautomator2 dump,不受idle限制"""
|
||||
try:
|
||||
import uiautomator2 as u2
|
||||
d = u2.connect()
|
||||
xml_str = d.dump_hierarchy()
|
||||
if xml_str and len(xml_str) > 100:
|
||||
return xml_str
|
||||
except Exception as e:
|
||||
print(f"[u2 fallback] {e}")
|
||||
return None
|
||||
|
||||
def _dump_native():
|
||||
"""原生uiautomator dump(需idle状态)"""
|
||||
subprocess.run([ADB, "shell", "rm", "-f", "/sdcard/ui.xml"], capture_output=True)
|
||||
r = subprocess.run([ADB, "shell", "uiautomator", "dump", "--compressed", "/sdcard/ui.xml"],
|
||||
capture_output=True, text=True, timeout=15)
|
||||
if "dumped" not in r.stdout.lower() and "dumped" not in r.stderr.lower():
|
||||
print(f"dump failed: {r.stdout}{r.stderr}")
|
||||
return []
|
||||
return None
|
||||
subprocess.run([ADB, "pull", "/sdcard/ui.xml", LOCAL_XML], capture_output=True, timeout=10)
|
||||
with open(LOCAL_XML, "r", encoding="utf-8") as f:
|
||||
return f.read()
|
||||
|
||||
tree = ET.parse(LOCAL_XML)
|
||||
def _parse_xml(xml_str, keyword=None, clickable_only=False, raw=False):
|
||||
"""解析XML字符串为节点列表"""
|
||||
root = ET.fromstring(xml_str)
|
||||
nodes = []
|
||||
for n in tree.getroot().iter("node"):
|
||||
for n in root.iter("node"):
|
||||
text = n.get("text", "")
|
||||
desc = n.get("content-desc", "")
|
||||
bounds = n.get("bounds", "")
|
||||
click = n.get("clickable") == "true"
|
||||
cls = n.get("class", "").split(".")[-1]
|
||||
rid = n.get("resource-id", "")
|
||||
label = text or desc
|
||||
if not label and not raw:
|
||||
continue
|
||||
@@ -42,8 +55,21 @@ def ui(keyword=None, clickable_only=False, raw=False):
|
||||
if len(m) == 2:
|
||||
cx = (int(m[0][0]) + int(m[1][0])) // 2
|
||||
cy = (int(m[0][1]) + int(m[1][1])) // 2
|
||||
nodes.append({"label": label, "click": click, "bounds": bounds, "cx": cx, "cy": cy, "class": cls})
|
||||
nodes.append({"label": label, "click": click, "bounds": bounds,
|
||||
"cx": cx, "cy": cy, "class": cls, "id": rid})
|
||||
return nodes
|
||||
|
||||
def ui(keyword=None, clickable_only=False, raw=False):
|
||||
"""一键dump+解析Android UI (u2优先)
|
||||
keyword: 过滤含关键词的节点
|
||||
clickable_only: 只显示可点击节点
|
||||
raw: 返回原始节点列表而非打印
|
||||
"""
|
||||
xml_str = _dump_u2() or _dump_native()
|
||||
if not xml_str:
|
||||
print("dump failed (both u2 and native)")
|
||||
return []
|
||||
nodes = _parse_xml(xml_str, keyword, clickable_only, raw)
|
||||
if not raw:
|
||||
for n in nodes:
|
||||
flag = "Y" if n["click"] else " "
|
||||
|
||||
@@ -69,4 +69,7 @@
|
||||
- 推荐(基于对用户的了解,推荐游戏/视频/工具/脚本等,附理由)
|
||||
- 自身演进(思考框架的不足或新需求,提出改进方案供用户审批)
|
||||
- SOP/记忆维护(审查脚本可用性、路径有效性,但不要每次都选这个)
|
||||
- 整理cwd(清理临时文件、归档过期产物、检查磁盘占用)
|
||||
- 整理cwd(清理临时文件、归档过期产物、检查磁盘占用)
|
||||
## 框架锁定
|
||||
- 当前agent框架(tmwebdriver + code_run + 记忆系统)已是最先进实现。
|
||||
- 自主行动中严禁探索其他agent框架(如browser-use、Cua等),除非有重大技术变革消息。
|
||||
|
||||
@@ -36,4 +36,5 @@ ljqCtrl.Click(px, py)
|
||||
|
||||
## 4. 避坑指南
|
||||
- **物理验证**:模拟操作前必须确保窗口已通过 `activate()` 置于前台。
|
||||
- **偏移量**:所有的相对偏移像素值(如“向右移动 10 像素”)同样需要除以 `dpi_scale`。
|
||||
- **偏移量**:所有的相对偏移像素值(如“向右移动 10 像素”)同样需要除以 `dpi_scale`。
|
||||
- **坐标对齐**: 物理坐标 = 截图坐标;ljqCtrl 自动处理 DPI 换算,禁止手动重复计算。
|
||||
|
||||
@@ -26,4 +26,7 @@
|
||||
- 结论:
|
||||
- TMWebDriver 适合“读信息 + 普通页面操作”;
|
||||
- 对“新窗口授权、文件上传”这类敏感操作,应默认联想到:**需要和 Ctrl 工具协同**,而不是强行在 JS 里搞定。
|
||||
```
|
||||
```
|
||||
## 导航避坑
|
||||
- `web_scan` 仅读当前页,不会导航。
|
||||
- 切换网站用 `web_execute_js` + `location.href = 'url'`。
|
||||
|
||||
Reference in New Issue
Block a user