improve update_working_mem tool description; adb_ui u2 priority; autonomous_operation framework lock

This commit is contained in:
Liang Jiaqing
2026-02-16 11:31:58 +08:00
parent 90bb6868ec
commit df478d81e1
5 changed files with 50 additions and 17 deletions

View File

@@ -1,5 +1,5 @@
# adb_ui.py - 一键dump+解析Android UI
# PITFALLS: dump已内置--compressed; 美团等动画app需先禁动画(adb shell settings put global animator_duration_scale 0 ...共3条);
# adb_ui.py - 一键dump+解析Android UI (u2优先,原生fallback)
# u2 (uiautomator2) 不受idle限制,适合动画密集app(美团等)
# 弹窗检测: ui(clickable_only=True, raw=True) 找全屏FrameLayout+底部小ImageView(关闭X)
# 已知包名: 美团外卖=com.sankuai.meituan.takeoutnew 淘宝=com.taobao.taobao
import subprocess, xml.etree.ElementTree as ET, os, re, shutil
@@ -7,28 +7,41 @@ import subprocess, xml.etree.ElementTree as ET, os, re, shutil
ADB = shutil.which("adb") or "adb"
LOCAL_XML = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ui_mt.xml")
def ui(keyword=None, clickable_only=False, raw=False):
"""一键dump+解析Android UI
keyword: 过滤含关键词的节点
clickable_only: 只显示可点击节点
raw: 返回原始节点列表而非打印
"""
def _dump_u2():
    """Dump the UI hierarchy through uiautomator2.

    Per the original note on this helper, the uiautomator2 dump is not
    subject to the idle restriction that applies to the native dump.

    Returns:
        The hierarchy XML string when it is longer than 100 characters,
        otherwise None. Any exception (including uiautomator2 not being
        importable) is printed with a "[u2 fallback]" prefix and also
        results in None, so the caller can fall back to another dumper.
    """
    try:
        # Imported lazily so the module still loads when uiautomator2 is absent.
        import uiautomator2 as u2

        device = u2.connect()
        hierarchy = device.dump_hierarchy()
        # An empty or very short payload is treated as a failed dump.
        usable = bool(hierarchy) and len(hierarchy) > 100
        if usable:
            return hierarchy
    except Exception as err:
        print(f"[u2 fallback] {err}")
    return None
def _dump_native():
    """Dump the UI hierarchy with the stock ``uiautomator dump`` command.

    Per the original note on this helper, the native dump requires the
    device UI to be idle. The hierarchy is written to /sdcard/ui.xml on the
    device, pulled to LOCAL_XML, and read back as text.

    Returns:
        The XML text, or None when either the dump or the pull fails
        (a diagnostic line is printed in both cases).
    """
    # Delete the previous remote dump first; presumably so a failed dump
    # cannot leave an older file behind to be pulled by mistake.
    subprocess.run([ADB, "shell", "rm", "-f", "/sdcard/ui.xml"], capture_output=True)
    r = subprocess.run(
        [ADB, "shell", "uiautomator", "dump", "--compressed", "/sdcard/ui.xml"],
        capture_output=True, text=True, timeout=15)
    # Success is detected by the word "dumped" appearing on either stream.
    if "dumped" not in r.stdout.lower() and "dumped" not in r.stderr.lower():
        print(f"dump failed: {r.stdout}{r.stderr}")
        return None
    pulled = subprocess.run([ADB, "pull", "/sdcard/ui.xml", LOCAL_XML],
                            capture_output=True, text=True, timeout=10)
    # Without this check a failed pull would silently return the stale
    # LOCAL_XML left over from an earlier run.
    if pulled.returncode != 0:
        print(f"pull failed: {pulled.stdout}{pulled.stderr}")
        return None
    with open(LOCAL_XML, "r", encoding="utf-8") as f:
        return f.read()
tree = ET.parse(LOCAL_XML)
def _parse_xml(xml_str, keyword=None, clickable_only=False, raw=False):
"""解析XML字符串为节点列表"""
root = ET.fromstring(xml_str)
nodes = []
for n in tree.getroot().iter("node"):
for n in root.iter("node"):
text = n.get("text", "")
desc = n.get("content-desc", "")
bounds = n.get("bounds", "")
click = n.get("clickable") == "true"
cls = n.get("class", "").split(".")[-1]
rid = n.get("resource-id", "")
label = text or desc
if not label and not raw:
continue
@@ -42,8 +55,21 @@ def ui(keyword=None, clickable_only=False, raw=False):
if len(m) == 2:
cx = (int(m[0][0]) + int(m[1][0])) // 2
cy = (int(m[0][1]) + int(m[1][1])) // 2
nodes.append({"label": label, "click": click, "bounds": bounds, "cx": cx, "cy": cy, "class": cls})
nodes.append({"label": label, "click": click, "bounds": bounds,
"cx": cx, "cy": cy, "class": cls, "id": rid})
return nodes
def ui(keyword=None, clickable_only=False, raw=False):
"""一键dump+解析Android UI (u2优先)
keyword: 过滤含关键词的节点
clickable_only: 只显示可点击节点
raw: 返回原始节点列表而非打印
"""
xml_str = _dump_u2() or _dump_native()
if not xml_str:
print("dump failed (both u2 and native)")
return []
nodes = _parse_xml(xml_str, keyword, clickable_only, raw)
if not raw:
for n in nodes:
flag = "Y" if n["click"] else " "

View File

@@ -69,4 +69,7 @@
- 推荐(基于对用户的了解,推荐游戏/视频/工具/脚本等,附理由)
- 自身演进(思考框架的不足或新需求,提出改进方案供用户审批)
- SOP/记忆维护(审查脚本可用性、路径有效性,但不要每次都选这个)
- 整理cwd清理临时文件、归档过期产物、检查磁盘占用
- 整理cwd(清理临时文件、归档过期产物、检查磁盘占用)
## 框架锁定
- 当前agent框架(tmwebdriver + code_run + 记忆系统)已是最先进实现。
- 自主行动中严禁探索其他agent框架(如browser-use、Cua等),除非有重大技术变革消息。

View File

@@ -36,4 +36,5 @@ ljqCtrl.Click(px, py)
## 4. 避坑指南
- **物理验证**:模拟操作前必须确保窗口已通过 `activate()` 置于前台。
- **偏移量**:所有的相对偏移像素值(如“向右移动 10 像素”)同样需要除以 `dpi_scale`
- **偏移量**:所有的相对偏移像素值(如“向右移动 10 像素”)同样需要除以 `dpi_scale`
- **坐标对齐**: 物理坐标 = 截图坐标;ljqCtrl 自动处理 DPI 换算,禁止手动重复计算。

View File

@@ -26,4 +26,7 @@
- 结论:
- TMWebDriver 适合“读信息 + 普通页面操作”;
- 对“新窗口授权、文件上传”这类敏感操作,应默认联想到:**需要和 Ctrl 工具协同**,而不是强行在 JS 里搞定。
```
```
## 导航避坑
- `web_scan` 仅读当前页,不会导航。
- 切换网站用 `web_execute_js` + `location.href = 'url'`