feat: CDP bridge batch support - cookies/tabs/cdp mixed commands, lazy attach, $N chain refs, file upload & screenshot verified

This commit is contained in:
Liang Jiaqing
2026-03-04 13:06:21 +08:00
parent 3f8407a853
commit 313f8a28f0
6 changed files with 63 additions and 13 deletions

View File

@@ -10,6 +10,10 @@ chrome.runtime.onMessage.addListener((msg, sender, sendResponse) => {
handleCDP(msg, sender).then(sendResponse);
return true;
}
if (msg.action === 'batch') {
handleBatch(msg, sender).then(sendResponse);
return true;
}
if (msg.action === 'tabs') {
(async () => {
try {
@@ -44,6 +48,38 @@ async function handleCookies(msg, sender) {
}
}
/**
 * Execute a list of bridge commands in one round trip.
 *
 * Supported `cmd` values per entry of `msg.commands`:
 *   - 'cookies': delegated to handleCookies(command, sender); its result is stored as-is.
 *   - 'tabs':    stores { ok: true, data: [{ id, url, title, active, windowId }, ...] }.
 *   - 'cdp':     sends `method`/`params` through chrome.debugger and stores the raw
 *                CDP result. The debugger is attached lazily and kept attached while
 *                consecutive cdp commands target the same tab.
 *   - anything else stores { ok: false, error: 'unknown cmd: ...' } and the batch continues.
 *
 * Inside cdp `params`, a string value of the exact form "$N.a.b" is replaced by
 * results[N].a.b (N is the 0-based index of an earlier command in this batch).
 *
 * @param {{commands: Array<object>, tabId?: number}} msg  Batch request.
 * @param {object} sender  Message sender; sender.tab?.id is the fallback tab for cdp commands.
 * @returns {Promise<{ok: boolean, results: Array, error?: string}>}
 *   On failure `results` holds whatever completed before the error.
 */
async function handleBatch(msg, sender) {
  const results = [];
  let attached = null; // tabId the debugger is currently attached to, or null

  // Whole-string reference: "$<index>.<dot.separated.path>".
  const REF = /^\$(\d+)\.([^"]+)$/;

  // Follow `path` into results[index]; fail loudly instead of yielding undefined.
  const lookup = (ref, index, path) => {
    let value = results[+index];
    for (const key of path.split('.')) {
      if (value === null || value === undefined) {
        throw new Error('cannot resolve reference ' + ref);
      }
      value = value[key];
    }
    if (value === undefined) {
      throw new Error('cannot resolve reference ' + ref);
    }
    return value;
  };

  // Walk the params tree structurally rather than regex-replacing inside serialized
  // JSON: text that merely contains a quoted "$N.x" must be left untouched.
  const resolveRefs = (node) => {
    if (typeof node === 'string') {
      const m = REF.exec(node);
      return m ? lookup(node, m[1], m[2]) : node;
    }
    if (Array.isArray(node)) return node.map(resolveRefs);
    if (node && typeof node === 'object') {
      return Object.fromEntries(
        Object.entries(node).map(([key, value]) => [key, resolveRefs(value)])
      );
    }
    return node;
  };

  // Detach from the currently attached tab, if any. `attached` is cleared first so
  // a failing detach is never retried by the error path.
  const release = async () => {
    if (attached === null) return;
    const tabId = attached;
    attached = null;
    await chrome.debugger.detach({ tabId });
  };

  try {
    if (!Array.isArray(msg.commands)) {
      throw new Error('commands must be an array');
    }
    for (const c of msg.commands) {
      if (c.cmd === 'cookies') {
        results.push(await handleCookies(c, sender));
      } else if (c.cmd === 'tabs') {
        const tabs = await chrome.tabs.query({});
        results.push({
          ok: true,
          data: tabs.map(t => ({ id: t.id, url: t.url, title: t.title, active: t.active, windowId: t.windowId })),
        });
      } else if (c.cmd === 'cdp') {
        // Per-command tabId wins, then the batch-level one, then the sending tab.
        const tabId = c.tabId || msg.tabId || sender.tab?.id;
        // Same guard and message as handleCDP, instead of an opaque attach() failure.
        if (!tabId) throw new Error('no tabId');
        if (attached !== tabId) {
          await release();
          await chrome.debugger.attach({ tabId }, '1.3');
          attached = tabId;
        }
        results.push(await chrome.debugger.sendCommand({ tabId }, c.method, resolveRefs(c.params || {})));
      } else {
        results.push({ ok: false, error: 'unknown cmd: ' + c.cmd });
      }
    }
    await release();
    return { ok: true, results };
  } catch (e) {
    // Best-effort cleanup: the original error is the one worth reporting.
    try { await release(); } catch (_) {}
    return { ok: false, error: e?.message ?? String(e), results };
  }
}
async function handleCDP(msg, sender) {
const tabId = msg.tabId || sender.tab?.id;
if (!tabId) return { ok: false, error: 'no tabId' };

View File

@@ -19,6 +19,8 @@ async function handle(el) {
resp = await chrome.runtime.sendMessage({ action: 'cookies', url: req.url || location.href });
} else if (cmd === 'cdp') {
resp = await chrome.runtime.sendMessage({ action: 'cdp', method: req.method, params: req.params || {}, tabId: req.tabId });
} else if (cmd === 'batch') {
resp = await chrome.runtime.sendMessage({ action: 'batch', commands: req.commands, tabId: req.tabId });
} else if (cmd === 'tabs') {
resp = await chrome.runtime.sendMessage({ action: 'tabs', method: req.method, tabId: req.tabId });
} else {

View File

@@ -44,7 +44,8 @@
"description": "万能网页操控工具。通过执行 JavaScript 脚本实现对浏览器的完全控制如点击、滚动、提取特定数据。鼓励在有把握情况下记忆中有selector/做法等精准使用以减少web_scan调用。执行结果可选择保存到本地文件进行后续分析。",
"parameters": {"type": "object", "properties": {
"script": {"type": "string", "description": "要执行的 JavaScript 代码或JS文件路径。"},
"save_to_file": {"type": "string", "description": "可选。将 JS 执行结果js_return保存到的文件路径。该功能不支持 await 等异步结果。"}}, "required": ["script"]}
"save_to_file": {"type": "string", "description": "结果存文件,适合返回值较长时。不支持await。", "default": ""},
"no_monitor": {"type": "boolean", "description": "跳过页面变更监控仅读取信息时用省2-3秒。", "default": false}}, "required": ["script"]}
}},
{"type": "function", "function": {
"name": "update_working_checkpoint",

7
ga.py
View File

@@ -158,7 +158,7 @@ def log_memory_access(path):
stats[fname] = {'count': stats.get(fname, {}).get('count', 0) + 1, 'last': datetime.now().strftime('%Y-%m-%d')}
with open(stats_file, 'w', encoding='utf-8') as f: json.dump(stats, f, indent=2, ensure_ascii=False)
def web_execute_js(script, switch_tab_id=None):
def web_execute_js(script, switch_tab_id=None, no_monitor=False):
"""
执行 JS 脚本来控制浏览器,并捕获结果和页面变化。
script: 要执行的 JavaScript 代码字符串。
@@ -180,7 +180,7 @@ def web_execute_js(script, switch_tab_id=None):
if len(driver.get_all_sessions()) == 0:
return {"status": "error", "msg": "没有可用的浏览器标签页请先打开一个浏览器标签页且确认TMWebDriver浏览器tempermonkey插件已安装并启用。"}
if switch_tab_id: driver.default_session_id = switch_tab_id
result = execute_js_rich(script, driver)
result = execute_js_rich(script, driver, no_monitor=no_monitor)
return result
except Exception as e:
return {"status": "error", "msg": format_error(e)}
@@ -318,7 +318,8 @@ class GenericAgentHandler(BaseHandler):
with open(abs_path, 'r', encoding='utf-8') as f: script = f.read()
save_to_file = args.get("save_to_file", "")
switch_tab_id = args.get("switch_tab_id") or args.get("tab_id")
result = web_execute_js(script, switch_tab_id=switch_tab_id)
no_monitor = args.get("no_monitor", False)
result = web_execute_js(script, switch_tab_id=switch_tab_id, no_monitor=no_monitor)
if save_to_file and "js_return" in result:
content = str(result["js_return"] or '')
abs_path = self._get_abs_path(save_to_file)

View File

@@ -8,13 +8,14 @@
## 限制(isTrusted)
- JS dispatch的事件`isTrusted=false`,敏感操作(文件上传/部分按钮)会被浏览器拦截
- ⭐**首选绕过CDP桥**——CDP派发的Input事件是浏览器原生级别(isTrusted=true)且无需前台见下方CDP章节
- 文件上传JS无法填充`<input type=file>`仍需ljqCtrl物理点击+Win32轮询文件对话框
- 流程SetForegroundWindow→ljqCtrl点上传按钮→FindWindow轮询对话框→输入路径→轮询关闭
- 文件上传JS无法填充`<input type=file>`
- ⭐首选：CDP batch，getDocument→querySelector→DOM.setFileInputFiles(无需前台/物理点击)
- 备选ljqCtrl物理点击SetForegroundWindow→点上传按钮→FindWindow轮询对话框→输入路径→轮询关闭
- 备选:元素→屏幕物理坐标(ljqCtrl/PostMessage点击前必算)JS一次取rect+窗口信息,公式:
- `physX = (screenX + rect中心x) * dpr``physY = (screenY + chromeH + rect中心y) * dpr`
- chromeH = outerHeight - innerHeightdpr = devicePixelRatio
- 注意screenX/Y也是CSS像素所有值先加后统一乘dpr
- 结论:读信息+普通操作用TMWebDriver需isTrusted事件首选CDP桥文件上传需配合ljqCtrl
- 结论:读信息+普通操作用TMWebDriver；需isTrusted事件首选CDP桥；文件上传首选CDP三连(备选ljqCtrl)
## 导航
- `web_scan` 仅读当前页不导航,切换网站用 `web_execute_js` + `location.href='url'`
@@ -54,8 +55,13 @@ el.id = '__ljq_ctrl'; el.style.display = 'none';
el.textContent = JSON.stringify({cmd:'...', ...});
document.body.appendChild(el); // 响应写回el.textContent
```
命令:`{cmd:'tabs'}` | `{cmd:'cookies'}` | `{cmd:'cdp', tabId:N, method:'...', params:{...}}`
- CDP可用任意方法(Input/Network/DOM/Page/Runtime/Emulation等)每次attach→send→detach
命令:`{cmd:'tabs'}` | `{cmd:'cookies'}` | `{cmd:'cdp', tabId:N, method:'...', params:{...}}`
- ⭐batch混合`{cmd:'batch', commands:[{cmd:'cookies'},{cmd:'tabs'},{cmd:'cdp',...},...]}`
- 返回`{ok:true, results:[...]}`；一次请求多命令，CDP懒attach复用session
- `$N.path`引用第N个结果字段(0-indexed),如`"nodeId":"$2.root.nodeId"`
- 典型:文件上传三连 getDocument→querySelector(input[type=file])→setFileInputFiles
- ⚠tabId：CDP默认sender.tab.id(当前注入页)，跨tab需显式tabId或先batch内tabs查
- CDP可用任意方法(Input/Network/DOM/Page/Runtime/Emulation等)单条每次attach→send→detach
- ⭐跨tab无需前台指定tabId即可操作后台标签页
- ⭐绕过isTrustedCDP派发的Input事件是浏览器原生级别
@@ -66,7 +72,8 @@ document.body.appendChild(el); // 响应写回el.textContent
-多RenderWidgetHostHWND共存必须按父窗口标题匹配再取子窗口
## 验证码/页面视觉截图
- 优先JS `canvas.toDataURL()` 直接拿base64验证码是canvas/img时最干净无需截屏
- ⭐首选CDP截图`Page.captureScreenshot`(format:'png')→返回base64无需前台/后台tab也行全页高清
- 验证码canvas/imgJS `canvas.toDataURL()` 直接拿base64最干净
- 备选:`window.open(location.href,'_blank')` 前台开新标签→win32截图→完后close
- GM_openInTab在web_execute_js不可用非油猴上下文
- 浏览器无JS API切标签页只能开新的来保证前台

View File

@@ -860,9 +860,11 @@ def get_html(driver, cutlist=False, maxchars=28000, instruction="", extra_js="")
if len(ss) > maxchars: ss = ss[:maxchars] + ' ... [TRUNCATED]'
return ss
def execute_js_rich(script, driver):
def execute_js_rich(script, driver, no_monitor=False):
last_html = None
if not no_monitor:
try: last_html = get_html(driver, cutlist=False, extra_js=temp_monitor_js)
except: last_html = None
except: pass
result = None; error_msg = None; reloaded = False; newTabs = []
before_sids = set(driver.get_session_dict().keys())
try:
@@ -889,6 +891,7 @@ def execute_js_rich(script, driver):
rr['environment']['newTabs'] = newTabs
rr['suggestion'] = "页面已刷新,以上新标签页在执行期间连接。"
if error_msg: rr['error'] = error_msg
if no_monitor: return rr
if not reloaded:
try: rr['transients'] = get_temp_texts(driver)
except: rr['transients'] = []