fix: add smartProcessResult to buildCdpScript to prevent CDP serialization errors on complex DOM (GitHub issues, Gmail, etc.); update SOP docs

This commit is contained in:
Liang Jiaqing
2026-03-30 22:30:34 +08:00
parent 18d5aafed8
commit 72a85a0823
3 changed files with 40 additions and 16 deletions

View File

@@ -133,7 +133,7 @@ class TMWebDriver:
current_tab_ids = {str(tab['id']) for tab in tabs} current_tab_ids = {str(tab['id']) for tab in tabs}
for sid in list(driver.sessions.keys()): for sid in list(driver.sessions.keys()):
sess = driver.sessions[sid] sess = driver.sessions[sid]
if sess.type == 'ext_ws' and sess.ws_client == self and sid not in current_tab_ids: if sess.type == 'ext_ws' and sid not in current_tab_ids:
sess.mark_disconnected() sess.mark_disconnected()
for tab in tabs: for tab in tabs:
session_id = str(tab['id']) session_id = str(tab['id'])
@@ -172,13 +172,10 @@ class TMWebDriver:
self.latest_session_id = session_id self.latest_session_id = session_id
if self.default_session_id is None: self.default_session_id = session_id if self.default_session_id is None: self.default_session_id = session_id
def _unregister_client(self, client: WebSocket) -> None: def _unregister_client(self, client: WebSocket) -> None:
for session in self.sessions.values(): for session in self.sessions.values():
if session.ws_client == client: if session.ws_client == client: session.mark_disconnected()
session.mark_disconnected()
break
def execute_js(self, code, timeout=15, session_id=None) -> Any: def execute_js(self, code, timeout=15, session_id=None) -> Any:
if session_id is None: session_id = self.default_session_id if session_id is None: session_id = self.default_session_id

View File

@@ -153,9 +153,27 @@ function buildPageScript(code) {
})()`; })()`;
} }
// --- Minimal CDP script: no smartProcessResult, returnByValue handles serialization --- // --- CDP script: includes smartProcessResult to avoid "Object reference chain is too long" ---
function buildCdpScript(code) { function buildCdpScript(code) {
return `(async () => { return `(async () => {
function smartProcessResult(result) {
if (result === null || result === undefined || typeof result !== 'object') return result;
if (typeof jQuery !== 'undefined' && result instanceof jQuery) {
const elements = []; for (let i = 0; i < result.length; i++) { if (result[i] && result[i].nodeType === 1) elements.push(result[i].outerHTML); } return elements;
}
if (result instanceof NodeList || result instanceof HTMLCollection) {
const elements = []; for (let i = 0; i < result.length; i++) { if (result[i] && result[i].nodeType === 1) elements.push(result[i].outerHTML); } return elements;
}
if (result.nodeType === 1) return result.outerHTML;
if (!Array.isArray(result) && typeof result === 'object' && 'length' in result && typeof result.length === 'number') {
const firstElement = result[0];
if (firstElement && firstElement.nodeType === 1) {
const elements = []; const length = Math.min(result.length, 100);
for (let i = 0; i < length; i++) { const elem = result[i]; if (elem && elem.nodeType === 1) elements.push(elem.outerHTML); } return elements;
}
}
try { return JSON.parse(JSON.stringify(result, function(key, value) { if (typeof value === 'object' && value !== null) { if (value.nodeType === 1) return value.outerHTML; if (value === window || value === document) return '[Object]'; } return value; })); } catch (e) { return '[无法序列化: ' + e.message + ']'; }
}
try { try {
const jsCode = ${JSON.stringify(code)}.trim(); const jsCode = ${JSON.stringify(code)}.trim();
const lines = jsCode.split(/\\r?\\n/).filter(l => l.trim()); const lines = jsCode.split(/\\r?\\n/).filter(l => l.trim());
@@ -169,7 +187,7 @@ function buildCdpScript(code) {
if (e instanceof SyntaxError && (/return/i.test(e.message) || /await/i.test(e.message))) { r = await (new AsyncFunction(jsCode))(); } else throw e; if (e instanceof SyntaxError && (/return/i.test(e.message) || /await/i.test(e.message))) { r = await (new AsyncFunction(jsCode))(); } else throw e;
} }
} }
return { ok: true, data: r }; return { ok: true, data: smartProcessResult(r) };
} catch (e) { } catch (e) {
return { ok: false, error: { name: e.name || 'Error', message: e.message || String(e), stack: e.stack || '' } }; return { ok: false, error: { name: e.name || 'Error', message: e.message || String(e), stack: e.stack || '' } };
} }
@@ -253,6 +271,10 @@ function connectWS() {
args: [buildPageScript(data.code)] args: [buildPageScript(data.code)]
}); });
res = result[0]?.result; res = result[0]?.result;
if (res === null || res === undefined) {
console.log('[TMWD-WS] executeScript returned null/undefined, treating as CSP issue');
res = { ok: false, error: { name: 'Error', message: 'executeScript returned null (possible CSP or context issue)', stack: '' }, csp: true };
}
} catch (e) { } catch (e) {
console.log('[TMWD-WS] scripting.executeScript failed:', e.message); console.log('[TMWD-WS] scripting.executeScript failed:', e.message);
res = { ok: false, error: { name: e.name || 'Error', message: e.message || String(e), stack: e.stack || '' }, csp: true }; res = { ok: false, error: { name: e.name || 'Error', message: e.message || String(e), stack: e.stack || '' }, csp: true };
@@ -282,6 +304,7 @@ function connectWS() {
if (res?.ok) { if (res?.ok) {
ws.send(JSON.stringify({ type: 'result', id: data.id, result: res.data, newTabs })); ws.send(JSON.stringify({ type: 'result', id: data.id, result: res.data, newTabs }));
} else { } else {
console.log(res);
ws.send(JSON.stringify({ type: 'error', id: data.id, error: res?.error || 'Unknown error', newTabs })); ws.send(JSON.stringify({ type: 'error', id: data.id, error: res?.error || 'Unknown error', newTabs }));
} }
} catch (e) { } catch (e) {

View File

@@ -1,9 +1,10 @@
# TMWebDriver SOP # TMWebDriver SOP
- 禁止import直接用web_scan/web_execute_js工具。本文件只记录特性和坑。 - 禁止import直接用web_scan/web_execute_js工具。本文件只记录特性和坑。
- 底层:`../TMWebDriver.py`通过Tampermonkey脚本接管用户浏览器(保留登录态/Cookie) - 底层:`../TMWebDriver.py`通过Chrome扩展(非Tampermonkey)接管用户浏览器(保留登录态/Cookie)
- 非Selenium/Playwright不需调试浏览器或新数据目录 - 非Selenium/Playwright不需调试浏览器或新数据目录
- 支撑 `web_scan`(只读DOM) / `web_execute_js`(执行JS) 等高层工具 - 支撑 `web_scan`(只读DOM) / `web_execute_js`(执行JS) 等高层工具
- ⚠扩展更新后已打开的旧tab不会自动加载新版脚本→scan/execute_js无ACK→需刷新页面或切到新tab
## 通用特性 ## 通用特性
- ✅web_execute_js**完美支持顶层await**v0.4+),可直接`await fetch()`/`await new Promise()` - ✅web_execute_js**完美支持顶层await**v0.4+),可直接`await fetch()`/`await new Promise()`
@@ -44,9 +45,9 @@ fetch('PDF_URL').then(r=>r.blob()).then(b=>{
## Chrome后台标签节流 ## Chrome后台标签节流
- 后台标签中`setTimeout`被Chrome intensive throttling延迟到≥1min/次 - 后台标签中`setTimeout`被Chrome intensive throttling延迟到≥1min/次
- TM脚本中detect_newtab的轮询(`setTimeout 150ms × 10`)会超时 - 扩展content script中detect_newtab的轮询(`setTimeout 150ms × 10`)会超时
- 已修复:移除TM脚本内轮询改由Python侧`get_session_dict()`前后对比检测新标签 - 已修复移除脚本内轮询改由Python侧`get_session_dict()`前后对比检测新标签
- 同理:TM脚本中任何后台逻辑都应避免依赖setTimeout轮询 - 同理:扩展脚本中任何后台逻辑都应避免依赖setTimeout轮询
## CDP桥(tmwd_cdp_bridge扩展) ⭐首选 ## CDP桥(tmwd_cdp_bridge扩展) ⭐首选
扩展路径:`assets/tmwd_cdp_bridge/`(需安装含debugger权限) 扩展路径:`assets/tmwd_cdp_bridge/`(需安装含debugger权限)
@@ -108,7 +109,11 @@ document.body.appendChild(el); // 响应写回el.textContent
var realX = x * zoom; var realY = y * zoom; var realX = x * zoom; var realY = y * zoom;
``` ```
- iframe内元素CDP点击坐标需合成 `finalX = iframeRect.x + elRect.x` - iframe内元素CDP点击坐标需合成 `finalX = iframeRect.x + elRect.x`
- 跨域iframe拿不到contentDocument用CDP `Target.getTargets`找iframe targetId → `Target.attachToTarget`建独立会话 - 跨域iframe拿不到contentDocument
- ⚠`Target.getTargets`/`Target.attachToTarget`在CDP桥中返回"Not allowed"(chrome.debugger权限限制)
- ⭐**已验证方案**`Page.getFrameTree`找iframe frameId → `Page.createIsolatedWorld({frameId})`获取contextId → `Runtime.evaluate({expression, contextId})`在iframe中执行JS
- batch链式引用`$0.frameTree.childFrames`遍历找url匹配的frame`$1.executionContextId`传给evaluate
- postMessage中继方案仅在content script已注入iframe时有效第三方支付iframe通常无注入
## CDP文本输入未验证BBS#23 ## CDP文本输入未验证BBS#23
- `Input.insertText({text:'...'})` — 直接插入不触发keydown/keyup - `Input.insertText({text:'...'})` — 直接插入不触发keydown/keyup
@@ -143,7 +148,6 @@ document.body.appendChild(el); // 响应写回el.textContent
- ⭐首选CDP截图`Page.captureScreenshot`(format:'png')→返回base64无需前台/后台tab也行全页高清 - ⭐首选CDP截图`Page.captureScreenshot`(format:'png')→返回base64无需前台/后台tab也行全页高清
- 验证码canvas/imgJS `canvas.toDataURL()` 直接拿base64最干净 - 验证码canvas/imgJS `canvas.toDataURL()` 直接拿base64最干净
- 备选:`window.open(location.href,'_blank')` 前台开新标签→win32截图→完后close - 备选:`window.open(location.href,'_blank')` 前台开新标签→win32截图→完后close
- GM_openInTab在web_execute_js不可用非油猴上下文
## 直接import(仅作调试使用) ## 直接import(仅作调试使用)
- `sys.path.insert(0, GenericAgent根目录)`, `from TMWebDriver import TMWebDriver` - `sys.path.insert(0, GenericAgent根目录)`, `from TMWebDriver import TMWebDriver`
@@ -152,7 +156,7 @@ document.body.appendChild(el); // 响应写回el.textContent
## 跨域iframe操控(postMessage中继) ## 跨域iframe操控(postMessage中继)
- 跨域iframe的contentDocument不可访问web_execute_js只在顶层执行 - 跨域iframe的contentDocument不可访问web_execute_js只在顶层执行
- TM脚本已改造iframe内不return改为监听postMessage并eval执行+回传结果 - 扩展content script已支持iframe内不return改为监听postMessage并eval执行+回传结果
- 顶层发送:`iframe.contentWindow.postMessage({type:'ljq_exec', id, code}, '*')` - 顶层发送:`iframe.contentWindow.postMessage({type:'ljq_exec', id, code}, '*')`
- iframe回传`{type:'ljq_result', id, result}` 通过window.addEventListener('message')接收 - iframe回传`{type:'ljq_result', id, result}` 通过window.addEventListener('message')接收
- ⚠只能eval表达式不支持return/函数体包装,构造代码时注意 - ⚠只能eval表达式不支持return/函数体包装,构造代码时注意
@@ -161,8 +165,8 @@ document.body.appendChild(el); // 响应写回el.textContent
## 连不上排查 ## 连不上排查
web_scan失败时按序排查 web_scan失败时按序排查
①TM没装?→遍历本机所有Chromium浏览器(Chrome/Edge/Brave…)用户数据目录下Extensions/各子目录manifest.json搜"tampermonkey" ①扩展没装?→检查Chrome扩展列表(chrome://extensions)是否有TMWebDriver扩展
没找到→走web_setup_sop找到→记住装在哪个浏览器 没找到→走web_setup_sop找到→确认已启用
②浏览器没开?→检查①对应的浏览器进程是否在跑(tasklist/ps)没有则启动并打开正常URL⚠about:blank等内部页不加载扩展 ②浏览器没开?→检查①对应的浏览器进程是否在跑(tasklist/ps)没有则启动并打开正常URL⚠about:blank等内部页不加载扩展
③WS后台挂了→socket.connect_ex(('127.0.0.1',18766))非0即dead→手动`from TMWebDriver import TMWebDriver; TMWebDriver()`起master ③WS后台挂了→socket.connect_ex(('127.0.0.1',18766))非0即dead→手动`from TMWebDriver import TMWebDriver; TMWebDriver()`起master