diff --git a/agent_loop.py b/agent_loop.py index 5e8404e..e9c69db 100644 --- a/agent_loop.py +++ b/agent_loop.py @@ -23,8 +23,7 @@ class BaseHandler: ret = yield from try_call_generator(getattr(self, method_name), args, response) _ = yield from try_call_generator(self.tool_after_callback, tool_name, args, response, ret) return ret - elif tool_name == 'bad_json': - return StepOutcome(None, next_prompt=args.get('msg', 'bad_json'), should_exit=False) + elif tool_name == 'bad_json': return StepOutcome(None, next_prompt=args.get('msg', 'bad_json'), should_exit=False) else: yield f"未知工具: {tool_name}\n" return StepOutcome(None, next_prompt=f"未知工具 {tool_name}", should_exit=False) @@ -43,9 +42,6 @@ def get_pretty_json(data): data = data.copy(); data["script"] = data["script"].replace("; ", ";\n ") return json.dumps(data, indent=2, ensure_ascii=False).replace('\\n', '\n') -_TOOL_ICONS = {'file_read': '📖', 'file_write': '✏️', 'file_patch': '✏️', 'code_run': '⚙️', - 'web_scan': '🌐', 'web_execute_js': '🌐', 'update_working_checkpoint': '💾', 'ask_user': '❓', 'start_long_term_update': '💾'} - def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, max_turns=40, verbose=True, initial_user_content=None): messages = [ {"role": "system", "content": system_prompt}, @@ -72,11 +68,10 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, tool_results = []; next_prompts = set(); should_exit = None for ii, tc in enumerate(tool_calls): tool_name, args, tid = tc['tool_name'], tc['args'], tc.get('id', '') - icon = _TOOL_ICONS.get(tool_name, '🛠️') if tool_name == 'no_tool': pass else: - if verbose: yield f"{icon} 正在调用工具: `{tool_name}` 📥参数:\n````text\n{get_pretty_json(args)}\n````\n" - else: yield f"{icon} {tool_name}({_compact_tool_args(tool_name, args)})\n\n\n" + if verbose: yield f"🛠️ 正在调用工具: `{tool_name}` 📥参数:\n````text\n{get_pretty_json(args)}\n````\n" + else: yield f"🛠️ {tool_name}({_compact_tool_args(tool_name, args)})\n\n\n" handler.current_turn = turn gen = handler.dispatch(tool_name, args, response, index=ii) try: diff --git a/assets/tmwd_cdp_bridge/background.js b/assets/tmwd_cdp_bridge/background.js index 6792ee9..4a0c87f 100644 --- a/assets/tmwd_cdp_bridge/background.js +++ b/assets/tmwd_cdp_bridge/background.js @@ -32,6 +32,27 @@ async function handleExtMessage(msg, sender) { } } catch (e) { return { ok: false, error: e.message }; } } + if (msg.cmd === 'management') { + try { + if (msg.method === 'list') { + const all = await chrome.management.getAll(); + return { ok: true, data: all.map(e => ({ id: e.id, name: e.name, enabled: e.enabled, type: e.type, version: e.version })) }; + } + if (msg.method === 'reload') { + chrome.alarms.create('tmwd-self-reload', { when: Date.now() + 200 }); + return { ok: true }; + } + if (msg.method === 'disable') { + await chrome.management.setEnabled(msg.extId, false); + return { ok: true }; + } + if (msg.method === 'enable') { + await chrome.management.setEnabled(msg.extId, true); + return { ok: true }; + } + return { ok: false, error: 'Unknown method: ' + msg.method }; + } catch (e) { return { ok: false, error: e.message }; } + } return { ok: false, error: 'Unknown cmd: ' + msg.cmd }; } @@ -136,11 +157,12 @@ function buildExecScript(code, errorHandler) { const lastLine = lines.length > 0 ? lines[lines.length - 1].trim() : ''; const AsyncFunction = Object.getPrototypeOf(async function(){}).constructor; let r; + function _air(c) { const ls = c.split(/\\r?\\n/); let i = ls.length - 1; while (i >= 0 && !ls[i].trim()) i--; if (i < 0) return c; const t = ls[i].trim(); if (/^(return |return;|return$|let |const |var |if |if\\(|for |for\\(|while |while\\(|switch|try |throw |class |function |async |import |export |\\/\\/|})/.test(t)) return c; ls[i] = ls[i].match(/^(\\s*)/)[1] + 'return ' + t; return ls.join('\\n'); } if (lastLine.startsWith('return')) { r = await (new AsyncFunction(jsCode))(); } else { try { r = eval(jsCode); if (r instanceof Promise) r = await r; } catch (e) { - if (e instanceof SyntaxError && (/return/i.test(e.message) || /await/i.test(e.message))) { r = await (new AsyncFunction(jsCode))(); } else throw e; + if (e instanceof SyntaxError && (/return/i.test(e.message) || /await/i.test(e.message))) { r = await (new AsyncFunction(_air(jsCode)))(); } else throw e; } } return { ok: true, data: smartProcessResult(r) }; @@ -190,6 +212,10 @@ async function isServerAlive() { } chrome.alarms.onAlarm.addListener(async (alarm) => { + if (alarm.name === 'tmwd-self-reload') { + chrome.runtime.reload(); + return; + } if (alarm.name === 'tmwd-ws-keepalive') { // Keepalive: ping to keep SW alive + detect dead connections if (ws && ws.readyState === WebSocket.OPEN) { @@ -220,8 +246,11 @@ async function handleWsExec(data) { ws.send(JSON.stringify({ type: 'error', id: data.id, error: 'No tabId provided' })); return; } + // Use onCreated listener to reliably capture new tabs (avoids race condition with query-diff) + const newTabIds = new Set(); + const onCreated = (tab) => { newTabIds.add(tab.id); }; + chrome.tabs.onCreated.addListener(onCreated); try { - const tabsBefore = new Set((await chrome.tabs.query({})).map(t => t.id)); let res; try { const result = await chrome.scripting.executeScript({ @@ -260,7 +289,14 @@ async function handleWsExec(data) { res = { ok: false, error: { name: 'Error', message: 'CDP fallback failed: ' + cdpErr.message, stack: '' } }; } } - const newTabs = (await chrome.tabs.query({})).filter(t => !tabsBefore.has(t.id)).map(t => ({id: t.id, url: t.url, title: t.title})); + // Grace period for async tab creation (e.g. link click with target=_blank) + if (newTabIds.size === 0) await new Promise(r => setTimeout(r, 200)); + chrome.tabs.onCreated.removeListener(onCreated); + // Get full info for captured new tabs + const newTabs = []; + for (const id of newTabIds) { + try { const t = await chrome.tabs.get(id); newTabs.push({id: t.id, url: t.url, title: t.title}); } catch (_) {} + } if (res?.ok) { ws.send(JSON.stringify({ type: 'result', id: data.id, result: res.data, newTabs })); } else { @@ -269,6 +305,8 @@ async function handleWsExec(data) { } } catch (e) { ws.send(JSON.stringify({ type: 'error', id: data.id, error: { name: e.name || 'Error', message: e.message || String(e), stack: e.stack || '' } })); + } finally { + chrome.tabs.onCreated.removeListener(onCreated); } } diff --git a/assets/tmwd_cdp_bridge/manifest.json b/assets/tmwd_cdp_bridge/manifest.json index 295b577..b0366ff 100644 --- a/assets/tmwd_cdp_bridge/manifest.json +++ b/assets/tmwd_cdp_bridge/manifest.json @@ -10,7 +10,8 @@ "debugger", "scripting", "alarms", - "declarativeNetRequest" + "declarativeNetRequest", + "management" ], "host_permissions": [""], "background": { diff --git a/assets/tools_schema.json b/assets/tools_schema.json index 244c1f2..42e1fb7 100644 --- a/assets/tools_schema.json +++ b/assets/tools_schema.json @@ -35,7 +35,7 @@ }}, {"type": "function", "function": { "name": "web_scan", - "description": "Get simplified HTML and tab list. Filters sidebars/floating elements; use execute_js for filtered content. Call after switching pages", + "description": "Get simplified HTML and tab list. Removes hidden/floating/covered elements. Call after switching pages", "parameters": {"type": "object", "properties": { "tabs_only": {"type": "boolean", "description": "Show tab list only, no HTML", "default": false}, "switch_tab_id": {"type": "string", "description": "[Optional] Tab ID to switch to"}, @@ -43,7 +43,7 @@ }}, {"type": "function", "function": { "name": "web_execute_js", - "description": "Execute JS to control browser. Use precisely to reduce web_scan calls. Put code in ```javascript blocks in reply body to avoid escaping", + "description": "Execute JS to control browser. No guessing. Act accurately to reduce web_scan calls. Put code in ```javascript blocks in reply body to avoid escaping", "parameters": {"type": "object", "properties": { "script": {"type": "string", "description": "[Mutually exclusive] JS code or script path. NEVER use this param when use reply code block"}, "save_to_file": {"type": "string", "description": "file path; **only** for long result", "default": ""}, diff --git a/assets/tools_schema_cn.json b/assets/tools_schema_cn.json index 507067a..ad7a6b6 100644 --- a/assets/tools_schema_cn.json +++ b/assets/tools_schema_cn.json @@ -35,7 +35,7 @@ }}, {"type": "function", "function": { "name": "web_scan", - "description": "获取当前页面的简化HTML内容和标签页列表。注意:简化会过滤边栏、浮动元素等非主体内容,如需查看被过滤内容请用execute_js。切换页面后一般应先调用查看", + "description": "获取当前页面的简化HTML内容和标签页列表。会移除隐藏/浮动/被遮盖的元素。切换页面后一般应先调用查看", "parameters": {"type": "object", "properties": { "tabs_only": {"type": "boolean", "description": "仅返回标签页列表和当前标签信息,不获取HTML内容", "default": false}, "switch_tab_id": {"type": "string", "description": "可选的标签页 ID。如果提供,系统将在扫描前切换到该标签页"}, @@ -43,7 +43,7 @@ }}, {"type": "function", "function": { "name": "web_execute_js", - "description": "执行 JS 控制浏览器。建议精准使用减少 web_scan。为免转义问题,代码优先考虑放回复正文 ```javascript 块", + "description": "执行 JS 控制浏览器。禁止猜测,准确操作以减少 web_scan 调用。为免转义问题,代码优先考虑放回复正文 ```javascript 块", "parameters": {"type": "object", "properties": { "script": {"type": "string", "description": "[Optional] JS代码或路径。为免转义建议留空,改用正文代码块(与此参数互斥)"}, "save_to_file": {"type": "string", "description": "结果存文件,适合返回值较长时", "default": ""}, diff --git a/memory/tmwebdriver_sop.md b/memory/tmwebdriver_sop.md index 2ac2b2b..eff344d 100644 --- a/memory/tmwebdriver_sop.md +++ b/memory/tmwebdriver_sop.md @@ -2,26 +2,18 @@ - 禁止import,直接用web_scan/web_execute_js工具。本文件只记录特性和坑。 - 底层:`../TMWebDriver.py`通过Chrome扩展(非Tampermonkey)接管用户浏览器(保留登录态/Cookie) -- 非Selenium/Playwright,不需调试浏览器或新数据目录 -- 支撑 `web_scan`(只读DOM) / `web_execute_js`(执行JS) 等高层工具 -- ⚠扩展更新后,已打开的旧tab不会自动加载新版脚本→scan/execute_js无ACK→需刷新页面或切到新tab +- 非Selenium/Playwright,保留用户浏览器登录态 +- ⚠扩展更新后旧tab的content script不重载→需刷新页面 ## 通用特性 -- ✅web_execute_js**完美支持顶层await**(v0.4+),可直接`await fetch()`/`await new Promise()`等 - - ⚠使用await时需**显式`return`**才能拿到返回值(底层async包裹,不写return则返回null) -- ✅web_scan**自动穿透同源iframe**:无需手动操作,scan直接递归输出iframe内部DOM。跨域iframe则需CDP或postMessage(见下方章节) +- ⚠web_execute_js里使用`await`时需**显式`return`**才能拿到返回值(底层async包裹,不写return则返回null) +- ✅web_scan自动穿透同源iframe;跨域iframe需CDP或postMessage(见下方章节) ## 限制(isTrusted) -- JS dispatch的事件`isTrusted=false`,敏感操作(文件上传/部分按钮)会被浏览器拦截 -- ⭐**首选绕过:CDP桥**——CDP派发的Input事件是浏览器原生级别(isTrusted=true),且无需前台,见下方CDP章节 -- 文件上传:JS无法填充`` - - ⭐首选CDP batch:getDocument→querySelector→DOM.setFileInputFiles(无需前台/物理点击) - - 备选ljqCtrl物理点击:SetForegroundWindow→点上传按钮→FindWindow轮询对话框→输入路径→轮询关闭 -- 备选:元素→屏幕物理坐标(ljqCtrl/PostMessage点击前必算):JS一次取rect+窗口信息,公式: - - `physX = (screenX + rect中心x) * dpr`,`physY = (screenY + chromeH + rect中心y) * dpr` - - chromeH = outerHeight - innerHeight,dpr = devicePixelRatio - - 注意:screenX/Y也是CSS像素,所有值先加后统一乘dpr -- 结论:读信息+普通操作用TMWebDriver;需isTrusted事件首选CDP桥;文件上传首选CDP三连(备选ljqCtrl) +- JS事件`isTrusted=false`,敏感操作(如文件上传/部分按钮)可能被拦截;这类场景首选**CDP桥** +- ⚠JS点击按钮打不开新tab→可能是浏览器弹窗拦截,换CDP点击试试 +- 文件上传:JS无法填充``;首选CDP batch:getDocument→querySelector→DOM.setFileInputFiles,备选ljqCtrl物理点击 +- 需转物理坐标时:`physX = (screenX + rect中心x) * dpr`,`physY = (screenY + chromeH + rect中心y) * dpr`;其中 `chromeH = outerHeight - innerHeight` ## 导航 - `web_scan` 仅读当前页不导航,切换网站用 `web_execute_js` + `location.href='url'` @@ -45,14 +37,11 @@ fetch('PDF_URL').then(r=>r.blob()).then(b=>{ 注意:需同源或CORS允许,跨域先导航到目标域再执行 ## Chrome后台标签节流 -- 后台标签中`setTimeout`被Chrome intensive throttling延迟到≥1min/次 -- 扩展content script中detect_newtab的轮询(`setTimeout 150ms × 10`)会超时 -- 已修复:移除脚本内轮询,改由Python侧`get_session_dict()`前后对比检测新标签 -- 同理:扩展脚本中任何后台逻辑都应避免依赖setTimeout轮询 +- 后台标签中`setTimeout`被Chrome intensive throttling延迟到≥1min/次,扩展脚本中避免依赖setTimeout轮询 ## CDP桥(tmwd_cdp_bridge扩展) ⭐首选 扩展路径:`assets/tmwd_cdp_bridge/`(需安装,含debugger权限) -⚠TID约定标识(非密钥):首次运行自动生成到`assets/tmwd_cdp_bridge/config.js`(已gitignore),扩展通过manifest引用 +⚠TID约定标识:首次运行自动生成到`assets/tmwd_cdp_bridge/config.js`(已gitignore),扩展通过manifest引用 调用:`web_execute_js` script直传JSON字符串(工具层自动识别对象格式,走WS→background.js cmd路由) ```js // 直接传JSON字符串作为script参数,无需DOM操作 @@ -62,43 +51,23 @@ web_execute_js script='{"cmd": "cdp", "tabId": N, "method": "...", "params": {.. web_execute_js script='{"cmd": "batch", "commands": [...]}' // 返回值直接是JSON结果 ``` -⚠旧DOM方式(TID元素+MutationObserver)仍可用但已不推荐 -单命令:`{cmd:'tabs'}` | `{cmd:'cookies'}` | `{cmd:'cdp', tabId:N, method:'...', params:{...}}` +通信方式:⭐JSON字符串直传(首选) | TID DOM方式(TID元素+MutationObserver,web_scan/execute_js底层依赖) +单命令:`{cmd:'tabs'}` | `{cmd:'cookies'}` | `{cmd:'cdp', tabId:N, method:'...', params:{...}}` | `{cmd:'management', method:'list|reload|disable|enable', extId:'...'}` +- management:list返回所有扩展信息;reload/disable/enable需传extId - ⭐batch混合:`{cmd:'batch', commands:[{cmd:'cookies'},{cmd:'tabs'},{cmd:'cdp',...},...]}` - 返回`{ok:true, results:[...]}`,一次请求多命令,CDP懒attach复用session - 子命令会自动继承外层batch的tabId(如cookies命令可正确获取当前页面URL) - `$N.path`引用第N个结果字段(0-indexed),如`"nodeId":"$2.root.nodeId"` - - ⚠batch前序命令失败时后续`$N`引用拿到undefined,整条链路**静默失败不报错**,需检查返回results数组中每项的ok状态(未验证,BBS#46) - - 典型:文件上传三连 getDocument(**depth:1**性能优化,200ms+→个位数ms)→querySelector(input[type=file])→setFileInputFiles(未验证,BBS#38) - - ⚠nodeId路径一致性:getDocument+querySelector路径和performSearch+getSearchResults路径的nodeId**不互通**,同一batch内不可混用(未验证,BBS#45) - - ⚠文件上传后前端框架(React/Vue)可能不感知→JS补发**两个事件**(Vue3需input事件而非仅change)(未验证,BBS#35/#39): - ```js - el.dispatchEvent(new Event('input', {bubbles:true})); - el.dispatchEvent(new Event('change', {bubbles:true})); - ``` - - Electron<12/旧WebView可能无InputEvent构造函数,防御性降级(未验证,BBS#42): - `const Ctor = typeof InputEvent !== 'undefined' ? InputEvent : Event; el.dispatchEvent(new Ctor('input', {bubbles:true}));` - - 极端情况(框架仍不响应):Runtime.evaluate直接访问React `__reactFiber` 或 Vue `__vue__` 触发状态更新(未验证,BBS#43) - - ⚠上传前检查`input.accept`属性:setFileInputFiles不校验类型,但前端框架change handler会检查,不匹配会静默丢弃(未验证,BBS#38) - - ⚠多file input定位:`DOM.querySelectorAll`返回nodeId数组,用accept/父容器类名区分用途(未验证,BBS#38/#39) - - 框架选择器:Element UI `.el-upload__input` | Ant Design `.ant-upload input[type=file]` | Naive UI `.n-upload-trigger input[type=file]` | Dropzone `.dz-hidden-input`(未验证,BBS#39) - - ⚠Dropzone拖拽上传:90%底层仍创建隐藏``,先querySelectorAll('input[type=file]')全局扫(未验证,BBS#35/#38) - - ⭐轻量元素存在检测:`DOM.performSearch({query:'input[type=file]'})`返回resultCount,不触发DOM树构建,轮询等待元素时避免重复getDocument(未验证,BBS#39) - - performSearch支持三种语法:CSS选择器 / XPath(`//input[@type='file']`) / 纯文本,自动识别(未验证,BBS#41) - - ⭐瞬态file input处理(Ant Design等框架点击上传按钮时动态创建input,上传完立即销毁)(未验证,BBS#42/#43): - - 方案A(批处理):在同一batch内完成 performSearch→getSearchResults→setFileInputFiles→**discardSearchResults**,缩小input被销毁的时间窗口,discardSearchResults防searchId泄漏(未验证,BBS#46) - - 方案B(事件监听):`DOM.enable`后监听`DOM.childNodeInserted`事件捕获input创建瞬间,零延迟拿到nodeId - - 前提:须先对document.body的nodeId调`DOM.requestChildNodes`,否则CDP不推送子树变更 - - ⚠`DOM.disable`会使所有已获取nodeId失效,setFileInputFiles必须在disable之前。正确时序:DOM.enable→requestChildNodes→[等事件]→setFileInputFiles→DOM.disable(未验证,BBS#45) - - 方案C(猴子补丁兜底):Runtime.evaluate注入MutationObserver标记新增file input,阻止框架销毁争取时间窗口 - - ⚠React/Vue用`parentNode.removeChild(node)`而非`node.remove()`,需patch `Element.prototype.removeChild`过滤`input[type=file]`(未验证,BBS#45) - - ⚠Svelte等框架可能用`replaceChild`或`textContent=''`清空父容器间接移除,绕过removeChild补丁,极端场景性价比低建议回退方案B(未验证,BBS#46) - - ⚠阻止销毁会内存泄漏,用完后手动清理被标记的节点(未验证,BBS#45) - - FileList只读,最终仍需CDP setFileInputFiles + - ⚠batch前序命令失败时,后续`$N`引用会静默变成undefined;要检查results数组中每项的ok状态 + - 典型文件上传:getDocument(**depth:1**) → querySelector(`input[type=file]`) → setFileInputFiles + - 思想: + - 同一链路内保持nodeId来源一致,不混用querySelector路径与performSearch路径 + - 上传后前端框架可能不感知,必要时JS补发`input`/`change`事件 + - 上传前检查`input.accept`;多input时用accept/父容器语义区分 + - 等待元素优先用`DOM.performSearch('input[type=file]')`做轻量轮询 + - 瞬态input的核心是**缩短发现→setFileInputFiles时间窗**:优先同batch完成;再不行用DOM事件监听;猴子补丁仅作兜底思路 - ⚠tabId:CDP默认sender.tab.id(当前注入页),跨tab需显式tabId或先batch内tabs查 -- CDP可用任意方法(Input/Network/DOM/Page/Runtime/Emulation等),单条每次attach→send→detach - ⭐跨tab无需前台:指定tabId即可操作后台标签页 -- ⭐绕过isTrusted:CDP派发的Input事件是浏览器原生级别 ## CDP点击完整生命周期(未验证,BBS#23) - 通用点击需**三事件序列**:mouseMoved → mousePressed → mouseReleased(间隔50-100ms) @@ -118,10 +87,8 @@ web_execute_js script='{"cmd": "batch", "commands": [...]}' - postMessage中继方案仅在content script已注入iframe时有效,第三方支付iframe通常无注入 ## CDP文本输入(未验证,BBS#23) -- `Input.insertText({text:'...'})` — 直接插入,快,不触发keydown/keyup -- `Input.dispatchKeyEvent` — 逐键派发,慢但完整模拟 -- React/Vue受控组件:先insertText,再JS手动dispatch `input`事件(input事件不检查isTrusted) -- 简单输入框用insertText够用 +- `insertText`快但无key事件;受控组件需补dispatch `input`事件 +- 需完整键盘模拟时用`dispatchKeyEvent`逐键派发 ## CDP DOM域穿透 closed Shadow DOM(未验证,BBS#24/#25) - `DOM.getDocument({depth:-1, pierce:true})` 穿透所有Shadow边界(含closed) @@ -130,49 +97,29 @@ web_execute_js script='{"cmd": "batch", "commands": [...]}' - ⚠不能简化为对角线平均——元素有transform:rotate/skew时四点非矩形 - querySelector**不能跨Shadow边界写组合选择器**,需分步:先找host再在其shadow内找子元素 - ⚠nodeId在DOM变更后失效 → 用`backendNodeId`更稳定,或重新getDocument刷新 -- 渲染检查:`DOM.resolveNode` → `Runtime.callFunctionOn` 检查offsetHeight>0 -- 完整pipeline: getDocument(pierce) → querySelector → getBoxModel → 四点平均坐标 → Input三事件点击 -## autofill获取与登录 (需 v0.4+ 脚本支持 await) + +## autofill获取与登录 检测:web_scan输出input带`data-autofilled="true"`,value显示为受保护提示(非真实值,Chrome安全保护需点击释放) - ⚠**前置条件:必须先CDP `Page.bringToFront` 切tab到前台**,Chrome仅在前台tab释放autofill保护值,后台tab物理点击无效 -- ⭐**一键释放与登录**:利用顶层 `await`,在单次 `web_execute_js` 中连贯完成: - 1. CDP batch发送 `Page.bringToFront` 切到前台。 - 2. JS获取输入框坐标。 - 3. CDP发送 `Input.dispatchMouseEvent` (mousePressed) 物理点击释放autofill。 - 4. `await new Promise(r => setTimeout(r, 500))` 等待释放。 - 5. 派发 `input`/`change` 事件唤醒前端框架(解禁登录按钮)。 - 6. 触发登录点击。 -- ⚠只需 `mousePressed`,无需 `mouseReleased`。点击一个字段即释放全页。 -- ⚠使用await时需显式`return`返回值,否则async包裹层默认返回null。 +- ⭐**一键释放与登录**:bringToFront → mousePressed点任一字段(无需Released,一个释放全页) → 等500ms → 补input/change事件 → 点登录 ## 验证码/页面视觉截图 - ⭐首选CDP截图:`Page.captureScreenshot`(format:'png')→返回base64,无需前台/后台tab也行,全页高清 - 验证码canvas/img:JS `canvas.toDataURL()` 直接拿base64最干净 -- 备选:`window.open(location.href,'_blank')` 前台开新标签→win32截图→完后close ## simphtml与TMWebDriver调试 -- ⭐**simphtml唯一调试方法**:必须通过 `code_run` 注入JS到真实浏览器执行,因为Python端无法完全模拟DOM行为。 - ```python - import sys - sys.path.append('../') - from TMWebDriver import * - from simphtml import * - driver = TMWebDriver() - res = driver.execute_js(js_optHTML) # js_optHTML为simphtml中注入的JS代码 - ``` -- `d=TMWebDriver()`, `d.set_session('url_pattern')`, `d.execute_js('code')` → 返回`{'data': value}`(非裸值) -- 配合simphtml:`str(simphtml.optimize_html_for_tokens(html))` → 注意返回BS4 Tag需str() -- ⚠**DOMRect坑点(hasOverlap)**:`DOMRect` 对象在某些浏览器/上下文中可能缺少 `x` 和 `y` 属性(只有 `left`/`top`),直接访问 `rect.x` 会得到 `undefined`,导致数学计算(如重叠判定)变成 `NaN`,从而引发逻辑错误(如错误判定为重叠导致元素被误删)。必须兼容:`const x = rect.x !== undefined ? rect.x : rect.left;` +- simphtml调试必须通过`code_run`注入JS到真实浏览器(Python端无法模拟DOM) +- `d=TMWebDriver()`, `d.set_session('url_pattern')`, `d.execute_js(code)` → 返回`{'data': value}` +- simphtml:`str(simphtml.optimize_html_for_tokens(html))` — 返回BS4 Tag需str() +- ⚠**DOMRect坑(hasOverlap)**:某些上下文`rect.x/y`为undefined(只有left/top),导致NaN→误判重叠。兼容:`rect.x ?? rect.left` ## 跨域iframe操控(postMessage中继) -- 跨域iframe的contentDocument不可访问,web_execute_js只在顶层执行 -- 扩展content script已支持:iframe内不return,改为监听postMessage并eval执行+回传结果 +- 扩展content script支持:顶层发postMessage到iframe,iframe内eval+回传结果 - 顶层发送:`iframe.contentWindow.postMessage({type:'ljq_exec', id, code}, '*')` - iframe回传:`{type:'ljq_result', id, result}` 通过window.addEventListener('message')接收 -- ⚠只能eval表达式,不支持return/函数体包装,构造代码时注意 -- 流程:发postMessage→等→读window._ljqResults[id]获取结果 -- 已验证:读取iframe内DOM(document.title)、填写input均成功 +- ⚠只能eval表达式,不支持return/函数体包装 +- 已验证:读取iframe内DOM、填写input均成功 ## 连不上排查 web_scan失败时按序排查: @@ -180,6 +127,3 @@ web_scan失败时按序排查: 没找到→走web_setup_sop;找到→确认已启用 ②浏览器没开?→检查①对应的浏览器进程是否在跑(tasklist/ps),没有则启动并打开正常URL(⚠about:blank等内部页不加载扩展) ③WS后台挂了?→socket.connect_ex(('127.0.0.1',18766))非0即dead→手动`from TMWebDriver import TMWebDriver; TMWebDriver()`起master - -## 性能 -- ⚠ URL必须用`127.0.0.1`不用`localhost`。Windows下localhost先尝试IPv6(::1)超时2s再回退IPv4,每次HTTP请求多2s \ No newline at end of file diff --git a/simphtml.py b/simphtml.py index 839a95d..881ec3b 100644 --- a/simphtml.py +++ b/simphtml.py @@ -824,7 +824,7 @@ def execute_js_rich(script, driver, no_monitor=False): try: print(f"Executing: {script[:250]} ...") response = driver.execute_js(script) - result = response.get('data') or response.get('result') + result = response['data'] if 'data' in response else response.get('result') if response.get('closed', 0) == 1: reloaded = True time.sleep(1) except Exception as e: @@ -835,10 +835,10 @@ def execute_js_rich(script, driver, no_monitor=False): rr = { "status": "failed" if error_msg else "success", "js_return": result, - "environment": {"reloaded": reloaded}, "tab_id": driver.default_session_id } - if response.get('newTabs'): rr['environment']['newTabs'] = response['newTabs'] + if reloaded: rr['reloaded'] = reloaded + if response.get('newTabs'): rr['newTabs'] = response['newTabs'] else: after = driver.get_session_dict() new_sids = {k: v for k, v in after.items() if k not in before_sids}