diff --git a/agentmain.py b/agentmain.py index 71bb589..18e89ec 100644 --- a/agentmain.py +++ b/agentmain.py @@ -35,10 +35,7 @@ class GeneraticAgent: try: if 'claude' in k: llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])] if 'oai' in k: llm_sessions += [LLMSession( - api_key=cfg['apikey'], - api_base=cfg['apibase'], - model=cfg['model'], - proxy=cfg.get('proxy'), + api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'], proxy=cfg.get('proxy'), api_mode=cfg.get('api_mode', 'chat_completions'), max_retries=cfg.get('max_retries', 2), connect_timeout=cfg.get('connect_timeout', 10), @@ -46,7 +43,7 @@ class GeneraticAgent: )] if 'xai' in k: llm_sessions += [XaiSession(cfg, mykeys.get('proxy', ''))] if 'sider' in k: llm_sessions += [SiderLLMSession(cfg, default_model=x) for x in \ - ["gemini-3.0-flash", "claude-haiku-4.5", "kimi-k2"]] + ["gemini-3.0-flash", "claude-haiku-4.5"]] except: pass if len(llm_sessions) > 0: self.llmclient = ToolClient(llm_sessions, auto_save_tokens=True) else: self.llmclient = None diff --git a/assets/insight_fixed_structure.txt b/assets/insight_fixed_structure.txt index 2d692f2..d90f480 100644 --- a/assets/insight_fixed_structure.txt +++ b/assets/insight_fixed_structure.txt @@ -4,6 +4,6 @@ Insight是极简索引,L2/L3变更时同步Insight,索引必须极简。写 [CONSTITUTION] 1. 改自身源码先请示;./内可自主实验,允许装包和portable工具。 2. 决策前查记忆库;未查证不断言。 -3. 分步执行逐步验证;3次失败请求干预。 +3. 分步执行,控制粒度,限制失败半径;3次失败请求干预。 4. 密钥文件仅引用,不读取/移动。 5. 写任何记忆前读META-SOP核验,memory下文件只能patch修改(除非新建)。 diff --git a/assets/tmwd_cdp_bridge/background.js b/assets/tmwd_cdp_bridge/background.js index a2f150e..3020aff 100644 --- a/assets/tmwd_cdp_bridge/background.js +++ b/assets/tmwd_cdp_bridge/background.js @@ -2,17 +2,33 @@ chrome.runtime.onInstalled.addListener(() => console.log('CDP Bridge installed')); chrome.runtime.onMessage.addListener((msg, sender, sendResponse) => { - if (msg.action === 'getCookies') { - handleGetCookies(msg, sender).then(sendResponse); + if (msg.action === 'cookies') { + handleCookies(msg, sender).then(sendResponse); return true; } if (msg.action === 'cdp') { handleCDP(msg, sender).then(sendResponse); return true; } + if (msg.action === 'tabs') { + (async () => { + try { + if (msg.method === 'switch') { + const tab = await chrome.tabs.update(msg.tabId, { active: true }); + await chrome.windows.update(tab.windowId, { focused: true }); + sendResponse({ ok: true }); + } else { + const tabs = await chrome.tabs.query({}); + const data = tabs.map(t => ({ id: t.id, url: t.url, title: t.title, active: t.active, windowId: t.windowId })); + sendResponse({ ok: true, data }); + } + } catch (e) { sendResponse({ ok: false, error: e.message }); } + })(); + return true; + } }); -async function handleGetCookies(msg, sender) { +async function handleCookies(msg, sender) { try { const url = msg.url || sender.tab?.url; const origin = url.match(/^https?:\/\/[^\/]+/)[0]; diff --git a/assets/tmwd_cdp_bridge/content.js b/assets/tmwd_cdp_bridge/content.js index 722d87e..96426f0 100644 --- a/assets/tmwd_cdp_bridge/content.js +++ b/assets/tmwd_cdp_bridge/content.js @@ -12,15 +12,15 @@ new MutationObserver(muts => { async function handle(el) { try { - const cmd = el.dataset.cmd || 'cookies'; + const req = el.textContent.trim() ? JSON.parse(el.textContent) : { cmd: 'cookies' }; + const cmd = req.cmd || 'cookies'; let resp; if (cmd === 'cookies') { - resp = await chrome.runtime.sendMessage({ action: 'getCookies', url: location.href }); + resp = await chrome.runtime.sendMessage({ action: 'cookies', url: req.url || location.href }); } else if (cmd === 'cdp') { - const method = el.dataset.method; - const params = el.dataset.params ? JSON.parse(el.dataset.params) : {}; - const tabId = el.dataset.tabid ? parseInt(el.dataset.tabid) : undefined; - resp = await chrome.runtime.sendMessage({ action: 'cdp', method, params, tabId }); + resp = await chrome.runtime.sendMessage({ action: 'cdp', method: req.method, params: req.params || {}, tabId: req.tabId }); + } else if (cmd === 'tabs') { + resp = await chrome.runtime.sendMessage({ action: 'tabs', method: req.method, tabId: req.tabId }); } else { resp = { ok: false, error: 'unknown cmd: ' + cmd }; } diff --git a/assets/tmwd_cdp_bridge/popup.js b/assets/tmwd_cdp_bridge/popup.js index 00c8839..fdf3baa 100644 --- a/assets/tmwd_cdp_bridge/popup.js +++ b/assets/tmwd_cdp_bridge/popup.js @@ -10,7 +10,7 @@ async function fetchCookies() { try { const [tab] = await chrome.tabs.query({ active: true, currentWindow: true }); if (!tab?.url) { out.textContent = 'No active tab'; return; } - const resp = await chrome.runtime.sendMessage({ action: 'getCookies', url: tab.url }); + const resp = await chrome.runtime.sendMessage({ action: 'cookies', url: tab.url }); if (!resp?.ok) { out.textContent = 'Error: ' + (resp?.error || 'unknown'); return; } if (!resp.data.length) { out.textContent = '(no cookies)'; return; } // 展示带标记 diff --git a/memory/autonomous_operation_sop.md b/memory/autonomous_operation_sop.md index 91e4d00..3423146 100644 --- a/memory/autonomous_operation_sop.md +++ b/memory/autonomous_operation_sop.md @@ -5,7 +5,18 @@ 授权你进行自主行动,只要不对环境造成副作用都可进行。 请先选择核心目标,再选择一个小目标进行。最终探测结果形成报告(含操作申请),待用户回来确认后再进行可能的写入或修改操作。 -> **must call update_working_checkpoint first**:`自主探索|≤15回合|只有cwd内可写|用户不在(问题存报告)|报告目录:./autonomous_reports/|收尾:重读本SOP确认报告目录+更新history|产出=报告+记忆提案 | **路径警告**:autonomous_reports 在 temp/ 下,用./autonomous_reports/访问,**严禁**`../memory/autonomous_reports/`或`../autonomous_reports/`!` +--- +## 🚫 Step 0(阻塞):写入约束便签 — 未完成禁止进入后续步骤 + +**必须第一个动作就调用** `update_working_checkpoint`,写入以下内容: + +``` +自主探索|≤15回合|只有cwd内可写|用户不在(问题存报告)|报告目录:./autonomous_reports/|收尾:重读本SOP确认报告目录+更新history|产出=报告+记忆提案 | 路径警告:autonomous_reports在temp/下,用./autonomous_reports/访问,严禁../memory/autonomous_reports/或../autonomous_reports/! +``` + +⛔ **跳过 Step 0 = 违规**。历史教训:R175因跳过此步,报告存错路径(Desktop而非./autonomous_reports/)。 + +--- ## 📋 大纲 - 报告目录与规则 diff --git a/memory/plan_sop.md b/memory/plan_sop.md index ca99c34..00f6cc2 100644 --- a/memory/plan_sop.md +++ b/memory/plan_sop.md @@ -1,17 +1,40 @@ # Plan Mode SOP -## 拆分 -1. 分析任务,必要时先读相关SOP确定子步骤 -2. 拆成可验证的原子步骤,风险步前置 +## 1. 分类:识别任务结构 +分析子任务间关系,选择匹配的结构: -## 写入 checkpoint -将以下写入 working checkpoint(忽略长度限制): +- **Sequential** — 步骤间有输入输出依赖 (部署/ETL/构建) +- **MapReduce** — 多独立维度,各自深入后汇总 (5P/SWOT/多文件审查) +- **Branch** — 结果不确定,按条件选路径 (调试/探测/方案选择) +- **Loop** — 重复直到满足条件 (优化/翻页/迭代修改) +- **DAG** — 混合依赖,部分可并行 (项目开发) -=== PLAN === -[ ] 步骤1 -[ ] 步骤2 +可嵌套:大结构某步内部用另一种结构 + +## 2. 分解模板 + +**Sequential:** `[ ] A → [ ] B → [ ] C` + +**MapReduce:** +``` +MAP [子流程: 读现状→分析→输出]: +[ ] 维度1: ... +[ ] 维度2: ... +REDUCE: +[ ] 汇总 → 终稿 +``` + +**Branch:** `[ ] 尝试X → 成功:[ ]Y / 失败:[ ]Z` + +**Loop:** `[ ] LOOP(max=N): 执行→检查→调整` + +**DAG:** `[ ] A → [ ]{B,C}并行 → [ ]D汇聚` + +## 3. 写入 checkpoint + +=== PLAN (结构类型) === ... === PLAN RULES === - 每完成/跳过一步,重新 update working checkpoint - 任何 checkpoint update 必须保留 PLAN -================ +================ \ No newline at end of file diff --git a/memory/tmwebdriver_sop.md b/memory/tmwebdriver_sop.md index 508486c..b2ad40a 100644 --- a/memory/tmwebdriver_sop.md +++ b/memory/tmwebdriver_sop.md @@ -7,13 +7,14 @@ ## 限制(isTrusted) - JS dispatch的事件`isTrusted=false`,敏感操作(文件上传/部分按钮)会被浏览器拦截 -- 文件上传:JS无法填充``,必须ljqCtrl物理点击+Win32轮询文件对话框 +- ⭐**首选绕过:CDP桥**——CDP派发的Input事件是浏览器原生级别(isTrusted=true),且无需前台,见下方CDP章节 +- 文件上传:JS无法填充``,仍需ljqCtrl物理点击+Win32轮询文件对话框 - 流程:SetForegroundWindow→ljqCtrl点上传按钮→FindWindow轮询对话框→输入路径→轮询关闭 -- 元素→屏幕物理坐标(ljqCtrl点击前必算):JS一次取rect+窗口信息,公式: +- 备选:元素→屏幕物理坐标(ljqCtrl/PostMessage点击前必算):JS一次取rect+窗口信息,公式: - `physX = (screenX + rect中心x) * dpr`,`physY = (screenY + chromeH + rect中心y) * dpr` - chromeH = outerHeight - innerHeight,dpr = devicePixelRatio - 注意:screenX/Y也是CSS像素,所有值先加后统一乘dpr -- 结论:读信息+普通操作用TMWebDriver;文件上传等敏感操作需配合ljqCtrl +- 结论:读信息+普通操作用TMWebDriver;需isTrusted事件首选CDP桥;文件上传需配合ljqCtrl ## 导航 - `web_scan` 仅读当前页不导航,切换网站用 `web_execute_js` + `location.href='url'` @@ -42,37 +43,27 @@ fetch('PDF_URL').then(r=>r.blob()).then(b=>{ - 已修复:移除TM脚本内轮询,改由Python侧`get_session_dict()`前后对比检测新标签 - 同理:TM脚本中任何后台逻辑都应避免依赖setTimeout轮询 -## Cookie+CDP桥(tmwd_cdp_bridge扩展) -前提:需先安装`assets/tmwd_cdp_bridge/`扩展(含debugger权限) -触发ID:`__ljq_ctrl` -### Cookie提取(含HttpOnly) -注入`id="__ljq_ctrl"`的div(无需data-cmd,默认cookies)→扩展写回JSON到textContent +## CDP桥(tmwd_cdp_bridge扩展) ⭐首选 +扩展路径:`assets/tmwd_cdp_bridge/`(需安装,含debugger权限) +调用:MutationObserver监听addedNodes(id=`__ljq_ctrl`),⚠每次必须remove旧→createElement新→设textContent JSON→appendChild ```js -const d=document.createElement('div');d.id='__ljq_ctrl'; -document.body.appendChild(d); -await new Promise(r=>setTimeout(r,300)); -return d.textContent; // {ok:true, data:[...]} +const old = document.getElementById('__ljq_ctrl'); +if (old) old.remove(); +const el = document.createElement('div'); +el.id = '__ljq_ctrl'; el.style.display = 'none'; +el.textContent = JSON.stringify({cmd:'...', ...}); +document.body.appendChild(el); // 响应写回el.textContent ``` -### CDP命令(任意Chrome DevTools Protocol) -```js -const d=document.createElement('div');d.id='__ljq_ctrl'; -d.dataset.cmd='cdp'; d.dataset.method='Network.getCookies'; -d.dataset.params=JSON.stringify({urls:[location.href]}); -document.body.appendChild(d); -await new Promise(r=>setTimeout(r,500)); -return d.textContent; // {ok:true, data:{...}} -``` -- 可用任意CDP方法(Network/DOM/Page/Runtime等),参数通过data-params传JSON -- 每次调用会attach→sendCommand→detach debugger,页面顶部会短暂显示调试提示 +命令:`{cmd:'tabs'}` | `{cmd:'cookies'}` | `{cmd:'cdp', tabId:N, method:'...', params:{...}}` +- CDP可用任意方法(Input/Network/DOM/Page/Runtime/Emulation等),每次attach→send→detach +- ⭐跨tab无需前台:指定tabId即可操作后台标签页 +- ⭐绕过isTrusted:CDP派发的Input事件是浏览器原生级别 -## 登录凭证autofill获取 -检测:simphtml.py已内置autofill检测,`web_scan`输出的input会带`data-autofilled="true"`属性,value显示为`⚠️受保护-读tmwebdriver_sop的autofill章节提取`(非真实值) -问题:`:-webkit-autofill`可探测autofill状态,但`input.value`为空(Chrome安全保护,需物理点击释放) -突破:PostMessage点击输入框触发释放 -前置:枚举Chrome主窗口标题匹配web_scan当前页标题,不匹配则切换标签页(避免点到后台tab) -流程:JS检查`:-webkit-autofill`→获取`getBoundingClientRect()*devicePixelRatio`→PostMessage发`WM_LBUTTONDOWN/UP`到`Chrome_RenderWidgetHostHWND`子窗口→读`value` -坑:多个RenderWidgetHostHWND共存(NexonLauncher等非浏览器Chrome应用也有),必须EnumWindows按父窗口标题匹配目标页再取其子RenderWidget -平台:Windows用PostMessage;macOS用CGEvent(未测试) +## autofill获取 +检测:web_scan输出input带`data-autofilled="true"`,value显示为受保护提示(非真实值,Chrome安全保护需点击释放) +- ⭐首选CDP:tabs获取tabId→CDP mousePressed点击输入框→autofill值释放→JS读`.value`(无需前台) +- 备选PostMessage物理点击(仅Windows/需前台):枚举Chrome窗口标题匹配→rect*dpr→WM_LBUTTONDOWN/UP到Chrome_RenderWidgetHostHWND子窗口 + - 坑:多RenderWidgetHostHWND共存,必须按父窗口标题匹配再取子窗口 ## 验证码/页面视觉截图 - 优先:JS `canvas.toDataURL()` 直接拿base64(验证码是canvas/img时最干净,无需截屏) diff --git a/sidercall.py b/sidercall.py index 9f99a8b..8edd163 100644 --- a/sidercall.py +++ b/sidercall.py @@ -37,7 +37,7 @@ class SiderLLMSession: return full_text class ClaudeSession: - def __init__(self, api_key, api_base, model="claude-opus", context_win=9000): + def __init__(self, api_key, api_base, model="claude-opus", context_win=10000): self.api_key, self.api_base, self.default_model, self.context_win = api_key, api_base.rstrip('/'), model, context_win self.raw_msgs, self.lock = [], threading.Lock() def _trim_messages(self, messages): @@ -51,7 +51,7 @@ class ClaudeSession: else: break if current > self.context_win * 3.6: print(f'[DEBUG] {len(result)} contexts, whole length {current//4} tokens.') return result[::-1] or messages[-2:] - def raw_ask(self, messages, model=None, temperature=0.5, max_tokens=4096): + def raw_ask(self, messages, model=None, temperature=0.5, max_tokens=6144): model = model or self.default_model headers = {"x-api-key": self.api_key, "Content-Type": "application/json", "anthropic-version": "2023-06-01"} payload = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, "stream": True} diff --git a/simphtml.py b/simphtml.py index 2a7067a..1064f89 100644 --- a/simphtml.py +++ b/simphtml.py @@ -18,6 +18,7 @@ function createEnhancedDOMCopy() { const clone = sourceNode.cloneNode(false); if ((sourceNode.tagName === 'INPUT' || sourceNode.tagName === 'TEXTAREA') && sourceNode.value) clone.setAttribute('value', sourceNode.value); else if (sourceNode.tagName === 'SELECT' && sourceNode.value) clone.setAttribute('data-selected', sourceNode.value); + try { if (sourceNode.matches && sourceNode.matches(':-webkit-autofill')) { clone.setAttribute('data-autofilled', 'true'); if (!sourceNode.value) clone.setAttribute('value', '⚠️受保护-读tmwebdriver_sop的autofill章节提取'); } } catch(e) {} const isDropdown = sourceNode.classList?.contains('dropdown-menu') || /dropdown|menu/i.test(sourceNode.className) || sourceNode.getAttribute('role') === 'menu'; diff --git a/tgapp.py b/tgapp.py index 803ea39..083a5cb 100644 --- a/tgapp.py +++ b/tgapp.py @@ -70,10 +70,14 @@ async def handle_msg(update, ctx): return await update.message.reply_text("no") msg = await update.message.reply_text("thinking...") dq = agent.put_task(update.message.text, source="telegram") - await _stream(dq, msg) + task = asyncio.create_task(_stream(dq, msg)) + ctx.user_data['stream_task'] = task async def cmd_abort(update, ctx): agent.abort() + task = ctx.user_data.get('stream_task') + if task and not task.done(): + task.cancel() await update.message.reply_text("Aborted") async def cmd_llm(update, ctx):