CDP bridge扩展完善+SOP精简+insight修复+plan_sop+autonomous_sop更新

This commit is contained in:
Liang Jiaqing
2026-03-04 12:02:50 +08:00
parent ffe1f3c4c0
commit 3f8407a853
11 changed files with 103 additions and 60 deletions

View File

@@ -35,10 +35,7 @@ class GeneraticAgent:
try:
if 'claude' in k: llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])]
if 'oai' in k: llm_sessions += [LLMSession(
api_key=cfg['apikey'],
api_base=cfg['apibase'],
model=cfg['model'],
proxy=cfg.get('proxy'),
api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'], proxy=cfg.get('proxy'),
api_mode=cfg.get('api_mode', 'chat_completions'),
max_retries=cfg.get('max_retries', 2),
connect_timeout=cfg.get('connect_timeout', 10),
@@ -46,7 +43,7 @@ class GeneraticAgent:
)]
if 'xai' in k: llm_sessions += [XaiSession(cfg, mykeys.get('proxy', ''))]
if 'sider' in k: llm_sessions += [SiderLLMSession(cfg, default_model=x) for x in \
["gemini-3.0-flash", "claude-haiku-4.5", "kimi-k2"]]
["gemini-3.0-flash", "claude-haiku-4.5"]]
except: pass
if len(llm_sessions) > 0: self.llmclient = ToolClient(llm_sessions, auto_save_tokens=True)
else: self.llmclient = None

View File

@@ -4,6 +4,6 @@ Insight是极简索引L2/L3变更时同步Insight索引必须极简。写
[CONSTITUTION]
1. 改自身源码先请示;./内可自主实验允许装包和portable工具。
2. 决策前查记忆库;未查证不断言。
3. 分步执行逐步验证3次失败请求干预。
3. 分步执行,控制粒度,限制失败半径;3次失败请求干预。
4. 密钥文件仅引用,不读取/移动。
5. 写任何记忆前读META-SOP核验;memory下文件只能patch修改,除非新建。

View File

@@ -2,17 +2,33 @@
chrome.runtime.onInstalled.addListener(() => console.log('CDP Bridge installed'));
chrome.runtime.onMessage.addListener((msg, sender, sendResponse) => {
if (msg.action === 'getCookies') {
handleGetCookies(msg, sender).then(sendResponse);
if (msg.action === 'cookies') {
handleCookies(msg, sender).then(sendResponse);
return true;
}
if (msg.action === 'cdp') {
handleCDP(msg, sender).then(sendResponse);
return true;
}
if (msg.action === 'tabs') {
(async () => {
try {
if (msg.method === 'switch') {
const tab = await chrome.tabs.update(msg.tabId, { active: true });
await chrome.windows.update(tab.windowId, { focused: true });
sendResponse({ ok: true });
} else {
const tabs = await chrome.tabs.query({});
const data = tabs.map(t => ({ id: t.id, url: t.url, title: t.title, active: t.active, windowId: t.windowId }));
sendResponse({ ok: true, data });
}
} catch (e) { sendResponse({ ok: false, error: e.message }); }
})();
return true;
}
});
async function handleGetCookies(msg, sender) {
async function handleCookies(msg, sender) {
try {
const url = msg.url || sender.tab?.url;
const origin = url.match(/^https?:\/\/[^\/]+/)[0];

View File

@@ -12,15 +12,15 @@ new MutationObserver(muts => {
async function handle(el) {
try {
const cmd = el.dataset.cmd || 'cookies';
const req = el.textContent.trim() ? JSON.parse(el.textContent) : { cmd: 'cookies' };
const cmd = req.cmd || 'cookies';
let resp;
if (cmd === 'cookies') {
resp = await chrome.runtime.sendMessage({ action: 'getCookies', url: location.href });
resp = await chrome.runtime.sendMessage({ action: 'cookies', url: req.url || location.href });
} else if (cmd === 'cdp') {
const method = el.dataset.method;
const params = el.dataset.params ? JSON.parse(el.dataset.params) : {};
const tabId = el.dataset.tabid ? parseInt(el.dataset.tabid) : undefined;
resp = await chrome.runtime.sendMessage({ action: 'cdp', method, params, tabId });
resp = await chrome.runtime.sendMessage({ action: 'cdp', method: req.method, params: req.params || {}, tabId: req.tabId });
} else if (cmd === 'tabs') {
resp = await chrome.runtime.sendMessage({ action: 'tabs', method: req.method, tabId: req.tabId });
} else {
resp = { ok: false, error: 'unknown cmd: ' + cmd };
}

View File

@@ -10,7 +10,7 @@ async function fetchCookies() {
try {
const [tab] = await chrome.tabs.query({ active: true, currentWindow: true });
if (!tab?.url) { out.textContent = 'No active tab'; return; }
const resp = await chrome.runtime.sendMessage({ action: 'getCookies', url: tab.url });
const resp = await chrome.runtime.sendMessage({ action: 'cookies', url: tab.url });
if (!resp?.ok) { out.textContent = 'Error: ' + (resp?.error || 'unknown'); return; }
if (!resp.data.length) { out.textContent = '(no cookies)'; return; }
// 展示带标记

View File

@@ -5,7 +5,18 @@
授权你进行自主行动,只要不对环境造成副作用都可进行。
请先选择核心目标,再选择一个小目标进行。最终探测结果形成报告(含操作申请),待用户回来确认后再进行可能的写入或修改操作。
> **must call update_working_checkpoint first**`自主探索≤15回合只有cwd内可写用户不在(问题存报告)|报告目录:./autonomous_reports/|收尾:重读本SOP确认报告目录+更新history产出=报告+记忆提案 | **路径警告**autonomous_reports 在 temp/ 下,用./autonomous_reports/访问,**严禁**`../memory/autonomous_reports/`或`../autonomous_reports/``
---
## 🚫 Step 0阻塞写入约束便签 — 未完成禁止进入后续步骤
**必须第一个动作就调用** `update_working_checkpoint`,写入以下内容:
```
自主探索≤15回合只有cwd内可写用户不在(问题存报告)|报告目录:./autonomous_reports/|收尾:重读本SOP确认报告目录+更新history产出=报告+记忆提案 | 路径警告autonomous_reports在temp/下,用./autonomous_reports/访问,严禁../memory/autonomous_reports/或../autonomous_reports/
```
**跳过 Step 0 = 违规**。历史教训:R175因跳过此步,报告存错路径(Desktop而非./autonomous_reports/)。
---
## 📋 大纲
- 报告目录与规则

View File

@@ -1,17 +1,40 @@
# Plan Mode SOP
## 拆分
1. 分析任务必要时先读相关SOP确定子步骤
2. 拆成可验证的原子步骤,风险步前置
## 1. 分类:识别任务结构
分析任务间关系,选择匹配的结构:
## 写入 checkpoint
将以下写入 working checkpoint忽略长度限制
- **Sequential** — 步骤间有输入输出依赖 (部署/ETL/构建)
- **MapReduce** — 多独立维度,各自深入后汇总 (5P/SWOT/多文件审查)
- **Branch** — 结果不确定,按条件选路径 (调试/探测/方案选择)
- **Loop** — 重复直到满足条件 (优化/翻页/迭代修改)
- **DAG** — 混合依赖,部分可并行 (项目开发)
=== PLAN ===
[ ] 步骤1
[ ] 步骤2
可嵌套:大结构某步内部用另一种结构
## 2. 分解模板
**Sequential:** `[ ] A → [ ] B → [ ] C`
**MapReduce:**
```
MAP [子流程: 读现状→分析→输出]:
[ ] 维度1: ...
[ ] 维度2: ...
REDUCE:
[ ] 汇总 → 终稿
```
**Branch:** `[ ] 尝试X → 成功:[ ]Y / 失败:[ ]Z`
**Loop:** `[ ] LOOP(max=N): 执行→检查→调整`
**DAG:** `[ ] A → [ ]{B,C}并行 → [ ]D汇聚`
## 3. 写入 checkpoint
=== PLAN (结构类型) ===
...
=== PLAN RULES ===
- 每完成/跳过一步,重新 update working checkpoint
- 任何 checkpoint update 必须保留 PLAN
================
================

View File

@@ -7,13 +7,14 @@
## 限制(isTrusted)
- JS dispatch的事件`isTrusted=false`,敏感操作(文件上传/部分按钮)会被浏览器拦截
- 文件上传JS无法填充`<input type=file>`必须ljqCtrl物理点击+Win32轮询文件对话框
- ⭐**首选绕过方式:CDP桥**——CDP派发的Input事件是浏览器原生级别(isTrusted=true)且无需前台,见下方CDP章节
- 文件上传:JS无法填充`<input type=file>`,仍需ljqCtrl物理点击+Win32轮询文件对话框
- 流程SetForegroundWindow→ljqCtrl点上传按钮→FindWindow轮询对话框→输入路径→轮询关闭
- 元素→屏幕物理坐标(ljqCtrl点击前必算)JS一次取rect+窗口信息,公式:
- 备选:元素→屏幕物理坐标(ljqCtrl/PostMessage点击前必算)JS一次取rect+窗口信息,公式:
- `physX = (screenX + rect中心x) * dpr`,`physY = (screenY + chromeH + rect中心y) * dpr`
- chromeH = outerHeight - innerHeight,dpr = devicePixelRatio
- 注意:screenX/Y也是CSS像素,所有值先加后统一乘dpr
- 结论:读信息+普通操作用TMWebDriver文件上传等敏感操作需配合ljqCtrl
- 结论:读信息+普通操作用TMWebDriver;需isTrusted事件首选CDP桥;文件上传需配合ljqCtrl
## 导航
- `web_scan` 仅读当前页不导航,切换网站用 `web_execute_js` + `location.href='url'`
@@ -42,37 +43,27 @@ fetch('PDF_URL').then(r=>r.blob()).then(b=>{
- 已修复移除TM脚本内轮询改由Python侧`get_session_dict()`前后对比检测新标签
- 同理TM脚本中任何后台逻辑都应避免依赖setTimeout轮询
## Cookie+CDP桥(tmwd_cdp_bridge扩展)
前提:需先安装`assets/tmwd_cdp_bridge/`扩展(含debugger权限)
触发ID`__ljq_ctrl`
### Cookie提取(含HttpOnly)
注入`id="__ljq_ctrl"`的div(无需data-cmd默认cookies)→扩展写回JSON到textContent
## CDP桥(tmwd_cdp_bridge扩展) ⭐首选
扩展路径:`assets/tmwd_cdp_bridge/`(需安装,含debugger权限)
调用:MutationObserver监听addedNodes(id=`__ljq_ctrl`)。⚠每次必须remove旧→createElement新→设textContent JSON→appendChild
```js
const d=document.createElement('div');d.id='__ljq_ctrl';
document.body.appendChild(d);
await new Promise(r=>setTimeout(r,300));
return d.textContent; // {ok:true, data:[...]}
const old = document.getElementById('__ljq_ctrl');
if (old) old.remove();
const el = document.createElement('div');
el.id = '__ljq_ctrl'; el.style.display = 'none';
el.textContent = JSON.stringify({cmd:'...', ...});
document.body.appendChild(el); // 响应写回el.textContent
```
### CDP命令(任意Chrome DevTools Protocol)
```js
const d=document.createElement('div');d.id='__ljq_ctrl';
d.dataset.cmd='cdp'; d.dataset.method='Network.getCookies';
d.dataset.params=JSON.stringify({urls:[location.href]});
document.body.appendChild(d);
await new Promise(r=>setTimeout(r,500));
return d.textContent; // {ok:true, data:{...}}
```
- 可用任意CDP方法(Network/DOM/Page/Runtime等)参数通过data-params传JSON
- 每次调用会attach→sendCommand→detach debugger页面顶部会短暂显示调试提示
命令:`{cmd:'tabs'}` | `{cmd:'cookies'}` | `{cmd:'cdp', tabId:N, method:'...', params:{...}}`
- CDP可用任意方法(Input/Network/DOM/Page/Runtime/Emulation等)每次attach→send→detach
- ⭐跨tab无需前台指定tabId即可操作后台标签页
- ⭐绕过isTrusted:CDP派发的Input事件是浏览器原生级别
## 登录凭证autofill获取
检测:simphtml.py已内置autofill检测`web_scan`输出input`data-autofilled="true"`属性value显示为`⚠️受保护-读tmwebdriver_sop的autofill章节提取`(非真实值)
问题:`:-webkit-autofill`可探测autofill状态`input.value`为空Chrome安全保护需物理点击释放
突破:PostMessage点击输入框触发释放
前置枚举Chrome主窗口标题匹配web_scan当前页标题不匹配则切换标签页避免点到后台tab
流程JS检查`:-webkit-autofill`→获取`getBoundingClientRect()*devicePixelRatio`→PostMessage发`WM_LBUTTONDOWN/UP``Chrome_RenderWidgetHostHWND`子窗口→读`value`
多个RenderWidgetHostHWND共存(NexonLauncher等非浏览器Chrome应用也有)必须EnumWindows按父窗口标题匹配目标页再取其子RenderWidget
平台Windows用PostMessagemacOS用CGEvent未测试
## autofill获取
检测:web_scan输出的input带`data-autofilled="true"`,value显示为受保护提示(非真实值;Chrome安全保护,需点击释放)
- ⭐首选CDP:tabs获取tabId→CDP mousePressed点击输入框→autofill值释放→JS读`.value`(无需前台)
- 备选:PostMessage物理点击(仅Windows/需前台):枚举Chrome窗口标题匹配→rect*dpr→WM_LBUTTONDOWN/UP到Chrome_RenderWidgetHostHWND子窗口
- 多RenderWidgetHostHWND共存:必须按父窗口标题匹配再取子窗口
## 验证码/页面视觉截图
- 优先JS `canvas.toDataURL()` 直接拿base64验证码是canvas/img时最干净无需截屏

View File

@@ -37,7 +37,7 @@ class SiderLLMSession:
return full_text
class ClaudeSession:
def __init__(self, api_key, api_base, model="claude-opus", context_win=9000):
def __init__(self, api_key, api_base, model="claude-opus", context_win=10000):
self.api_key, self.api_base, self.default_model, self.context_win = api_key, api_base.rstrip('/'), model, context_win
self.raw_msgs, self.lock = [], threading.Lock()
def _trim_messages(self, messages):
@@ -51,7 +51,7 @@ class ClaudeSession:
else: break
if current > self.context_win * 3.6: print(f'[DEBUG] {len(result)} contexts, whole length {current//4} tokens.')
return result[::-1] or messages[-2:]
def raw_ask(self, messages, model=None, temperature=0.5, max_tokens=4096):
def raw_ask(self, messages, model=None, temperature=0.5, max_tokens=6144):
model = model or self.default_model
headers = {"x-api-key": self.api_key, "Content-Type": "application/json", "anthropic-version": "2023-06-01"}
payload = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, "stream": True}

View File

@@ -18,6 +18,7 @@ function createEnhancedDOMCopy() {
const clone = sourceNode.cloneNode(false);
if ((sourceNode.tagName === 'INPUT' || sourceNode.tagName === 'TEXTAREA') && sourceNode.value) clone.setAttribute('value', sourceNode.value);
else if (sourceNode.tagName === 'SELECT' && sourceNode.value) clone.setAttribute('data-selected', sourceNode.value);
try { if (sourceNode.matches && sourceNode.matches(':-webkit-autofill')) { clone.setAttribute('data-autofilled', 'true'); if (!sourceNode.value) clone.setAttribute('value', '⚠️受保护-读tmwebdriver_sop的autofill章节提取'); } } catch(e) {}
const isDropdown = sourceNode.classList?.contains('dropdown-menu') ||
/dropdown|menu/i.test(sourceNode.className) || sourceNode.getAttribute('role') === 'menu';

View File

@@ -70,10 +70,14 @@ async def handle_msg(update, ctx):
return await update.message.reply_text("no")
msg = await update.message.reply_text("thinking...")
dq = agent.put_task(update.message.text, source="telegram")
await _stream(dq, msg)
task = asyncio.create_task(_stream(dq, msg))
ctx.user_data['stream_task'] = task
async def cmd_abort(update, ctx):
agent.abort()
task = ctx.user_data.get('stream_task')
if task and not task.done():
task.cancel()
await update.message.reply_text("Aborted")
async def cmd_llm(update, ctx):