Files
GenericAgent/restore_commit.txt

2965 lines
120 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
commit 9b20ca82972ec66622193846801630fd356ce231
Author: Liang Jiaqing <l.j.q.light@gmail.com>
Date: Fri Jan 16 23:50:19 2026 +0800
fix: restore files removed by mistake and keep zip ignored
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4b399e9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,52 @@
+# pc-agent-loop
+
+pc-agent-loop 是一个**极致简约**的 PC 级自主 AI Agent 框架。它通过不到 100 行的核心代码和约 200 行的工具实现,把整台 PC 交给 Agent,构筑了覆盖浏览器、终端、文件系统的物理级自动化能力。
+
+## 🚀 核心特性
+
+- **极简设计**: 仅由 **7 个基本工具** 和一个高效的 **Agentic Loop** 构成,拒绝过度设计。
+- **自主代码执行 (Code Execution)**: 能够根据任务需求自主编写并运行 Python 或 PowerShell 脚本,直接操控系统资源。
+- **深度 Web 自动化 (Advanced Web Automation)**:
+ - **语义化扫描**: 自动清洗 HTML 内容,将复杂的 DOM 转化为 AI 易读的结构。
+ - **JS 注入执行**: 在浏览器上下文中执行自定义 JavaScript,实现精准点击、滚动或数据抓取。
+ - **TMWebDriver**: 支持通过 Tampermonkey 实现的持久化会话驱动。
+- **精准文件编辑 (Smart File Patching)**: 并非盲目覆盖,而是支持通过 `file_patch` 以代码块匹配方式进行精确修改。
+- **人机协作模式 (Human-in-the-loop)**: 在遇到验证码、关键权限或模糊决策时,主动请求用户介入。
+
+## 📂 项目结构
+
+- `agent_loop.py`: **核心引擎**,负责“感知-思考-行动”的自主循环逻辑。
+- `ga.py`: **工具箱**,定义了 7 大核心原子工具的具体实现。
+- `agentapp.py`: 基于 Streamlit 构建的轻量化交互式 Web 界面。
+- `sidercall.py`: LLM 通信层,支持流式输出与 API 调用。
+- `TMWebDriver.py`: 浏览器驱动模块(需配合 Tampermonkey 脚本使用)。
+
+## 🛠️ 快速开始
+
+### 1. 环境准备
+- 安装 Python 3.8+。
+- (可选)若需网页自动化,请在浏览器中安装 **Tampermonkey** 插件并导入本项目提供的对应脚本。
+
+### 2. 安装依赖
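+缺啥装啥:运行时按报错提示用 pip 安装缺失的依赖(代码中导入了 requests、bottle、simple_websocket_server、bs4、streamlit、pyperclip、webview 等第三方模块,HTTP 服务使用 tornado)。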
+
+### 3. 启动应用
+在项目根目录下执行:
+```bash
+python launch.pyw
+```
+
+## 🧩 7 大核心工具
+
+Agent 仅依靠以下 7 个原子工具的组合来完成复杂任务:
+
+1. **`code_run`**: 针对 Windows 优化的双模态代码执行器(Python/PowerShell)。
+2. **`web_scan`**: 获取网页清洗后的语义化 HTML 结构,支持多标签管理。
+3. **`web_execute_js`**: 网页 JS 脚本注入,支持将结果存盘分析。
+4. **`file_read`**: 分页式文件读取,支持行号定位。
+5. **`file_write`**: 文件全量写入或追加。
+6. **`file_patch`**: 基于源码块匹配的精准局部修改,确保缩进一致性。
+7. **`ask_user`**: 关键节点请求人类干预。
+
+---
+**⚠️ 警告**: 本 Agent 具备执行本地代码和控制操作系统的物理权限。请务必在受信任的环境中运行,并在运行前仔细检查 Agent 的执行意图。
\ No newline at end of file
diff --git a/TMWebDriver.py b/TMWebDriver.py
new file mode 100644
index 0000000..0f58e79
--- /dev/null
+++ b/TMWebDriver.py
@@ -0,0 +1,285 @@
+import json, threading, time, uuid, queue, socket, requests
+from typing import Dict, Any, Optional, List
+from simple_websocket_server import WebSocketServer, WebSocket
+from bs4 import BeautifulSoup
+import bottle, random
+from bottle import route, template, request, response
+
class Session:
    """One browser tab known to the driver.

    ``info`` is the metadata dict built from the tab's message; its ``type``
    entry ('ws' when absent, or 'http') decides whether ``client`` is stored
    as the WebSocket client or as the HTTP command queue.
    """

    def __init__(self, session_id, info, client=None):
        self.id = session_id
        self.info = info
        self.connect_at = time.time()
        self.disconnect_at = None  # None means the session is connected
        self.type = info.get('type', 'ws')
        self.ws_client = client if self.type == 'ws' else None
        self.http_queue = client if self.type == 'http' else None

    @property
    def url(self):
        """URL from ``info`` ('' when the key is missing)."""
        return self.info.get('url', '')

    def is_active(self):
        """Return True while no disconnect has been recorded."""
        return self.disconnect_at is None

    def reconnect(self, client, info):
        """Re-attach the session to a new client and mark it connected again."""
        self.info = info
        self.type = info.get('type', 'ws')
        if self.type == 'ws':
            self.ws_client = client
            self.http_queue = None
        elif self.type == 'http':
            self.http_queue = client
            # Drop the previous WebSocket handle as well: a stale handle would
            # still match in lookups that compare ws_client against a closing
            # client.
            self.ws_client = None
        self.connect_at = time.time()
        self.disconnect_at = None

    def mark_disconnected(self):
        """Record the disconnect time; the session then counts as inactive."""
        self.disconnect_at = time.time()
+
+
class TMWebDriver:
    """Send JavaScript to browser tabs that connect back over WebSocket or HTTP.

    A local instance runs a WebSocket server on ``port`` and an HTTP server on
    ``port + 1``. When something already accepts connections on ``port + 1``
    the instance works in remote mode instead and forwards its commands to
    that server's ``/link`` endpoint.
    """

    def __init__(self, host: str = 'localhost', port: int = 18765):
        self.host = host
        self.port = port
        self.sessions = {}  # session id -> Session
        self.results = {}   # execution id -> result dict reported by a tab

        self.default_session_id = None
        self.latest_session_id = None
        self.last_cmd_time = 0
        # A successful connect to port+1 means a server is already running
        # there; close the probe socket instead of leaking it.
        with socket.socket() as probe:
            self.is_remote = probe.connect_ex((host, port + 1)) == 0
        if not self.is_remote:
            self.start_ws_server()
            self.start_http_server()
        else:
            self.remote = f'http://{self.host}:{self.port+1}/link'

    def start_http_server(self):
        """Serve the long-poll, result and link endpoints on a daemon thread."""
        self.app = app = bottle.Bottle()

        @app.route('/api/longpoll', method=['GET', 'POST'])
        def long_poll():
            # Registers unknown session ids as HTTP sessions, then hands out
            # the next queued command (waiting at most 5 seconds).
            data = request.json
            session_id = data.get('sessionId')
            session_info = {'url': data.get('url'), 'title': data.get('title', ''), 'type': 'http'}
            if session_id not in self.sessions:
                session = Session(session_id, session_info, queue.Queue())
                print(f"Browser http connected: {session.url} (Session: {session_id})")
                self.sessions[session_id] = session
            session = self.sessions[session_id]
            if session.type != 'http':
                return json.dumps({"id": "", "ret": "use ws"})
            try:
                return session.http_queue.get(timeout=5)
            except queue.Empty:
                return json.dumps({"id": "", "ret": "next long-poll"})

        @app.route('/api/result', method=['GET', 'POST'])
        def result():
            # Stores the outcome of a command under its execution id.
            data = request.json
            if data.get('type') == 'result':
                self.results[data.get('id')] = {'success': True, 'data': data.get('result'), 'newTabs': data.get('newTabs', [])}
            elif data.get('type') == 'error':
                self.results[data.get('id')] = {'success': False, 'data': data.get('error')}
            return 'ok'

        @app.route('/link', method=['GET', 'POST'])
        def link():
            # Command endpoint used by instances running in remote mode.
            data = request.json
            cmd = data.get('cmd')
            if cmd == 'get_all_sessions':
                return json.dumps({'r': self.get_all_sessions()}, ensure_ascii=False)
            if cmd == 'find_session':
                url_pattern = data.get('url_pattern', '')
                return json.dumps({'r': self.find_session(url_pattern)}, ensure_ascii=False)
            if cmd == 'execute_js':
                session_id = data.get('sessionId')
                code = data.get('code')
                timeout = float(data.get('timeout', 10.0))
                auto_switch_newtab = data.get('auto_switch_newtab', False)
                try:
                    result = self.execute_js(code, timeout=timeout, session_id=session_id, auto_switch_newtab=auto_switch_newtab)
                    newTabs = result.get('newTabs', []) if isinstance(result, dict) else []
                    return json.dumps({'result': result, 'newTabs': newTabs}, ensure_ascii=False)
                except Exception as e:
                    return json.dumps({'error': str(e)}, ensure_ascii=False)
            return 'ok'

        def run():
            # The server thread gets its own asyncio event loop before the
            # tornado-backed Bottle server starts.
            import asyncio
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            bottle.run(app, host=self.host, port=self.port+1, server='tornado')

        http_thread = threading.Thread(target=run)
        http_thread.daemon = True
        http_thread.start()

    def clean_sessions(self):
        """Forget sessions that have been disconnected for more than 600 seconds."""
        for sid in list(self.sessions.keys()):
            session = self.sessions[sid]
            if not session.is_active() and time.time() - session.disconnect_at > 600:
                del self.sessions[sid]

    def start_ws_server(self) -> None:
        """Start the WebSocket server on a daemon thread."""
        driver = self

        class JSExecutor(WebSocket):
            def handle(self) -> None:
                try:
                    data = json.loads(self.data)
                    msg_type = data.get('type')
                    if msg_type == 'ready':
                        session_id = data.get('sessionId')
                        session_info = {'url': data.get('url'), 'title': data.get('title', ''),
                                        'connected_at': time.time(), 'type': 'ws'}
                        driver._register_client(session_id, self, session_info)
                    # Equality, not `in 'result'`: the substring test also
                    # accepted '' or 'res' and raised TypeError for a missing type.
                    elif msg_type == 'result':
                        driver.results[data.get('id')] = {'success': True, 'data': data.get('result'), 'newTabs': data.get('newTabs', [])}
                    elif msg_type == 'error':
                        driver.results[data.get('id')] = {'success': False, 'data': data.get('error')}
                except Exception as e:
                    print(f"Error handling message: {e}")
                    if hasattr(self, 'data'): print(self.data)

            def connected(self):
                # The original body was a bare f-string expression with no
                # effect; new connections are intentionally left unannounced.
                pass

            def handle_close(self):
                driver._unregister_client(self)

        self.server = WebSocketServer(self.host, self.port, JSExecutor)
        server_thread = threading.Thread(target=self.server.serve_forever)
        server_thread.daemon = True
        server_thread.start()
        print(f"WebSocket server running on ws://{self.host}:{self.port}")

    def _register_client(self, session_id: str, client: "WebSocket", session_info) -> None:
        """Create or reconnect the session for a tab that reported 'ready'."""
        is_new_session = session_id not in self.sessions

        if is_new_session:
            session = Session(session_id, session_info, client)
            self.sessions[session_id] = session
            print(f"New tab connected: {session.url} (Session: {session_id})")
        else:
            session = self.sessions[session_id]
            session.reconnect(client, session_info)
            print(f"Tab reconnected: {session.url} (Session: {session_id})")

        self.latest_session_id = session_id
        if self.default_session_id is None:
            self.default_session_id = session_id
        elif is_new_session:
            # A tab that appears within 5 seconds of the last recorded command
            # is treated as opened by that command and becomes the default.
            if time.time() - self.last_cmd_time < 5.0:
                print(f"检测到脚本触发的新窗口,自动切换焦点: {session_id}")
                self.default_session_id = session_id

    def _unregister_client(self, client: "WebSocket") -> None:
        """Mark the session owning ``client`` as disconnected."""
        for session in self.sessions.values():
            if session.ws_client == client:
                session.mark_disconnected()
                break

    def execute_js(self, code, timeout=10.0, session_id=None, auto_switch_newtab=False) -> Any:
        """Run ``code`` in a tab and return what the tab reports back.

        Args:
            code: JavaScript source to execute.
            timeout: Seconds to wait for the tab's answer.
            session_id: Target session; the default session when None.
            auto_switch_newtab: When true, a tab reported in ``newTabs``
                becomes the default session.

        Returns:
            The reported result data, or a dict with a ``result`` message
            when the tab reloaded or did not answer within ``timeout``.

        Raises:
            ValueError: No active session is available.
            Exception: The tab (or the remote driver) reported an error.
        """
        if session_id is None: session_id = self.default_session_id
        if self.is_remote:
            print('remote_execute_js')
            response = self._remote_cmd({"cmd": "execute_js", "sessionId": session_id,
                                         "code": code, "timeout": str(timeout),
                                         "auto_switch_newtab": auto_switch_newtab})
            if response.get('error'): raise Exception(response['error'])
            if auto_switch_newtab and 'newTabs' in response:
                newtabs = response.get('newTabs', [])
                if len(newtabs) > 0:
                    new_session_id = newtabs[0]['sessionId']
                    self.default_session_id = new_session_id
                    print(f"自动切换到新标签会话: {new_session_id}")
            return response.get('result', None)

        session = self.sessions.get(session_id)
        if not session or not session.is_active():
            # Give a reloading tab a moment to reconnect.
            time.sleep(3)
            session = self.sessions.get(session_id)
        if not session or not session.is_active():
            alive_sessions = [s for s in self.sessions.values() if s.is_active()]
            if alive_sessions:
                session = alive_sessions[0]
                print(f"会话 {session_id} 未连接,自动切换到最新活动会话: {session.id}")
                session_id = self.default_session_id = session.id
        if not session or not session.is_active():
            # A leftover breakpoint() was removed here: it would drop into a
            # debugger instead of reporting the failure to the caller.
            raise ValueError(f"会话ID {session_id} 未连接")

        tp = session.type
        assert tp in ['ws', 'http'], f"Unsupported session type: {tp}"
        exec_id = str(uuid.uuid4())
        payload = json.dumps({'id': exec_id, 'code': code, 'auto_switch_newtab': auto_switch_newtab})

        if tp == 'ws':
            session.ws_client.send_message(payload)
        elif tp == 'http':
            session.http_queue.put(payload)

        start_time = time.time()
        self.clean_sessions()
        hasjump = False

        while exec_id not in self.results:
            time.sleep(0.1)
            if tp == 'ws':
                # A disconnect followed by a reconnect while waiting is
                # reported as a page reload.
                if not session.is_active(): hasjump = True
                if hasjump and session.is_active():
                    if not self.is_remote and auto_switch_newtab: self.last_cmd_time = time.time()
                    return {"result": f"Session {session_id} reloaded.", "closed":1}
            if time.time() - start_time > timeout:
                if tp == 'ws':
                    return {"result": f"No response data in {timeout}s"}
                elif tp == 'http':
                    return {"result": f"Session {session_id} no response."}

        result = self.results.pop(exec_id)
        if not result['success']: raise Exception(result['data'])
        if not self.is_remote and auto_switch_newtab:
            newtabs = result.get('newTabs', [])
            if len(newtabs) > 0:
                new_session_id = newtabs[0]['sessionId']
                self.default_session_id = new_session_id
                print(f"自动切换到新标签会话: {new_session_id}")
            # NOTE(review): indentation was lost in the source dump; this elif
            # is assumed to pair with the newTabs check above (mirroring the
            # reload branch, which also records the time only when
            # auto_switch_newtab is set) - confirm against the real file.
            elif not self.is_remote:
                self.last_cmd_time = time.time()
        return result['data']

    def _remote_cmd(self, cmd):
        """POST ``cmd`` as JSON to the remote driver and return the decoded reply."""
        resp = requests.post(self.remote,
                             headers={"Content-Type": "application/json"},
                             json=cmd).json()
        return resp

    def get_all_sessions(self):
        """Return ``{'id': ..., **info}`` for every active session."""
        if self.is_remote:
            return self._remote_cmd({"cmd": "get_all_sessions"}).get('r', [])
        return [{'id': session.id, **session.info} for session in self.sessions.values()
                if session.is_active()]

    def get_session_dict(self):
        """Map the id of every active session to its URL."""
        return {session.id: session.url for session in self.sessions.values() if session.is_active()}

    def find_session(self, url_pattern: str):
        """Return ``(id, info)`` pairs of active sessions whose URL contains ``url_pattern``.

        An empty pattern selects the most recently registered session instead.
        """
        if url_pattern == '':
            session = self.sessions.get(self.latest_session_id)
            return [(session.id, session.info)] if session else []
        matching_sessions = []
        for session in self.sessions.values():
            if not session.is_active(): continue
            if 'url' in session.info and url_pattern in session.info['url']:
                matching_sessions.append((session.id, session.info))
        return matching_sessions

    def set_session(self, url_pattern: str) -> Optional[str]:
        """Make the first session matching ``url_pattern`` the default.

        Returns:
            The chosen session id, or None when nothing matched.
        """
        if self.is_remote:
            matched = self._remote_cmd({"cmd": "find_session", "url_pattern": url_pattern}).get('r', [])
        else:
            matched = self.find_session(url_pattern)
        if not matched:
            print(f"警告: 未找到URL包含 '{url_pattern}' 的会话")
            return None
        if len(matched) > 1: print(f"警告: 找到多个URL包含 '{url_pattern}' 的会话,选择第一个")
        self.last_cmd_time = 0
        self.default_session_id, info = matched[0]
        print(f"成功设置默认会话: {self.default_session_id}: {info['url']}")
        return self.default_session_id

    def jump(self, url, timeout=10):
        """Navigate the default tab to ``url``."""
        # json.dumps yields a correctly escaped JS string literal, so quotes
        # or backslashes in the URL cannot break out of the statement.
        self.execute_js(f"window.location.href={json.dumps(url)}", timeout=timeout)

    def page_source(self):
        """Return the outer HTML of the document element."""
        return self.execute_js("document.documentElement.outerHTML")

    def body(self):
        """Return the outer HTML of the body element."""
        return self.execute_js("document.body.outerHTML")

    def newtab(self, url=None):
        """Open ``url`` in a new tab via GM_openInTab and switch to it."""
        if url is None: url = "http://www.baidu.com/robots.txt"
        return self.execute_js(f'GM_openInTab({json.dumps(url)});', auto_switch_newtab=True)
+
if __name__ == "__main__":
    driver = TMWebDriver(host='localhost', port=18765)
    # Both servers run on daemon threads, so a local driver would stop as
    # soon as the main thread ended; keep the process alive until Ctrl+C.
    try:
        while not driver.is_remote:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
\ No newline at end of file
diff --git a/agent_loop.py b/agent_loop.py
new file mode 100644
index 0000000..e30eecb
--- /dev/null
+++ b/agent_loop.py
@@ -0,0 +1,67 @@
+import json
+from dataclasses import dataclass
+from typing import Any, Optional
@dataclass
class StepOutcome:
    """What a tool step hands back to ``agent_runner_loop``."""

    # Tool payload; the loop wraps it in <tool_result> tags for the next prompt.
    data: Any
    # Text appended to the next prompt; None makes the loop report
    # CURRENT_TASK_DONE.
    next_prompt: Optional[str] = None
    # When true (and next_prompt is not None) the loop returns EXITED.
    should_exit: bool = False
+
+
def try_call_generator(func, *args, **kwargs):
    """Call ``func`` and transparently support both plain and generator tools.

    When the call returns an iterator (for example a generator), its items
    are re-yielded and its return value becomes this generator's return
    value. Any other return value, including tuples, sets and other
    non-iterator iterables, is returned unchanged without being streamed.
    """
    from collections.abc import Iterator  # local import keeps the block self-contained

    ret = func(*args, **kwargs)
    # Checking for an iterator rather than for __iter__ keeps container
    # results from being consumed and replaced by None.
    if isinstance(ret, Iterator):
        ret = yield from ret
    return ret
+
class BaseHandler:
    """Base class for tool handlers: tool ``name`` is served by method ``do_name``."""

    def tool_before_callback(self, tool_name, args, content):
        """Hook called before a tool runs; does nothing by default."""
        pass

    def tool_after_callback(self, tool_name, args, content):
        """Hook called after a tool ran; does nothing by default."""
        pass

    def dispatch(self, tool_name, args, response):
        """Run the tool and return its outcome (generator).

        Items yielded by the hooks and by the tool are passed through. An
        unknown tool yields an error line and returns an exiting StepOutcome.
        """
        method_name = f"do_{tool_name}"
        if hasattr(self, method_name):
            _ = yield from try_call_generator(self.tool_before_callback, tool_name, args, response)
            ret = yield from try_call_generator(getattr(self, method_name), args, response)
            _ = yield from try_call_generator(self.tool_after_callback, tool_name, args, response)
            return ret
        else:
            yield f"❌ 未知工具: {tool_name}\n"
            # should_exit is a bool field; the original passed the string
            # "ERROR" positionally, which only worked because it is truthy.
            return StepOutcome(None, next_prompt="未知工具", should_exit=True)
+
def json_default(o):
    """``json.dumps`` fallback: a set becomes a list, anything else its ``str()``."""
    return list(o) if isinstance(o, set) else str(o)
+
def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, max_turns=15):
    """Drive the chat/tool loop and stream progress text (generator).

    Each turn asks ``client.chat`` for a reply, dispatches the first tool
    call (or the pseudo tool ``no_tool`` when there is none) through
    ``handler.dispatch`` and builds the next prompt from the outcome.

    Yields:
        Progress strings: the turn banner, the model's thinking and content,
        the tool announcement and whatever the handler yields.

    Returns:
        ``{'result': 'CURRENT_TASK_DONE', 'data': ...}`` when the outcome has
        no next prompt, ``{'result': 'EXITED', 'data': ...}`` when it asks to
        exit, or ``{'result': 'MAX_TURNS_EXCEEDED'}``.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input}
    ]
    for turn in range(max_turns):
        yield f"\n[🤖 LLM Thinking (Turn {turn+1})] ..."
        response = client.chat(messages=messages, tools=tools_schema)

        if response.thinking: yield '<thinking>' + response.thinking + '</thinking>\n'
        # Skip empty/None content so consumers that concatenate the yielded
        # chunks never receive a non-string.
        if response.content: yield response.content

        if not response.tool_calls:
            tool_name, args = 'no_tool', {}
        else:
            # Only the first tool call of a reply is executed.
            tool_call = response.tool_calls[0]
            tool_name = tool_call.function.name
            args = json.loads(tool_call.function.arguments)

        if tool_name != 'no_tool':
            yield f"\n\n正在调用工具: {tool_name},参数: {args}\n"
        outcome = yield from handler.dispatch(tool_name, args, response)

        if outcome.next_prompt is None: return {'result': 'CURRENT_TASK_DONE', 'data': outcome.data}
        if outcome.should_exit: return {'result': 'EXITED', 'data': outcome.data}

        next_prompt = ""
        if outcome.data is not None:
            # isinstance also covers dict/list subclasses, which the previous
            # exact type() comparison rendered with str() instead of JSON.
            if isinstance(outcome.data, (dict, list)):
                datastr = json.dumps(outcome.data, ensure_ascii=False, default=json_default)
            else:
                datastr = str(outcome.data)
            next_prompt += f"<tool_result>\n{datastr}\n</tool_result>\n\n"
        next_prompt += outcome.next_prompt
        # Only the new prompt is sent on the next turn.
        # NOTE(review): presumably the client keeps the conversation history
        # itself - confirm against the client implementation.
        messages = [{"role": "user", "content": next_prompt}]
    return {'result': 'MAX_TURNS_EXCEEDED'}
\ No newline at end of file
diff --git a/agentapp.py b/agentapp.py
new file mode 100644
index 0000000..915921d
--- /dev/null
+++ b/agentapp.py
@@ -0,0 +1,94 @@
+import os, sys
+if sys.stdout is None: sys.stdout = open(os.devnull, "w")
+if sys.stderr is None: sys.stderr = open(os.devnull, "w")
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+
+import streamlit as st
+import time, json, re
+
+with open('tools_schema.json', 'r', encoding='utf-8') as f:
+ TOOLS_SCHEMA = json.load(f)
+
+
+st.set_page_config(page_title="Cowork", layout="wide")
+
+from sidercall import SiderLLMSession, LLMSession, ToolClient
+from agent_loop import agent_runner_loop, StepOutcome, BaseHandler
+
@st.cache_resource
def init():
    """Build the tool-calling LLM client (wrapped in ``st.cache_resource``)."""
    session = SiderLLMSession(multiturns=6)
    return ToolClient(session.ask, auto_save_tokens=True)
+
+llmclient = init()
+
+from ga import GenericAgentHandler
+
def get_system_prompt():
    """Return the text of ``sys_prompt.txt`` (UTF-8, relative to the working directory)."""
    with open('sys_prompt.txt', 'r', encoding='utf-8') as prompt_file:
        text = prompt_file.read()
    return text
+
+if "last_goal" not in st.session_state:
+ st.session_state.last_goal = ""
+
def refine_user_goal(raw_query, last_goal):
    """Ask the LLM to distil the user's actual goal from the new input.

    Without a previous goal the input is returned as is. Otherwise the LLM
    decides whether the input extends the previous goal, merely corrects the
    approach, or starts a new topic. Any failure of the LLM call, or an empty
    answer, falls back to the raw input.
    """
    if not last_goal:
        return raw_query

    decide_prompt = f"""
用户之前的目标是: "{last_goal}"
用户现在输入了: "{raw_query}"

请判断:
1. 如果用户提供补充信息、或者是接续之前的任务,请输出合并后的【最终目标】。
2. 如果用户只是指出之前做法有错而非变更目标,那么请输出原目标不做修改。
3. 如果用户开启了一个完全不相关的新话题,请直接输出用户现在的输入内容。

请直接输出目标描述,不要包含任何多余的文字、解释或标点。
"""
    try:
        refined = llmclient.llm_func(decide_prompt).strip()
    # Deliberate best-effort fallback, but narrowed from a bare except so
    # that KeyboardInterrupt / SystemExit are no longer swallowed.
    except Exception:
        return raw_query
    return refined if refined else raw_query
+
def agent_backend_stream(raw_query):
    """Run one agent task for ``raw_query`` and stream its progress text.

    The goal is first refined against the previous one (a "[Goal Refined]"
    line is yielded when it changed). The refined goal is given to the
    handler, while the raw query is what is sent to the agent loop. After
    the loop finishes the refined goal is remembered in the session state.
    """
    previous_goal = st.session_state.last_goal
    final_goal = refine_user_goal(raw_query, previous_goal)
    if final_goal != raw_query:
        yield f"[Goal Refined] {final_goal}\n"

    sys_prompt = get_system_prompt()
    handler = GenericAgentHandler(None, final_goal, './temp')
    llmclient.last_tools = ''
    loop = agent_runner_loop(llmclient, sys_prompt, raw_query, handler,
                             TOOLS_SCHEMA, max_turns=25)
    ret = yield from loop
    st.session_state.last_goal = final_goal
    return ret
+
st.title("🖥️ Cowork")

# The chat history is kept in st.session_state.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Re-render every stored turn.
for past in st.session_state.messages:
    with st.chat_message(past["role"]):
        st.markdown(past["content"])

prompt = st.chat_input("请输入指令")
if prompt:
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        # Redraw the growing answer with a cursor mark while chunks arrive.
        for chunk in agent_backend_stream(prompt):
            full_response = full_response + chunk
            message_placeholder.markdown(full_response + "▌")
        message_placeholder.markdown(full_response)
    st.session_state.messages.append({"role": "assistant", "content": full_response})
\ No newline at end of file
diff --git a/ga.py b/ga.py
new file mode 100644
index 0000000..446a7a3
--- /dev/null
+++ b/ga.py
@@ -0,0 +1,379 @@
+import sys, os, re
+import pyperclip
+import json, time
+from pathlib import Path
+import subprocess
+import tempfile
+if sys.stdout is None: sys.stdout = open(os.devnull, "w")
+if sys.stderr is None: sys.stderr = open(os.devnull, "w")
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from sidercall import LLMSession, ToolClient
+from agent_loop import BaseHandler, StepOutcome, agent_runner_loop
+
+def code_run(code: str, code_type: str = "python", timeout: int = 60, cwd: str = None):
+ """
+ 针对 Windows 优化的双模态执行器
+ python: 运行复杂的 .py 脚本(文件模式)
+ powershell: 运行单行指令(命令模式)
+ 优先使用python仅在必要系统操作时使用powershell。
+ """
+ # 统一路径处理
+ preview = (code[:60].replace('\n', ' ') + '...') if len(code) > 60 else code.strip()
+ yield f"\n[Action] Running {code_type} in {os.path.basename(cwd)}: {preview}\n"
+ cwd = cwd or os.getcwd()
+ if code_type == "python":
+ # Python 依然建议走文件因为模型生成的逻辑通常包含多行、import 和类定义
+ tmp_file = tempfile.NamedTemporaryFile(suffix=".py", delete=False, mode='w', encoding='utf-8')
+ tmp_file.write(code)
+ tmp_path = tmp_file.name
+ tmp_file.close()
+ cmd = ["python", "-u", tmp_path]
+ elif code_type == "powershell":
+ cmd = ["powershell", "-NoProfile", "-NonInteractive", "-Command", code]
+ tmp_path = None
+ else:
+ return {"status": "error", "msg": f"不支持的类型: {code_type}"}
+ print("code run output:")
+ startupinfo = None
+ if os.name == 'nt':
+ startupinfo = subprocess.STARTUPINFO()
+ startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+ startupinfo.wShowWindow = 0 # SW_HIDE
+ full_stdout = []
+ full_stderr = []
+ try:
+ process = subprocess.Popen(
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ bufsize=0, cwd=cwd, startupinfo=startupinfo
+ )
+ for line_bytes in iter(process.stdout.readline, b''):
+ try:
+ line = line_bytes.decode('utf-8')
+ except UnicodeDecodeError:
+ line = line_bytes.decode('gbk', errors='ignore')
+ print(line, end="")
+ full_stdout.append(line)
+
+ stdout_rem, stderr_raw = process.communicate(timeout=timeout)
+ if stdout_rem:
+ try: rem_str = stdout_rem.decode('utf-8')
+ except UnicodeDecodeError:
+ rem_str = stdout_rem.decode('gbk', errors='ignore')
+ full_stdout.append(rem_str)
+
+ if stderr_raw:
+ try: stderr_str = stderr_raw.decode('utf-8')
+ except UnicodeDecodeError:
+ stderr_str = stderr_raw.decode('gbk', errors='ignore')
+ full_stderr.append(stderr_str)
+ print(f"Error: {stderr_str}")
+
+ status = "success" if process.returncode == 0 else "error"
+ stdout_str = "".join(full_stdout)
+ stderr_str = "".join(full_stderr)
+ status_icon = "✅" if process.returncode == 0 else "❌"
+ output_snippet = (stdout_str[:200] + '...') if len(stdout_str) > 200 else stdout_str
+ yield f"[Status] {status_icon} Exit Code: {process.returncode}\n[Stdout] {output_snippet}\n"
+ return {
+ "status": status,
+ "stdout": stdout_str[-2000:],
+ "stderr": stderr_str[-2000:],
+ "exit_code": process.returncode
+ }
+ except subprocess.TimeoutExpired:
+ return {"status": "error", "msg": "Timeout"}
+ except Exception as e:
+ return {"status": "error", "msg": str(e)}
+ finally:
+ if code_type == "python" and tmp_path and os.path.exists(tmp_path): os.remove(tmp_path)
+
+
def ask_user(question: str, candidates: list = None):
    """Build an interrupt request asking a human for input.

    Args:
        question: The question to put to the user.
        candidates: Optional list of suggested answers.

    The caller must make sure the step is returned with should_exit=True.
    """
    options = candidates if candidates else []
    payload = {"question": question, "candidates": options}
    return {"status": "INTERRUPT", "intent": "HUMAN_INTERVENTION", "data": payload}
+
+from web_tools import execute_js_rich, get_html
+
+driver = None
+
def first_init_driver():
    """Create the module-level driver and wait until a browser tab is connected.

    Polls once per second until at least one session is reported, then opens
    a new tab and waits five seconds.
    """
    global driver
    from TMWebDriver import TMWebDriver
    driver = TMWebDriver()
    time.sleep(1)
    while len(driver.get_all_sessions()) == 0:
        time.sleep(1)
    # NOTE(review): indentation was lost in the source dump; the newtab call
    # is assumed to follow the wait loop (it needs a connected tab) - confirm.
    driver.newtab()
    time.sleep(5)
+
def web_scan(focus_item="", switch_tab_id=None):
    """Return the cleaned content of the active page plus the list of tabs.

    Args:
        focus_item: Semantic filter hint passed to ``get_html`` as
            ``instruction``; per the original note, list items containing
            the keyword are preferentially kept.
        switch_tab_id: Optional session id to make the default before scanning.

    Returns:
        ``{"status": "success", "metadata": {...}, "content": ...}`` or
        ``{"status": "error", "msg": ...}``.

    Prefer execute_js over observing the full HTML.
    """
    global driver
    if driver is None: first_init_driver()
    try:
        tabs = []
        for sess in driver.get_all_sessions():
            sess.pop('connected_at', None)
            sess.pop('type', None)
            # A session may carry url=None; treat it as empty instead of
            # failing the whole scan on None[:50].
            url = sess.get('url') or ''
            sess['url'] = url[:50] + ("..." if len(url) > 50 else "")
            tabs.append(sess)
        if switch_tab_id: driver.default_session_id = switch_tab_id
        content = get_html(driver, cutlist=True, instruction=focus_item, maxchars=23000)
        return {
            "status": "success",
            "metadata": {
                "tabs_count": len(tabs),
                "tabs": tabs,
                "active_tab": driver.default_session_id
            },
            "content": content
        }
    except Exception as e:
        return {"status": "error", "msg": format_error(e)}
+
+import traceback
def format_error(e):
    """Render an exception as "Type: message @ file:line, function -> `source`".

    The location comes from the innermost frame of the exception's own
    traceback, so the function also works outside an ``except`` block; the
    location part is omitted when the exception carries no traceback.
    """
    exc_name = type(e).__name__
    frames = traceback.extract_tb(e.__traceback__)
    if frames:
        last = frames[-1]
        fname = os.path.basename(last.filename)
        return f"{exc_name}: {str(e)} @ {fname}:{last.lineno}, {last.name} -> `{last.line}`"
    return f"{exc_name}: {str(e)}"
+
def web_execute_js(script: str):
    """Run a JS script in the browser via ``execute_js_rich``.

    Args:
        script: JavaScript source to execute.

    Returns:
        Whatever ``execute_js_rich`` returns. According to the original note
        this is a dict with "status" ("failed" or "success"), "js_return",
        "error", "transients", "environment" (with "new_tab" and
        "reloaded"), "diff" and "suggestion" - not verifiable from this file.
        On an exception: ``{"status": "error", "msg": ...}``.
    """
    global driver
    if driver is None:
        first_init_driver()
    try:
        return execute_js_rich(script, driver)
    except Exception as exc:
        return {"status": "error", "msg": format_error(exc)}
+
def file_patch(path: str, old_content: str, new_content: str):
    """Replace the single occurrence of ``old_content`` in a file with ``new_content``.

    Returns a status dict; the patch is refused when the file is missing or
    when ``old_content`` occurs zero times or more than once.
    """
    path = str(Path(path).resolve())
    try:
        if not os.path.exists(path):
            return {"status": "error", "msg": "文件不存在"}
        with open(path, 'r', encoding='utf-8') as src:
            original = src.read()
        # The block must be unique so the replacement is unambiguous.
        occurrences = original.count(old_content)
        if occurrences == 0:
            return {"status": "error", "msg": "未找到匹配的旧文本块,请检查空格、缩进和换行是否完全一致。"}
        if occurrences > 1:
            return {"status": "error", "msg": f"找到 {occurrences} 处匹配,请提供更长的旧文本块以确保唯一性。"}
        with open(path, 'w', encoding='utf-8') as dst:
            dst.write(original.replace(old_content, new_content))
        return {"status": "success", "msg": "文件局部修改成功"}
    except Exception as exc:
        return {"status": "error", "msg": str(exc)}
+
def file_read(path, start=1, count=100, show_linenos=True):
    """Read a page of lines from a text file.

    Args:
        path: File to read (UTF-8, undecodable bytes replaced).
        start: 1-based number of the first line; values below 1 mean 1.
        count: Maximum number of lines to return.
        show_linenos: Prefix each line with "N|" and cut it to 200 characters.

    Returns:
        "Total:<n> lines\n" followed by the selected lines, or
        "Error: <message>" when reading fails.
    """
    try:
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            lines = f.readlines()
        # A start of 0 or less used to produce a negative slice index and
        # return the wrong lines.
        start = max(1, start)
        chunk = lines[start - 1:start - 1 + count]
        if show_linenos:
            res = []
            for lineno, text in enumerate(chunk, start):
                shown = text[:200]
                # Keep the line break when a long line is cut; otherwise the
                # next numbered line is glued onto this one.
                if len(text) > 200 and text.endswith('\n'):
                    shown += '\n'
                res.append(f"{lineno}|{shown}")
        else:
            res = chunk
        return f"Total:{len(lines)} lines\n" + "".join(res)
    except Exception as e:
        return f"Error: {str(e)}"
+
class GenericAgentHandler(BaseHandler):
    '''
    Tool library of the generic agent. Every tool is implemented by a method
    carrying a ``do_`` prefix; the tool name itself has no prefix.
    '''
    def __init__(self, parent, user_input, cwd):
        self.parent = parent
        self.user_input = user_input
        self.plan = ""
        self.focus = ""
        self.cwd = cwd

    def _get_abs_path(self, path):
        """Resolve ``path`` against the handler's working directory ('' stays '')."""
        if not path: return ""
        return os.path.abspath(os.path.join(self.cwd, path))

    def do_code_run(self, args, response):
        '''Run a code snippet taken from the reply text.

        The snippet is length-limited: bulk data must not be embedded in the
        code and should be read from files instead.
        '''
        code_type = args.get("type", "python")
        # The code comes from a fenced block in response.content, i.e.
        # ```python ... ``` or ```powershell ... ```. re.escape keeps a type
        # containing regex metacharacters from changing the pattern.
        pattern = rf"```{re.escape(code_type)}\n(.*?)\n```"
        # A more general option would ignore the type and take the last
        # block: rf"```(?:{code_type})?\n(.*?)\n```"
        matches = re.findall(pattern, response.content, re.DOTALL)
        if not matches:
            return StepOutcome(None, next_prompt=f"【系统错误】:你调用了 code_run但未在回复中提供 ```{code_type} 代码块。请重新输出代码并附带工具调用。")
        # Use the last block (usually the model's corrected final version).
        code = matches[-1].strip()
        timeout = args.get("timeout", 60)
        cwd = args.get("cwd", self.cwd)
        result = yield from code_run(code, code_type, timeout, cwd)
        return StepOutcome(result, next_prompt=self._get_anchor_prompt())

    def do_ask_user(self, args, response):
        """Interrupt the loop with a question for the user."""
        question = args.get("question", "请提供输入:")
        candidates = args.get("candidates", [])
        result = ask_user(question, candidates)
        return StepOutcome(result, next_prompt="", should_exit=True)

    def do_web_scan(self, args, response):
        '''Scan the current page; focus_item is only for fuzzy-searching
        related items in long lists.

        The tool also lists the open tabs and can switch tabs.
        '''
        focus_item = args.get("focus_item", "")
        switch_tab_id = args.get("switch_tab_id", None)
        result = web_scan(focus_item, switch_tab_id=switch_tab_id)
        # The page content goes into the next prompt; only the metadata is
        # shown to the user and kept as step data.
        content = result.pop("content", None)
        yield f'\n{str(result)}\n'
        next_prompt = f"```html\n{content}\n```"
        return StepOutcome(result, next_prompt=next_prompt)

    def do_web_execute_js(self, args, response):
        '''Preferred tool for web work: run any JS for *full* control of the browser.

        The result can be saved to a file for later analysis, but saving only
        covers immediately available values and is not compatible with
        asynchronous operations such as await.
        '''
        script = args.get("script", "")
        save_to_file = args.get("save_to_file", "")
        result = web_execute_js(script)
        if save_to_file and "js_return" in result:
            content = str(result["js_return"] or '')
            abs_path = self._get_abs_path(save_to_file)
            with open(abs_path, 'w', encoding='utf-8') as f: f.write(str(content))
            # Only a 200-character preview stays in the step data.
            result["js_return"] = content[:200] + ("..." if len(content) > 200 else "")
            result["js_return"] += f"\n\n[已保存以上内容到 {abs_path}]"
        print("Web Execute JS Result:", result)
        return StepOutcome(result, next_prompt=self._get_anchor_prompt())

    def do_file_patch(self, args, response):
        """Replace a unique text block inside a file."""
        path = self._get_abs_path(args.get("path", ""))
        yield f"\n[Action] Patching file: {path}\n"
        old_content = args.get("old_content", "")
        new_content = args.get("new_content", "")
        result = file_patch(path, old_content, new_content)
        yield str(result) + "\n"
        return StepOutcome(result, next_prompt=self._get_anchor_prompt())

    def do_file_write(self, args, response):
        '''Write or append a whole file; fine-grained edits should use file_patch.

        The content is taken from the reply text: everything between the end
        of the first ``` line and the last ``` marker.
        '''
        path = self._get_abs_path(args.get("path", ""))
        mode = args.get("mode", "overwrite")
        action_str = "Appending to" if mode == "append" else "Writing"
        yield f"\n[Action] {action_str} file: {os.path.basename(path)}\n"

        def extract_intended_block(content):
            start_marker = "```"
            first_idx = content.find(start_marker)
            last_idx = content.rfind(start_marker)
            if first_idx == -1 or last_idx == -1 or first_idx == last_idx:
                return None
            # Skip the rest of the opening fence line (e.g. a language tag).
            header_end = content.find("\n", first_idx)
            if header_end == -1 or header_end > last_idx:
                return None
            actual_content = content[header_end + 1 : last_idx].strip()
            return actual_content

        blocks = extract_intended_block(response.content)
        if not blocks:
            yield f"[Status] ❌ 失败: 未在回复中找到代码块内容\n"
            return StepOutcome({"status": "error", "msg": "No code block found in response"}, next_prompt="\n")
        new_content = blocks
        try:
            write_mode = 'a' if mode == "append" else 'w'
            final_content = ("\n" + new_content) if mode == "append" else new_content
            with open(path, write_mode, encoding="utf-8") as f:
                f.write(final_content)
            yield f"[Status] ✅ {mode.capitalize()} 成功 ({len(new_content)} bytes)\n"
            return StepOutcome({"status": "success"},
                               next_prompt=f"\n提醒: <user_input>{self.user_input}</user_input>请继续执行下一步。\n")
        except Exception as e:
            yield f"[Status] ❌ 写入异常: {str(e)}\n"
            return StepOutcome({"status": "error", "msg": str(e)}, next_prompt="\n")

    def do_file_read(self, args, response):
        """Read a page of lines from a file."""
        path = self._get_abs_path(args.get("path", ""))
        yield f"\n[Action] Reading file: {path}\n"
        start = args.get("start", 1)
        count = args.get("count", 100)
        show_linenos = args.get("show_linenos", True)
        result = file_read(path, start, count, show_linenos)
        return StepOutcome(result, next_prompt=self._get_anchor_prompt())

    def do_update_plan(self, args, response):
        '''
        Synchronise the high-level task progress and the strategic focus.
        Design intent:
        1. Use it for the initial breakdown only when a task involves
           multi-step logic (e.g. search first, then refactor, then test).
        2. Call it again only on a major change of approach (e.g. plan A
           turned out to be infeasible and plan B is needed).
        3. Never use it to record minor debugging steps or code fixes.
        Simple tasks do not need it.
        '''
        new_plan = args.get("plan", "")
        new_focus = args.get("focus", "")
        if new_plan: self.plan = new_plan
        if new_focus: self.focus = new_focus
        yield f"\n[Info] Updated plan and focus.\n"
        yield f"New Plan:\n{self.plan}\n\n"
        yield f"New Focus:\n{self.focus}\n"
        return StepOutcome({"status": "success"},
                           next_prompt=self._get_anchor_prompt())

    def do_no_tool(self, args, response):
        '''Special tool invoked by the engine itself; do not list it in TOOLS_SCHEMA.
        '''
        yield "\n\n[Info] No tool called. Final response to user.\n"
        return StepOutcome(response, next_prompt=None, should_exit=True)

    def _get_anchor_prompt(self):
        """Build the reminder appended after a tool result: user input, plan and focus."""
        prompt = f"\n提醒: \n<user_input>{self.user_input}</user_input>\n"
        if self.plan: prompt += f"<plan>\n{self.plan}\n</plan>\n"
        if self.focus: prompt += f"<current>\n{self.focus}\n</current>\n"
        prompt += "\n请继续执行下一步。"
        return prompt
+
+
+if __name__ == "__main__":
+ pass
\ No newline at end of file
diff --git a/launch.pyw b/launch.pyw
new file mode 100644
index 0000000..4b91d6f
--- /dev/null
+++ b/launch.pyw
@@ -0,0 +1,48 @@
+import webview
+import threading
+import subprocess
+import sys, time, os, ctypes
+import atexit
+
+# === 配置区域 ===
+WINDOW_WIDTH = 600
+WINDOW_HEIGHT = 900
+RIGHT_PADDING = 0 # 离屏幕右边缘的距离
+TOP_PADDING = 300 # 离屏幕上边缘的距离
+
def get_screen_width():
    """Return the primary screen width in pixels, or 1920 when it cannot be queried."""
    try:
        # GetSystemMetrics(0) reports the width of the primary screen.
        user32 = ctypes.windll.user32
        return user32.GetSystemMetrics(0)
    # ctypes.windll does not exist outside Windows (AttributeError); OSError
    # covers a failing DLL call. Narrowed from a bare except so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    except (AttributeError, OSError):
        return 1920
+
def start_streamlit():
    """Launch the Streamlit app as a child process and kill it at interpreter exit."""
    global proc
    proc = subprocess.Popen([
        sys.executable, "-m", "streamlit", "run", "agentapp.py",
        "--server.port", "8501",
        "--server.headless", "true",
        "--theme.base", "dark",  # start in dark mode by default
    ])
    atexit.register(proc.kill)
+
if __name__ == '__main__':
    # Start Streamlit in the background, then show it in a window docked to
    # the right edge of the primary screen.
    threading.Thread(target=start_streamlit, daemon=True).start()
    x_pos = get_screen_width() - WINDOW_WIDTH - RIGHT_PADDING
    time.sleep(2)  # fixed delay before the window loads the Streamlit URL
    webview.create_window(
        title='GenericAgent',
        url='http://localhost:8501',
        width=WINDOW_WIDTH,
        height=WINDOW_HEIGHT,
        x=x_pos,
        y=TOP_PADDING,
        resizable=True,
        text_select=True,
    )
    webview.start()
\ No newline at end of file
diff --git a/ljq_web_driver.user.js b/ljq_web_driver.user.js
new file mode 100644
index 0000000..01eb42b
--- /dev/null
+++ b/ljq_web_driver.user.js
@@ -0,0 +1,428 @@
+// ==UserScript==
+// @name ljq_web_driver
+// @namespace http://tampermonkey.net/
+// @version 0.2
+// @description Execute JS via ljq_web_driver
+// @require https://code.jquery.com/jquery-3.6.0.min.js
+// @author You
+// @match *://*/*
+// @grant GM_setValue
+// @grant GM_getValue
+// @grant GM_xmlhttpRequest
+// @grant GM_openInTab
+// @grant unsafeWindow
+// @connect localhost
+// @run-at document-start
+// ==/UserScript==
+
+
+(function() {
+ 'use strict';
+ const log_prefix = "ljq_driver: "; // prefix for console messages and indicator text
+
+ if (window.self !== window.top) { // not the top-level window: do nothing inside iframes
+ console.log(log_prefix + '在iframe中不执行');
+ return;
+ }
+
+ const wsUrl = 'ws://localhost:18765'; // WebSocket endpoint of the local driver
+ const httpUrl = 'http://localhost:18766/'; // base URL for the HTTP long-poll / result API
+
+ function isWebSocketServerAlive(callback) { // Probe port 18765 with a plain HTTP GET and report the outcome through callback.
+ GM_xmlhttpRequest({
+ method: 'GET',
+ url: 'http://localhost:18765/',
+ onload: () => callback(true), // any completed response counts as alive
+ onerror: () => callback(false)
+ });
+ }
+
+ let ws;
+ let sid = (window.name && window.name.startsWith('ljq_')) ? // reuse the ID stored in window.name, else the one in sessionStorage
+ window.name : window.sessionStorage.getItem('ljq_driver_sid');
+ if (!sid) {
+ sid = `ljq_${Date.now().toString().slice(-2)}${Math.random().toString(36).slice(2, 4)}`; // last 2 timestamp digits + 2 random base-36 chars
+ window.sessionStorage.setItem('ljq_driver_sid', sid);
+ window.name = sid;
+ console.log(log_prefix + `创建新会话ID: ${sid}`);
+ } else {
+ if (window.name !== sid) window.name = sid; // keep window.name in sync with the stored ID
+ console.log(log_prefix + `使用现有会话ID: ${sid}`);
+ }
+
+ try {
+ GM_setValue('new_tab_report', { // announce this tab; connect()'s onmessage handler polls this value
+ url: window.location.href,
+ sessionId: sid,
+ ts: Date.now()
+ });
+ } catch (e) {} // best effort: a failure to write the report is ignored
+
+ // Persist the session ID
+ GM_setValue('sid', sid);
+
+ // Get or create the status indicator
+ function getIndicator() {
+ // Look up an existing indicator
+ let ind = document.getElementById('ljq-ind');
+
+ // Remove duplicate indicators
+ const dups = document.querySelectorAll('[id="ljq-ind"]');
+ if (dups.length > 1) {
+ for (let i = 1; i < dups.length; i++) {
+ dups[i].remove();
+ }
+ ind = dups[0];
+ }
+
+ // Create a new indicator
+ if (!ind && document.body) {
+ ind = document.createElement('div');
+ ind.id = 'ljq-ind';
+ ind.style.cssText = `
+ position: fixed;bottom: 10px;
+ right: 10px;background-color: #f44336;
+ color: white;padding: 8px 12px;
+ border-radius: 6px;font-size: 14px;
+ font-weight: bold;z-index: 9999;
+ transition: background-color 0.3s;
+ cursor: pointer;box-shadow: 0 3px 6px rgba(0,0,0,0.25);
+ `;
+ ind.innerText = log_prefix + '正在连接...';
+
+ ind.addEventListener('click', () => alert(`会话ID: ${sid}\n当前URL: ${location.href}`)); // clicking shows the session ID and URL
+ document.body.appendChild(ind);
+ }
+
+ return ind; // may be null when document.body does not exist yet
+ }
+
+ // Update the status indicator: status is one of 'ok', 'disc', 'conn', 'err', 'exec'; msg is only used for 'err'
+ function updateStatus(status, msg) {
+ if (!document.body) return setTimeout(() => updateStatus(status, msg), 100); // retry every 100 ms until the body exists
+
+ const ind = getIndicator();
+ if (!ind) return;
+
+ if (status === 'ok') {
+ ind.style.backgroundColor = '#4CAF50';
+ ind.innerText = log_prefix + '连接成功';
+ } else if (status === 'disc') {
+ ind.style.backgroundColor = '#f44336';
+ ind.innerText = log_prefix + '连接断开';
+ } else if (status === 'conn') {
+ ind.style.backgroundColor = '#2196F3';
+ ind.innerText = log_prefix + '正在连接(HTTP)';
+ } else if (status === 'err') {
+ ind.style.backgroundColor = '#FF9800';
+ ind.innerText = log_prefix + `发生错误 (${msg})`;
+ } else if (status === 'exec') {
+ ind.style.backgroundColor = '#2196F3';
+ ind.innerText = log_prefix + '正在执行指令...';
+ }
+ }
+
+ function handleError(id, error, errorSource) { // Log the error, show it in the indicator, and report it to the driver.
+ console.error(`${errorSource}错误:`, error);
+ updateStatus('err', error.message);
+
+ const errorMessage = { // reads name/message/stack, so `error` is expected to be Error-like
+ type: 'error',
+ id: id,
+ sessionId: sid,
+ error: {
+ name: error.name,
+ message: error.message,
+ stack: error.stack,
+ source: errorSource
+ }
+ };
+
+ if (typeof ws !== 'undefined' && ws && ws.readyState === WebSocket.OPEN) { // prefer the open WebSocket
+ ws.send(JSON.stringify(errorMessage));
+ } else { // otherwise POST the error to the HTTP result endpoint
+ GM_xmlhttpRequest({
+ method: "POST",
+ url: httpUrl + "api/result",
+ headers: {"Content-Type": "application/json"},
+ data: JSON.stringify(errorMessage),
+ onload: function(response) {console.log("错误信息已通过HTTP发送", response);},
+ onerror: function(err) {console.error("发送错误信息失败", err);}
+ });
+ }
+ }
+
+ function smartProcessResult(result) { // Convert an execution result into something JSON-serializable (DOM nodes become outerHTML strings).
+ // Handle null/undefined and primitive values
+ if (result === null || result === undefined || typeof result !== 'object') {
+ return result;
+ }
+
+ // 1. jQuery objects - always converted to an array of HTML strings
+ if (typeof jQuery !== 'undefined' && result instanceof jQuery) {
+ const elements = [];
+ for (let i = 0; i < result.length; i++) {
+ if (result[i] && result[i].nodeType === 1) {
+ elements.push(result[i].outerHTML);
+ }
+ }
+ return elements; // always return an array
+ }
+
+ // 2. NodeList and HTMLCollection
+ if (result instanceof NodeList || result instanceof HTMLCollection) {
+ const elements = [];
+ for (let i = 0; i < result.length; i++) {
+ if (result[i] && result[i].nodeType === 1) {
+ elements.push(result[i].outerHTML);
+ }
+ }
+ return elements;
+ }
+
+ // 3. A single DOM element
+ if (result.nodeType === 1) {
+ return result.outerHTML;
+ }
+
+ // 4. Check for an array-like object with numeric indices and a length property
+ if (!Array.isArray(result) &&
+ typeof result === 'object' &&
+ 'length' in result &&
+ typeof result.length === 'number') {
+
+ // Check whether the first element is a DOM node
+ const firstElement = result[0];
+ if (firstElement && firstElement.nodeType === 1) {
+ const elements = [];
+ const length = Math.min(result.length, 100); // at most the first 100 entries are converted
+
+ for (let i = 0; i < length; i++) {
+ const elem = result[i];
+ if (elem && elem.nodeType === 1) {
+ elements.push(elem.outerHTML);
+ }
+ }
+
+ return elements;
+ }
+ }
+
+ // 5. Plain objects and arrays - use standard serialization
+ try {
+ return JSON.parse(JSON.stringify(result, function(key, value) {
+ if (typeof value === 'object' && value !== null) {
+ if (value.nodeType === 1) {
+ return value.outerHTML;
+ }
+ if (value === window || value === document) {
+ return '[Object]';
+ }
+ }
+ return value;
+ }));
+ } catch (e) {
+ console.error("序列化对象失败:", e);
+ return `[无法序列化的对象: ${e.message}]`;
+ }
+ }
+
+ // Prevent repeated initialization
+ if (window.ljq_init) return;
+ window.ljq_init = true;
+
+ function connecthttp() { // HTTP long-poll fallback: request work, execute it, POST the result, then poll again.
+ if (window.use_ws) return; // use_ws is set once the WebSocket opens: stop polling
+ updateStatus('conn');
+ GM_xmlhttpRequest({
+ method: "POST",
+ url: httpUrl + "api/longpoll",
+ headers: {"Content-Type": "application/json"},
+ data: JSON.stringify({
+ type: 'ready',
+ url: location.href,
+ sessionId: sid
+ }),
+ onload: function(resp) {
+ if (resp.status === 200) {
+ let data = JSON.parse(resp.responseText); // NOTE(review): a parse error here throws before the re-poll below is scheduled
+ console.log(log_prefix + '接收到数据:', data);
+ if (data.id === "" && data.ret === "use ws") return; // presumably the server telling this tab to use WebSocket; polling stops
+ if (data.id === "") return setTimeout(connecthttp, 100); // empty id: nothing to execute, poll again shortly
+ const response = executeCode(data);
+
+ if (response.error) {
+ handleError(data.id, response.error, '执行代码');
+ } else {
+ GM_xmlhttpRequest({
+ method: "POST",
+ url: httpUrl + "api/result",
+ headers: {"Content-Type": "application/json"},
+ data: JSON.stringify({
+ type: 'result',
+ id: data.id,
+ sessionId: sid,
+ result: response.result
+ })
+ });
+ }
+ } else {
+ console.error(log_prefix + '请求失败,状态码:', resp.status);
+ updateStatus('err', '请求失败');
+ }
+ setTimeout(connecthttp, 1000); // schedule the next poll
+ },
+ onerror: function(err) {
+ console.error(log_prefix + '请求错误', err);
+ updateStatus('err', '请求失败');
+ setTimeout(connecthttp, 5000); // retry after 5 s
+ },
+ ontimeout: function() {
+ console.log(log_prefix + '请求超时');
+ updateStatus('err', '请求超时');
+ setTimeout(connecthttp, 5000); // retry after 5 s
+ }
+ });
+ }
+
+ function executeCode(data) {
+ let id = data.id || 'unknown'; // 获取 ID
+ let result;
+
+ if (!data.code) {
+ console.log('收到非代码执行消息:', data);
+ return { error: '没有可执行的代码' };
+ }
+ updateStatus('exec');
+
+ try {
+ const jsCode = data.code.trim();
+ const lines = jsCode.split(/\r?\n/).filter(l => l.trim());
+ const lastLine = lines.length > 0 ? lines[lines.length - 1].trim() : '';
+
+ if (lastLine.startsWith('return')) {
+ // 最后一行包含 return 语句,使用 Function 构造器
+ result = (new Function(jsCode))();
+ } else {
+ try {
+ result = eval(jsCode);
+ } catch (e) {
+ if (isIllegalReturnError(e)) {
+ result = (new Function(jsCode))();
+ } else {
+ throw e;
+ }
+ }
+ }
+ const processedResult = smartProcessResult(result);
+ return { result: processedResult };
+
+ } catch (execError) {
+ return { error: execError }; // 返回错误信息
+ }
+ }
+
+ function isIllegalReturnError(e) { // True when e is a SyntaxError whose message matches one of the known "return outside function" wordings.
+ return e instanceof SyntaxError && (
+ /Illegal return statement/i.test(e.message) || // common in Chrome
+ /return not in function/i.test(e.message) || // common in Firefox
+ /Illegal 'return' statement/i.test(e.message) // compatibility with an older wording
+ );
+ }
+
+ function connect() { // Open the WebSocket to the driver and install its open/close/error/message handlers.
+ ws = new WebSocket(wsUrl);
+
+ ws.onopen = function() {
+ window.use_ws = true; // checked by connecthttp() to stop HTTP polling
+ console.log(log_prefix + '已连接');
+ updateStatus('ok');
+ ws.send(JSON.stringify({
+ type: 'ready',
+ url: location.href,
+ sessionId: sid
+ }));
+ };
+
+ ws.onclose = function() {
+ console.log(log_prefix + '已断开5秒后重连');
+ updateStatus('disc');
+ setTimeout(connect, 5000); // reconnect after 5 s
+ };
+
+ ws.onerror = function(err) {
+ console.error(log_prefix + '连接错误', err);
+ updateStatus('err', '连接失败');
+ isWebSocketServerAlive(function (e) { if (e) connecthttp()}); // if the port answers plain HTTP, fall back to long-polling
+ };
+
+ ws.onmessage = async function(e) {
+ try {
+ let data = JSON.parse(e.data);
+ let startTime = Date.now();
+ let newTabs = [];
+ let checkNewTab = data.auto_switch_newtab === true;
+ GM_setValue('new_tab_report', null); // clear any earlier report before running the code
+ const response = executeCode(data);
+
+ if (response.error) {
+ handleError(data.id, response.error, '执行代码');
+ } else {
+ if (checkNewTab) {
+ for (let i = 0; i < 10; i++) { // poll up to 10 times, 150 ms apart, for a report written since startTime
+ await new Promise(r => setTimeout(r, 150));
+ let latestReport = GM_getValue('new_tab_report');
+ if (latestReport && latestReport.ts >= startTime) {
+ console.log(`%c[Detected] 轮询第 ${i+1} 次抓到新标签!`, "color: green");
+ newTabs.push(latestReport);
+ break;
+ }
+ }
+ }
+ updateStatus('ok');
+ ws.send(JSON.stringify({
+ type: 'result',
+ id: data.id,
+ sessionId: sid,
+ result: response.result,
+ newTabs: newTabs
+ }));
+ }
+ } catch (parseError) {
+ handleError('unknown', parseError, '解析消息');
+ }
+ };
+
+ }
+
+ // Initialization: wait for document.body, then create the indicator and connect
+ function init() {
+ if (document.body) {
+ getIndicator();
+ connect();
+ } else {
+ setTimeout(init, 50); // body not available yet: retry in 50 ms
+ }
+ }
+
+ // Watch DOM changes: every mutation re-runs getIndicator()
+ const observer = new MutationObserver(() => getIndicator());
+
+ if (document.readyState !== 'loading') {
+ init();
+ observer.observe(document.body, { childList: true, subtree: true });
+ } else {
+ document.addEventListener('DOMContentLoaded', () => {
+ init();
+ observer.observe(document.body, { childList: true, subtree: true });
+ });
+ }
+
+ // Cleanup: stop observing and close an open WebSocket before the page unloads
+ window.addEventListener('beforeunload', () => {
+ observer.disconnect();
+ if (ws && ws.readyState === WebSocket.OPEN) {
+ ws.close();
+ }
+ });
+})();
\ No newline at end of file
diff --git a/make_prompts.py b/make_prompts.py
new file mode 100644
index 0000000..5905083
--- /dev/null
+++ b/make_prompts.py
@@ -0,0 +1,137 @@
+import sys, os, re
+import pyperclip
+import json, time
+from pathlib import Path
+import subprocess
+import tempfile
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+from sidercall import SiderLLMSession, LLMSession, ToolClient
+
+
+ask = SiderLLMSession().ask
+
+
+def generate_tool_schema():
+ """
+ 通过代码内省,将 Handler 的逻辑映射为高语义的工具描述。
+ """
+ with open('ga.py', 'r', encoding='utf-8') as f:
+ ga_code = f.read()
+ # 极简且具备高度概括能力的元 Prompt
+ meta_prompt = f"""
+# Role
+你是一个具备深度推理能力的 AI 系统架构师。你将通过阅读 `GenericAgentHandler` 源码,构建其对应的工具能力矩阵。
+
+# Task
+分析下方的源码,并输出 OpenAI Tool Schema。在输出 JSON 之前你必须进行内部思考Thinking Process
+
+# Thinking Process Requirements
+在 `<thinking>` 标签中,请按顺序分析:
+1. **核心工具链识别**:识别所有 `do_xxx` 方法,并分析它们依赖的底层 Utility 函数。
+2. **内容溯源审计**:重点分析哪些工具是从 `response.content` 提取核心逻辑(如代码块)的。对于这些工具,确认在 Schema 参数中排除掉对应的字段。
+3. **调用策略推导**:分析工具间的协作关系(例如 `file_read` 如何为 `file_patch` 提供定位)。
+4. **兜底逻辑确认**:明确某些特殊万能工具在系统中的保底角色,快速工具无法执行的操作由保底工具执行,但正常应优先使用方便的工具。
+5. **注释审阅**:结合函数注释,理解每个工具的使用限制,其中的重要信息务必反映在工具描述中(如长度限制等)。
+注释中的重要信息务必反映在工具描述中。
+注释中的重要信息务必反映在工具描述中。
+
+# Tool Schema Formatting Rules
+- **参数对齐**:仅包含 `do_xxx` 方法中通过 `args.get()` 显式获取的参数。
+- **高引导性描述**:描述应包含“何时调用”以及“如何根据反馈修正”,需要注意函数的注释事项。
+- **输出格式**:先输出 `<thinking>` 块,然后输出 ```json 块。
+
+# Source Code
+{ga_code}
+
+# Output
+请开始思考并生成:
+"""
+
+ # 假设 ask 是你已经封装好的 LLM 调用接口
+ raw_response = ask(meta_prompt, model="gemini-3.0-flash")
+ print(raw_response)
+
+ # --- 健壮的 JSON 解析逻辑 ---
+ try:
+ # 1. 清除 Markdown 围栏
+ clean_json = raw_response.strip()
+ if clean_json.startswith("```"):
+ # 兼容 ```json 和 ```
+ clean_json = re.sub(r'^```(?:json)?\s*', '', clean_json)
+ clean_json = re.sub(r'\s*```$', '', clean_json)
+
+ # 2. 移除可能的非 JSON 前导/后缀文字(如果有的话)
+ start_idx = clean_json.find('[')
+ end_idx = clean_json.rfind(']') + 1
+ if start_idx != -1 and end_idx != -1:
+ clean_json = clean_json[start_idx:end_idx]
+
+ final_schema = json.loads(clean_json)
+
+ if final_schema:
+ with open('tools_schema.json', 'w', encoding='utf-8') as f:
+ json.dump(final_schema, f, indent=2, ensure_ascii=False)
+ print("✅ 成功从代码内省生成 Schema 并持久化。")
+ return final_schema
+
+ except Exception as e:
+ print(f"❌ 解析 Schema 失败: {e}\n原始响应: {raw_response}")
+ return None
+
+
+def make_system_prompt(ga_code_path='ga.py'):  # Ask the LLM for a system prompt based on the tool source, strip <...> tags, save it to sys_prompt.txt and return it.
+ with open(ga_code_path, 'r', encoding='utf-8') as f:
+ ga_code = f.read()
+
+ # The goal of this meta-prompt is to produce a "worldview" rather than a "manual"
+ meta_prompt = f"""
+# Role
+你是一个 AI 架构师。请阅读下方的工具库源码,为 Agent 生成一份【系统级认知指令 (System Prompt)】。
+
+# Task
+基于代码逻辑,定义 Agent 的“能力边界”和“行动协议”。
+
+# Requirements (Crucial)
+1. **打破预训练偏见**:针对模型常说的“我只是 AI不能操作网页/文件”进行修正。明确告诉它:你现在拥有物理操作权限,工具设计保证了所有权限做所有事情。
+2. **避开冗余**:不要重复 Tool Schema 里的参数细节。
+3. **能力边界定义**
+ - 网页操作:它不是通过“想象”上网,而是通过实时的浏览器读写。
+ - 文件操作:它拥有物理文件读写权限,且遵循“先读后写”的稳健性原则。
+ - 保底逻辑:当专用工具失效时,使用 `code_run` 编写脚本解决一切。
+ - 特殊的update_plan仅在复杂任务时使用和ask_user用户也是有效资源工具。
+4. **行动协议**
+ - 必须在行动前进行<thinking>
+
+我后面还会附上具体的工具描述和Schema所以不要重复。
+主要以世界观为主,不要纠结于具体工具。
+
+# Input Source Code
+{ga_code}
+
+# Output
+仅输出 System Prompt 的正文,语气要果断、指令化。
+"""
+ print("🧠 正在重塑 Agent 世界观 (Generating System Prompt)...")
+ # Call the LLM through ask
+ system_prompt_content = ask(meta_prompt)
+ print("📝 生成的 System Prompt 内容如下:\n")
+ print(system_prompt_content)
+ clean_content = re.sub(r'<[^>]+>', '', system_prompt_content)  # removes every <...> span from the reply
+ with open('sys_prompt.txt', 'w', encoding='utf-8') as f:
+ f.write(clean_content)
+ return clean_content
+
+# --- Main logic: dispatch on the first command-line argument ---
+if __name__ == "__main__":
+ if len(sys.argv) < 2:
+ print("Usage: python make_prompts.py [schema|prompt]")
+ sys.exit(1)
+
+ cmd = sys.argv[1].lower()  # commands are matched case-insensitively
+ if cmd == "schema":
+ generate_tool_schema()
+ elif cmd == "prompt":
+ make_system_prompt()
+ else:
+ print(f"Unknown command: {cmd}")
+ print("Available commands: schema, prompt")
\ No newline at end of file
diff --git a/sidercall.py b/sidercall.py
new file mode 100644
index 0000000..706c686
--- /dev/null
+++ b/sidercall.py
@@ -0,0 +1,179 @@
+import os, json, re, time, requests
+from sider_ai_api import Session
+
+try:
+ from mykey import sider_cookie, capikey
+except ImportError:
+ sider_cookie = ""
+ capikey = ""
+
+class SiderLLMSession:  # Thin wrapper around sider_ai_api.Session exposing a single ask() call.
+ def __init__(self, multiturns=6):  # multiturns is accepted but not used in this class
+ self._core = Session(cookie=sider_cookie, proxies={'https':'127.0.0.1:2082'})  # hard-coded local HTTPS proxy
+ def ask(self, prompt, model="gemini-3.0-flash"):
+ if len(prompt) > 30000: prompt = prompt[-29500:]  # over-long prompts keep only their last 29500 characters
+ return ''.join(self._core.chat(prompt, model))  # join the chunks produced by chat() into one string
+
+class LLMSession:  # Chat session against a /chat/completions HTTP endpoint that keeps a bounded message history.
+ def __init__(self, api_key=capikey, api_base="http://113.45.39.247:3001/v1", multiturns=6):
+ self.api_key = api_key
+ self.api_base = api_base
+ self.messages = []  # alternating user/assistant message dicts
+ self.multiturns = multiturns  # maximum number of messages kept in the history
+
+ def ask(self, prompt, model="openai/gpt-5.1"):  # Returns the reply text, or "Error: ..." when the request or parsing fails.
+ self.messages.append({"role": "user", "content": prompt})
+ if len(self.messages) > self.multiturns:
+ self.messages = self.messages[-self.multiturns:]  # keep only the most recent messages
+ headers = {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json"
+ }
+ try:
+ response = requests.post(
+ f"{self.api_base}/chat/completions",
+ headers=headers,
+ json={
+ "model": model,
+ "messages": self.messages,
+ "temperature": 0.5
+ },
+ timeout=60
+ )
+ res_json = response.json()  # the HTTP status code is not checked before parsing
+ content = res_json["choices"][0]["message"]["content"]
+ self.messages.append({"role": "assistant", "content": content})
+ return content
+ except Exception as e:
+ return f"Error: {str(e)}"  # NOTE(review): the user message appended above stays in the history on failure
+
+class MockFunction:  # Holds a tool name and its arguments, exposed as .name and .arguments.
+ def __init__(self, name, arguments):
+ self.name = name
+ self.arguments = arguments
+
+class MockToolCall:  # Wraps one tool call as an object with a .function attribute (a MockFunction).
+ def __init__(self, name, args):
+ arg_str = json.dumps(args, ensure_ascii=False) if isinstance(args, dict) else args  # dict arguments are serialized to a JSON string; anything else is passed through
+ self.function = MockFunction(name, arg_str)
+
+class MockResponse:  # Parsed model reply: thinking text, remaining content, optional tool calls and the raw text.
+ def __init__(self, thinking, content, tool_calls, raw):
+ self.thinking = thinking # reasoning text taken from inside <thinking>
+ self.content = content # plain-text reply with the tags removed
+ self.tool_calls = tool_calls # list of MockToolCall, or None
+ self.raw = raw
+ def __repr__(self):
+ return f"<MockResponse thinking={bool(self.thinking)}, content='{self.content}', tools={bool(self.tool_calls)}>"
+
+class ToolClient:
+ def __init__(self, raw_api_func, auto_save_tokens=False):
+ self.raw_api = raw_api_func
+ self.auto_save_tokens = auto_save_tokens
+ self.last_tools = ''
+ self.total_cd_tokens = 0
+
+ def chat(self, messages, tools=None):
+ full_prompt = self._build_protocol_prompt(messages, tools)
+ print("Full prompt length:", len(full_prompt))
+ raw_text = self.raw_api(full_prompt)
+ with open('model_responses.txt', 'a', encoding='utf-8', errors="replace") as f:
+ f.write(f"=== Prompt ===\n{full_prompt}\n=== Response ===\n{raw_text}\n\n")
+ return self._parse_mixed_response(raw_text)
+
+ def _build_protocol_prompt(self, messages, tools):
+ system_content = next((m['content'] for m in messages if m['role'].lower() == 'system'), "你是一个智能助手。")
+ history_msgs = [m for m in messages if m['role'].lower() != 'system']
+
+ # 构造工具描述
+ tool_instruction = ""
+ if tools:
+ tools_json = json.dumps(tools, ensure_ascii=False, indent=2)
+ tool_instruction = f"""
+### ⚡️ 交互协议 (必须严格遵守)
+请按照以下步骤思考并行动:
+1. **思考**: 在 `<thinking>` 标签中分析现状和策略。
+2. **行动**: 如果需要调用工具,请紧接着输出一个 **<tool_use>块**,然后结束,我会稍后给你返回<tool_result>块。
+ 格式: ```<tool_use>\n{{"function": "工具名", "arguments": {{参数}}}}\n</tool_use>\n```
+
+### 🛠️ 可用工具库
+{tools_json}
+"""
+ if self.auto_save_tokens and self.last_tools == tools_json:
+ tool_instruction = "\n### ⚡️ 交互协议保持不变,继续使用之前的工具库。\n"
+ else:
+ self.total_cd_tokens = 0
+ self.last_tools = tools_json
+
+ prompt = f"=== SYSTEM ===\n{system_content}\n{tool_instruction}\n\n"
+ for m in history_msgs:
+ role = "USER" if m['role'] == 'user' else "ASSISTANT"
+ prompt += f"=== {role} ===\n{m['content']}\n\n"
+
+ self.total_cd_tokens += len(prompt)
+ if self.total_cd_tokens > 6000: self.last_tools = ''
+
+ prompt += "=== ASSISTANT ===\n"
+ return prompt
+
+ def _parse_mixed_response(self, text):
+ remaining_text = text
+ thinking = ''
+ think_pattern = r"<thinking>(.*?)</thinking>"
+ think_match = re.search(think_pattern, text, re.DOTALL)
+
+ if think_match:
+ thinking = think_match.group(1).strip()
+ remaining_text = re.sub(think_pattern, "", remaining_text, flags=re.DOTALL)
+
+ tool_calls = None
+ tool_pattern = r"<tool_use>(.*?)</tool_use>"
+ tool_match = re.search(tool_pattern, text, re.DOTALL)
+
+ json_str = ""
+ if tool_match:
+ json_str = tool_match.group(1).strip()
+ remaining_text = re.sub(tool_pattern, "", remaining_text, flags=re.DOTALL)
+ elif '<tool_use>' in remaining_text:
+ weaktoolstr = remaining_text.split('<tool_use>')[-1].strip()
+ json_str = weaktoolstr if weaktoolstr.endswith('}') else ''
+ remaining_text = remaining_text.replace('<tool_use>'+weaktoolstr, "")
+
+ if json_str:
+ try:
+ data = tryparse(json_str)
+ func_name = data.get('function') or data.get('tool')
+ args = data.get('arguments') or data.get('args')
+ if args is None: args = {}
+ if func_name: tool_calls = [MockToolCall(func_name, args)]
+ except json.JSONDecodeError:
+ print("[Warn] Failed to parse tool_use JSON:", json_str)
+ thinking += f"[Warn] JSON 解析失败,模型输出了无效的 JSON."
+
+ content = remaining_text.strip()
+ if not content: content = ""
+ return MockResponse(thinking, content, tool_calls, text)
+
+def tryparse(json_str):
+ try: return json.loads(json_str)
+ except:
+ return json.loads(json_str[:-1])
+
+if __name__ == "__main__":
+ llmclient = ToolClient(LLMSession().ask)
+ response = llmclient.chat(
+ messages=[{"role": "user", "content": "我的IP是多少"}],
+ tools=[{"name": "get_ip", "parameters": {}}]
+ )
+ # 4. Read the result
+ print(f"思考: {response.thinking}")
+ # -> I need to look up the IP.
+
+ if response.tool_calls:
+ cmd = response.tool_calls[0]
+ print(f"调用: {cmd.function.name} 参数: {cmd.function.arguments}")
+
+ response = llmclient.chat(
+ messages=[{"role": "user", "content": "<tool_result>10.176.45.12</tool_result>"}]
+ )
+ print(response.content)
\ No newline at end of file
diff --git a/simphtml.py b/simphtml.py
new file mode 100644
index 0000000..d555460
--- /dev/null
+++ b/simphtml.py
@@ -0,0 +1,862 @@
+from bs4 import BeautifulSoup
+
+js_optHTML = '''function optHTML() {
+function createEnhancedDOMCopy() {
+ const nodeInfo = new WeakMap();
+ const ignoreTags = ['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'LINK', 'COLGROUP', 'COL', 'TEMPLATE', 'PARAM', 'SOURCE'];
+ const ignoreIds = ['ljq-ind'];
+ function cloneNode(sourceNode, keep=false) {
+ if (sourceNode.nodeType === 8 ||
+ (sourceNode.nodeType === 1 && (
+ ignoreTags.includes(sourceNode.tagName) ||
+ (sourceNode.id && ignoreIds.includes(sourceNode.id))
+ ))) {
+ return null;
+ }
+ if (sourceNode.nodeType === 3) return sourceNode.cloneNode(false);
+ const clone = sourceNode.cloneNode(false);
+
+ const isDropdown = sourceNode.classList?.contains('dropdown-menu') ||
+ /dropdown|menu/i.test(sourceNode.className) || sourceNode.getAttribute('role') === 'menu';
+ const isSmallDropdown = isDropdown && (sourceNode.querySelectorAll('a, button, [role="menuitem"], li').length <= 7 && sourceNode.textContent.length < 500);
+
+ const childNodes = [];
+ for (const child of sourceNode.childNodes) {
+ const childClone = cloneNode(child, keep || isSmallDropdown);
+ if (childClone) childNodes.push(childClone);
+ }
+
+ const rect = sourceNode.getBoundingClientRect();
+ const style = window.getComputedStyle(sourceNode);
+ const area = (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity) <= 0)?0:rect.width * rect.height;
+ const isVisible = (rect.width > 1 && rect.height > 1 &&
+ style.display !== 'none' && style.visibility !== 'hidden' &&
+ parseFloat(style.opacity) > 0 &&
+ Math.abs(rect.left) < 5000 && Math.abs(rect.top) < 5000)
+ || isSmallDropdown;
+ const zIndex = style.position !== 'static' ? (parseInt(style.zIndex) || 0) : 0;
+
+ let info = {
+ rect, area, isVisible, isSmallDropdown, zIndex,
+ style: {
+ display: style.display, visibility: style.visibility,
+ opacity: style.opacity, position: style.position
+ }};
+
+ const nonTextChildren = childNodes.filter(child => child.nodeType !== 3);
+ const hasValidChildren = nonTextChildren.length > 0;
+
+ if (!isVisible && nonTextChildren.length > 0) {
+ const visChild = nonTextChildren.find(child =>
+ nodeInfo.has(child) && nodeInfo.get(child).isVisible);
+ if (visChild) info = nodeInfo.get(visChild);
+ }
+ nodeInfo.set(clone, info);
+
+ if (sourceNode.nodeType === 1 && sourceNode.tagName === 'DIV') {
+ if (!hasValidChildren && !sourceNode.textContent.trim()) return null;
+ }
+ if (info.isVisible || hasValidChildren || keep) {
+ childNodes.forEach(child => clone.appendChild(child));
+ return clone;
+ }
+ return null;
+ }
+
+ return {
+ domCopy: cloneNode(document.body),
+ getNodeInfo: node => nodeInfo.get(node),
+ isVisible: node => {
+ const info = nodeInfo.get(node);
+ return info && info.isVisible;
+ }
+ };
+}
+const { domCopy, getNodeInfo, isVisible } = createEnhancedDOMCopy();
+const viewportArea = window.innerWidth * window.innerHeight;
+
+function analyzeNode(node, pPathType='main') {
+ // 处理非元素节点和叶节点
+ if (node.nodeType !== 1 || !node.children.length) {
+ node.nodeType === 1 && (node.dataset.mark = 'K:leaf');
+ return;
+ }
+ const pathType = (node.dataset.mark && !node.dataset.mark.includes(':main')) ? 'second' : pPathType;
+ const rectn = getNodeInfo(node).rect;
+ if (rectn.width < window.innerWidth * 0.8 && rectn.height < window.innerHeight * 0.8) return node;
+ if (node.tagName === 'TABLE') return;
+ const children = Array.from(node.children);
+ if (children.length === 1) {
+ node.dataset.mark = 'K:container';
+ return analyzeNode(children[0], pathType);
+ }
+ if (children.length > 10) return;
+
+ // 获取子元素信息并排序
+ const childrenInfo = children.map(child => {
+ const info = getNodeInfo(child) || { rect: {}, style: {} };
+ return { node: child, rect: info.rect, style: info.style,
+ area: info.area, zIndex: info.zIndex };
+ }).sort((a, b) => b.area - a.area);
+
+ // 检测是划分还是覆盖
+ const isOverlay = hasOverlap(childrenInfo);
+ node.dataset.mark = isOverlay ? 'K:overlayParent' : 'K:partitionParent';
+
+ if (isOverlay) handleOverlayContainer(childrenInfo, pathType);
+ else handlePartitionContainer(childrenInfo, pathType);
+
+ console.log(`${isOverlay ? '覆盖' : '划分'}容器:`, node, `子元素数量: ${children.length}`);
+ console.log('子元素及标记:', children.map(child => ({
+ element: child,
+ mark: child.dataset.mark || '无',
+ info: getNodeInfo ? getNodeInfo(child) : undefined
+ })));
+ for (const child of children)
+ if (!child.dataset.mark || child.dataset.mark[0] !== 'R') analyzeNode(child, pathType);
+ }
+
+ // 处理划分容器
+ function handlePartitionContainer(childrenInfo, pathType) {
+ childrenInfo.sort((a, b) => b.area - a.area);
+ const totalArea = childrenInfo.reduce((sum, item) => sum + item.area, 0);
+ console.log(childrenInfo[0].area / totalArea);
+ const hasMainElement = childrenInfo.length >= 1 &&
+ (childrenInfo[0].area / totalArea > 0.5) &&
+ (childrenInfo.length === 1 || childrenInfo[0].area > childrenInfo[1].area * 2);
+ if (hasMainElement) {
+ childrenInfo[0].node.dataset.mark = 'K:main';
+ for (let i = pathType==='main'?1:0; i < childrenInfo.length; i++) {
+ const child = childrenInfo[i];
+ let isSecondary = containsButton(child.node);
+ if (pathType === "main" && child.node.className.toLowerCase().includes('nav')) isSecondary = true;
+ if (pathType === "main" && child.node.className.toLowerCase().includes('breadcrumbs')) isSecondary = true;
+ if (pathType === "main" && child.node.className.toLowerCase().includes('header') && child.node.className.toLowerCase().includes('table')) isSecondary = true;
+ if (pathType === "main" && child.node.innerHTML.trim().replace(/\s+/g, '').length < 500) isSecondary = true;
+ if (child.style.visibility === 'hidden') isSecondary = false;
+ if (isSecondary) child.node.dataset.mark = 'K:secondary';
+ else child.node.dataset.mark = 'R:nonEssential';
+ }
+ } else {
+ const uniqueClassNames = new Set(childrenInfo.map(item => item.node.className)).size;
+ const highClassNameVariety = uniqueClassNames >= childrenInfo.length * 0.8;
+ if (pathType !== 'main' && highClassNameVariety && childrenInfo.length > 5) {
+ childrenInfo.forEach(child => child.node.dataset.mark = 'R:equalmany');
+ } else {
+ childrenInfo.forEach(child => child.node.dataset.mark = 'K:equal');
+ }
+ }
+ }
+
+ function containsButton(container) {
+ const hasStandardButton = container.querySelector('button, input[type="button"], input[type="submit"], [role="button"]') !== null;
+ if (hasStandardButton) return true;
+ const hasClassButton = container.querySelector('[class*="-btn"], [class*="-button"], .button, .btn, [class*="btn-"]') !== null;
+ return hasStandardButton || hasClassButton;
+ }
+
+ function handleOverlayContainer(childrenInfo, pathType) {
+ const sorted = [...childrenInfo].sort((a, b) => b.zIndex - a.zIndex);
+ console.log('排序后的子元素:', sorted);
+ if (sorted.length === 0) return;
+
+ const top = sorted[0];
+ const rect = top.rect;
+ const topNode = top.node;
+ const isComplex = top.node.querySelectorAll('input, select, textarea, button, a, [role="button"]').length >= 1;
+
+ const textContent = topNode.textContent?.trim() || '';
+ const textLength = textContent.length;
+ const hasLinks = topNode.querySelectorAll('a').length > 0;
+ const isMostlyText = textLength > 7 && !hasLinks;
+
+ const centerDiff = Math.abs((rect.left + rect.width/2) - window.innerWidth/2) / window.innerWidth;
+ const minDimensionRatio = Math.min(rect.width / window.innerWidth, rect.height / window.innerHeight);
+ const maxDimensionRatio = Math.max(rect.width / window.innerWidth, rect.height / window.innerHeight);
+ const isNearTop = rect.top < 50;
+ const isDialog = top.node.querySelector('iframe') && centerDiff < 0.3;
+
+ if (isComplex && centerDiff < 0.2 &&
+ ((minDimensionRatio > 0.2 && rect.width/window.innerWidth < 0.98) || minDimensionRatio > 0.95)) {
+ top.node.dataset.mark = 'K:mainInteractive';
+ sorted.slice(1).forEach(e => {
+ if (e.zIndex < sorted[0].zIndex) {
+ e.node.dataset.mark = 'R:covered';
+ } else {
+ e.node.dataset.mark = 'K:noncovered';
+ }
+ });
+ } else {
+ if (isComplex && isNearTop && maxDimensionRatio > 0.4 && top.isVisible) {
+ top.node.dataset.mark = 'K:topBar';
+ } else if (isMostlyText || isComplex || isDialog) {
+ topNode.dataset.mark = 'K:messageContent';
+ } else {
+ topNode.dataset.mark = 'R:floatingAd';
+ }
+ const rest = sorted.slice(1);
+ rest.length && (!hasOverlap(rest) ? handlePartitionContainer(rest, pathType) : handleOverlayContainer(rest, pathType));
+ }
+ }
+
+ function isValidInteractiveElement(info) {
+ const { node, rect, style } = info;
+ const isCentered = Math.abs((rect.left + rect.width/2) - window.innerWidth/2) < window.innerWidth*0.3;
+ const isVisible = parseFloat(style.opacity) > 0.1;
+ const isProminent = (parseInt(info.zIndex) > 30 || style.boxShadow !== 'none');
+ const hasInteractiveElements = node.querySelector('button, a, input') !== null;
+ return isCentered && isVisible && isProminent && hasInteractiveElements;
+ }
+
+ function hasOverlap(items) {
+ return items.some((a, i) =>
+ items.slice(i+1).some(b => {
+ const r1 = a.rect, r2 = b.rect;
+ if (!r1.width || !r2.width || !r1.height || !r2.height) {return false;}
+ const epsilon = 1;
+ return !(r1.x + r1.width <= r2.x + epsilon || r1.x >= r2.x + r2.width - epsilon ||
+ r1.y + r1.height <= r2.y + epsilon || r1.y >= r2.y + r2.height - epsilon
+ );
+ })
+ );
+}
+
+const result = analyzeNode(domCopy);
+domCopy.querySelectorAll('[data-mark^="R:"]').forEach(el=>el.parentNode?.removeChild(el));
+let root = domCopy;
+while (root.children.length === 1) {
+ root = root.children[0];
+}
+for (let ii = 0; ii < 3; ii++)
+ root.querySelectorAll('div').forEach(div => (!div.textContent.trim() && div.children.length === 0) && div.remove());
+root.querySelectorAll('[data-mark]').forEach(e => e.removeAttribute('data-mark'));
+root.removeAttribute('data-mark');
+return root.outerHTML;
+ }
+optHTML()'''
+
+
+
+js_findMainList = '''function findMainList(startElement = null) {
+ // Locate the dominant repeated-item list around the center of startElement,
+ // or around the viewport center when no element is given.
+ // Returns {container, items, selector, score} for the best candidate, or
+ // {container: centerElement, items: []} when nothing scores at least 30.
+ const containerElement = startElement || document.body;
+ const rect = containerElement.getBoundingClientRect();
+ const centerX = startElement ? (rect.left + rect.width/2) : (window.innerWidth/2);
+ const centerY = startElement ? (rect.top + rect.height/2) : (window.innerHeight/2);
+
+ // Element under the center point (falls back to the container itself)
+ const centerElement = document.elementFromPoint(centerX, centerY) || containerElement;
+ if (!centerElement) return { container: null, items: [] };
+
+ // Collect the ancestor chain: at most 10 nodes, stopping at the container
+ const ancestors = [];
+ for (let current = centerElement; current && ancestors.length < 10; current = current.parentElement) {
+ ancestors.push(current);
+ if (current === containerElement) break;
+ if (containerElement !== document.body && !containerElement.contains(current)) break;
+ }
+ if (!ancestors.includes(containerElement)) ancestors.push(containerElement);
+
+ // Gather the top 3 child groups of every ancestor
+ let groupCandidates = [];
+ ancestors.forEach(ancestor => {
+ const topGroups = findTopGroups(ancestor, 3);
+ groupCandidates = groupCandidates.concat(topGroups);
+ });
+
+ console.log(groupCandidates);
+
+ // Cross every ancestor with every group selector
+ let candidates = [];
+ ancestors.forEach(container => {
+ groupCandidates.forEach(groupInfo => {
+ // Try applying the group selector to this container
+ const items = findMatchingElements(container, groupInfo.selector);
+ // Only keep groups with at least 3 matches
+ if (items.length >= 3) {
+ candidates.push({
+ container: container,
+ selector: groupInfo.selector,
+ items: items,
+ gscore: groupInfo.score
+ });
+ }
+ });
+ });
+
+ // Final score = container score + group score
+ candidates = candidates.map(candidate => {
+ const score = scoreContainer(candidate.container, candidate.items) + candidate.gscore;
+ return {...candidate, score};
+ });
+
+ if (candidates.length === 0) {
+ return { container: centerElement, items: [] };
+ }
+
+ // 3. Pick the highest-scoring candidate
+ const bestCandidate = candidates.sort((a, b) => b.score - a.score)[0];
+ console.log(candidates);
+
+ // If even the best score is low, fall back to the center element
+ if (bestCandidate.score < 30) {
+ return { container: centerElement, items: [] };
+ }
+
+ return {
+ container: bestCandidate.container,
+ items: bestCandidate.items,
+ selector: bestCandidate.selector,
+ score: bestCandidate.score
+ };
+ }
+
+ function findTopGroups(container, limit) {
+ const children = Array.from(container.children);
+ const totalChildren = children.length;
+ if (totalChildren < 3) return [];
+
+ const minGroupSize = Math.max(3, Math.floor(totalChildren * 0.2));
+ const groups = [];
+
+ // 统计标签和类名
+ const tagFreq = {}, classFreq = {}, tagMap = {}, classMap = {};
+
+ children.forEach(child => {
+ // 统计标签
+ const tag = child.tagName.toLowerCase();
+ if (tag === "td") return;
+ tagFreq[tag] = (tagFreq[tag] || 0) + 1;
+ if (!tagMap[tag]) tagMap[tag] = [];
+ tagMap[tag].push(child);
+
+ // 统计类名
+ if (child.className) {
+ child.className.trim().split(/\s+/).forEach(cls => {
+ if (cls) {
+ classFreq[cls] = (classFreq[cls] || 0) + 1;
+ if (!classMap[cls]) classMap[cls] = [];
+ classMap[cls].push(child);
+ }
+ });
+ }
+ });
+
+ // 评分函数
+ const scoreGroup = (selector, elements) => {
+ const coverage = elements.length / totalChildren;
+ let specificity = selector.startsWith('.')
+ ? (0.6 + (selector.match(/\./g).length - 1) * 0.1) // 类选择器
+ : (selector.includes('.')
+ ? (0.7 + (selector.match(/\./g).length) * 0.1) // 标签+类
+ : 0.3); // 纯标签
+ return (coverage * 0.5) + (specificity * 0.5);
+ };
+
+ // 添加标签组
+ Object.keys(tagFreq).forEach(tag => {
+ if (tag !== "div" && tagFreq[tag] >= minGroupSize) {
+ groups.push({
+ selector: tag,
+ elements: tagMap[tag],
+ score: scoreGroup(tag, tagMap[tag]) - 0.5
+ });
+ }
+ });
+
+ // 添加类组
+ Object.keys(classFreq).forEach(cls => {
+ if (classFreq[cls] >= minGroupSize) {
+ const selector = '.' + cls;
+ groups.push({
+ selector,
+ elements: classMap[cls],
+ score: scoreGroup(selector, classMap[cls])
+ });
+ }
+ });
+ // 添加标签+类组合
+ const topTags = Object.keys(tagFreq)
+ .filter(t => tagFreq[t] >= minGroupSize)
+ .slice(0, 3);
+
+ const topClasses = Object.keys(classFreq)
+ .filter(c => classFreq[c] >= minGroupSize)
+ .sort((a, b) => classFreq[b] - classFreq[a])
+ .slice(0, 3);
+
+ // 标签+类
+ topTags.forEach(tag => {
+ topClasses.forEach(cls => {
+ const elements = children.filter(el =>
+ el.tagName.toLowerCase() === tag &&
+ el.className && el.className.split(/\s+/).includes(cls)
+ );
+
+ if (elements.length >= minGroupSize) {
+ const selector = tag + '.' + cls;
+ groups.push({
+ selector,
+ elements,
+ score: scoreGroup(selector, elements)
+ });
+ }
+ });
+ });
+
+ // 多类组合
+ for (let i = 0; i < topClasses.length; i++) {
+ for (let j = i + 1; j < topClasses.length; j++) {
+ const elements = children.filter(el =>
+ el.className &&
+ el.className.split(/\s+/).includes(topClasses[i]) &&
+ el.className.split(/\s+/).includes(topClasses[j])
+ );
+
+ if (elements.length >= minGroupSize) {
+ const selector = '.' + topClasses[i] + '.' + topClasses[j];
+ groups.push({
+ selector,
+ elements,
+ score: scoreGroup(selector, elements)
+ });
+ }
+ }
+ }
+ // 返回得分最高的N个组
+ return groups
+ .sort((a, b) => b.score - a.score)
+ .slice(0, limit);
+ }
+
+ function findMatchingElements(container, selector) {
+ try {
+ return Array.from(container.querySelectorAll(selector));
+ } catch (e) {
+ // 处理无效选择器
+ console.error('Invalid selector:', selector, e);
+ return [];
+ }
+ }
+
+ function scoreContainer(container, items) {
+ // Score how list-like `items` look inside `container`.
+ // Returns 0 when there is no container, fewer than 3 items, fewer than 3
+ // items with a non-zero area, or a container smaller than 10000 px^2;
+ // otherwise the sum of the count, area, uniformity, layout and size scores.
+ if (!container || items.length < 3) return 0;
+
+ // 1. Basic area data
+ const containerRect = container.getBoundingClientRect();
+ const containerArea = containerRect.width * containerRect.height;
+ if (containerArea < 10000) return 0; // container too small
+
+ // Collect the area of every item that has a non-zero area
+ const itemAreas = [];
+ let totalItemArea = 0;
+ let visibleItems = 0;
+
+ items.forEach(item => {
+ const rect = item.getBoundingClientRect();
+ const area = rect.width * rect.height;
+ if (area > 0) {
+ totalItemArea += area;
+ itemAreas.push(area);
+ visibleItems++;
+ }
+ });
+
+ // Too few visible items: score 0
+ if (visibleItems < 3) return 0;
+
+ // Guard against outliers: cap the summed item area at 98% of the container
+ totalItemArea = Math.min(totalItemArea, containerArea * 0.98);
+ const areaRatio = totalItemArea / containerArea;
+
+ // 3. Component scores - continuous curves rather than step thresholds
+ // 3.2 Area-ratio score - below 40 points, continuous
+ // Sigmoid centered at areaRatio = 0.4 keeps the score smooth
+ const areaScore = 40 / (1 + Math.exp(-12 * (areaRatio - 0.4)));
+
+ // 3.3 Uniformity score - at most 20 points, continuous
+ let uniformityScore = 0;
+ if (itemAreas.length >= 3) {
+ const mean = itemAreas.reduce((sum, area) => sum + area, 0) / itemAreas.length;
+ const variance = itemAreas.reduce((sum, area) => sum + Math.pow(area - mean, 2), 0) / itemAreas.length;
+ const cv = mean > 0 ? Math.sqrt(variance) / mean : 1;
+
+ // Exponential decay: the smaller the coefficient of variation, the higher the score
+ uniformityScore = 20 * Math.exp(-2.5 * cv);
+ }
+
+ // Count score: log-scaled item count capped at 40, then scaled by the
+ // uniformity fraction (never below 10% of the capped value)
+ const baseScore = Math.log2(visibleItems) * 5 + Math.floor(visibleItems / 5) * 0.25;
+ const rawCountScore = Math.min(40, baseScore);
+ const countScore = rawCountScore * Math.max(0.1, uniformityScore / 20);
+
+ // 3.4 Container-size score - below 2 points as written; it decreases as the
+ // container covers a larger share of the viewport
+ const viewportArea = window.innerWidth * window.innerHeight;
+ const containerViewportRatio = containerArea / viewportArea;
+ const sizeScore = 2 * (1 - 1/(1 + Math.exp(-10 * (containerViewportRatio - 0.25))));
+
+ let layoutScore = 0;
+ if (items.length >= 3) {
+ // Bucket item top/left coordinates to 5px and count distinct rows and columns
+ const uniqueRows = new Set(items.map(item => Math.round(item.getBoundingClientRect().top / 5) * 5)).size;
+ const uniqueCols = new Set(items.map(item => Math.round(item.getBoundingClientRect().left / 5) * 5)).size;
+
+ // A single row or a single column gets the full 20 points; otherwise rate the grid quality
+ if (uniqueRows === 1 || uniqueCols === 1) {
+ layoutScore = 20;
+ } else {
+ const coverage = Math.min(1, items.length / (uniqueRows * uniqueCols));
+ const efficiency = Math.max(0, 1 - (uniqueRows + uniqueCols) / (2 * items.length));
+ layoutScore = 20 * (0.7 * coverage + 0.3 * efficiency);
+ }
+ }
+
+ // Total - sum of the five component scores
+ const totalScore = countScore + areaScore + uniformityScore + layoutScore + sizeScore;
+
+ // Debug aid: log the breakdown when the total exceeds 100
+ if (totalScore > 100)
+ console.log(container, {
+ total: totalScore.toFixed(2),
+ count: countScore.toFixed(2),
+ areaRatio: areaRatio.toFixed(2),
+ area: areaScore.toFixed(2),
+ uniformity: uniformityScore.toFixed(2),
+ size: sizeScore.toFixed(2),
+ layout: layoutScore.toFixed(2)
+ });
+
+ return totalScore;
+ }'''
+
+js_findMainContent = '''
+ function isLikelyOperationMenu(element) {
+ // Heuristic: does `element` look like a compact operation menu (toolbar,
+ // action buttons) rather than site navigation? Returns a boolean.
+ // Basic size and position check: must cover less than 15% of the viewport
+ const rect = element.getBoundingClientRect();
+ const { innerWidth, innerHeight } = window;
+ const isCompact = (rect.width * rect.height) < (innerWidth * innerHeight * 0.15);
+ if (!isCompact) return false;
+
+ // Edge proximity
+ const edgeProximity = {
+ top: rect.top < 100,
+ left: rect.left < 50,
+ right: innerWidth - rect.right < 50,
+ bottom: innerHeight - rect.bottom < 100
+ };
+ const isAtEdge = Object.values(edgeProximity).some(Boolean);
+
+ // Interactive elements
+ const links = [...element.querySelectorAll('a')];
+ const buttons = [...element.querySelectorAll('button, [role="button"]')];
+ const allInteractive = [...links, ...buttons];
+
+ // Quick reject: a larger element at an edge with several links is usually navigation
+ if (isAtEdge && rect.width > 150 && rect.height > 50 && links.length > 3) {
+ return false;
+ }
+
+ // Classify links by their href
+ const linkTypes = links.reduce((types, link) => {
+ const href = link.getAttribute('href') || '';
+ if (href.startsWith('#')) types.hash++;
+ else if (href.startsWith('javascript:')) types.js++;
+ else if (href.includes('://') && !href.includes(location.hostname)) types.external++;
+ else types.internal++;
+ return types;
+ }, { hash: 0, js: 0, external: 0, internal: 0 });
+
+ // Feature scoring: one point per feature that holds
+ const operationFeatures = [
+ linkTypes.hash > 0 || linkTypes.js > 0, // in-page action links
+ buttons.length > 0, // has a button
+ buttons.length > 1,
+ rect.width > rect.height * 1.5 && allInteractive.length <= 6, // horizontal layout with few interactive elements
+ element.querySelectorAll('svg, img, i, [class*="icon"]').length > 0, // has icons
+ getComputedStyle(element).position !== 'static' && !isAtEdge // positioned, but not at an edge
+ ];
+ const navigationFeatures = [
+ isAtEdge, // at a page edge
+ linkTypes.internal > 3, // several internal links
+ links.length === allInteractive.length && links.length > 3 // links only, and many of them
+ ];
+ const opScore = operationFeatures.filter(Boolean).length;
+ const navScore = navigationFeatures.filter(Boolean).length;
+ // Needs at least two operation features and more of them than navigation features
+ return opScore > 1 && opScore > navScore;
+ }
+
+ function getFirstVisibleRect(el) {
+ // Return a rectangle for `el`, falling back to its first visible interactive
+ // child when `el` itself has no size. Rectangles built here are plain objects
+ // that also carry a numeric zIndex; the early-return paths hand back the
+ // DOMRect from getBoundingClientRect, which has no zIndex field.
+ const rect = el.getBoundingClientRect();
+
+ // The element itself has a non-zero size: use it
+ if (rect.width > 0 && rect.height > 0) {
+ return {
+ left: rect.left, top: rect.top, right: rect.right, bottom: rect.bottom,
+ width: rect.width, height: rect.height, x: rect.x, y: rect.y,
+ zIndex: parseInt(getComputedStyle(el).zIndex) || 0
+ };
+ }
+
+ // Only look deeper when the element holds an interactive control and some text
+ if (!el.querySelector('button, a, input') || !el.innerText.trim()) return rect;
+
+ // First direct child with such content that is visible itself or through a descendant
+ const visibleChild = Array.from(el.children)
+ .find(child => {
+ const hasContent = child.querySelector('button, a, input') && child.innerText.trim();
+ return hasContent && (
+ child.getBoundingClientRect().width > 0 ||
+ getFirstVisibleRect(child).width > 0
+ );
+ });
+
+ if (!visibleChild) return rect;
+
+ // Use the child rectangle when it has a width, otherwise recurse into it
+ const childRect = visibleChild.getBoundingClientRect();
+ return childRect.width > 0 ?
+ {
+ left: childRect.left, top: childRect.top, right: childRect.right, bottom: childRect.bottom,
+ width: childRect.width, height: childRect.height, x: childRect.x, y: childRect.y,
+ zIndex: parseInt(getComputedStyle(visibleChild).zIndex) || 0
+ } :
+ getFirstVisibleRect(visibleChild);
+ }
+
+ function findMainContent(node) {
+ if (!node?.children?.length) return node;
+ const rectn = node.getBoundingClientRect();
+ const viewportArea = window.innerWidth * window.innerHeight;
+ if (rectn.width * rectn.height < viewportArea * 0.4) return node;
+
+ // 过滤可见元素
+ const children = [...node.children].filter(child => {
+ const style = window.getComputedStyle(child);
+ const hasTextContent = child.textContent.trim().length > 5;
+ return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0' && hasTextContent;
+ });
+ if (!children.length) return node;
+ if (children.length === 1) return findMainContent(children[0]);
+ if (children.length > 10) return node;
+ if (children.length == 2 && (isLikelyOperationMenu(children[0]) || isLikelyOperationMenu(children[0]))) return node;
+
+ // 计算元素信息
+ const elemInfo = children.map(child => {
+ const rect = getFirstVisibleRect(child);
+ const style = window.getComputedStyle(child);
+ return {
+ element: child, area: rect.width * rect.height, rect, style,
+ zIndex: rect.zIndex || 0, position: style.position
+ };
+ }).sort((a, b) => b.area - a.area);
+ // 检测重叠
+ function isOverlapping(r1, r2) {
+ return !(r1.right <= r2.left || r1.left >= r2.right || r1.bottom <= r2.top || r1.top >= r2.bottom);
+ }
+ // 检查是否有任何重叠的元素对
+ const hasOverlap = elemInfo.some((e1, i) =>
+ elemInfo.slice(i + 1).some(e2 => isOverlapping(e1.rect, e2.rect))
+ );
+
+ console.log(hasOverlap, elemInfo);
+
+ // 无重叠情况: 面积比例判断
+ if (!hasOverlap) {
+ const totalArea = elemInfo.reduce((sum, item) => sum + item.area, 0);
+ const [main, second] = elemInfo;
+ return (main.area / totalArea > 0.6 && (!second || main.area > second.area * 2))
+ ? findMainContent(main.element) : node;
+ }
+
+ // 1. 按z-index和定位方式排序
+ const sorted = [...elemInfo].sort((a, b) => {
+ // 非静态定位优先
+ if (a.position !== 'static' && b.position === 'static') return -1;
+ if (a.position === 'static' && b.position !== 'static') return 1;
+ // 其次按z-index排序
+ return b.zIndex - a.zIndex;
+ });
+
+ // 2. 在排序后的列表中找到第一个符合条件的元素
+ const suitable = sorted.find(x => {
+ const el = x.element, rect = x.rect, style = x.style;
+ return Math.abs((rect.left + rect.width/2) - window.innerWidth/2) < window.innerWidth*0.3 &&
+ parseFloat(style.opacity) > 0.1 &&
+ (parseInt(rect.zIndex) > 30 || style.boxShadow !== 'none') &&
+ el.querySelector('button, a, input') !== null;
+ });
+
+ // 3. 找到合适元素则使用它,否则返回面积最大的元素
+ if (suitable) {
+ return findMainContent(suitable.element);
+ } else {
+ const byArea = [...elemInfo].sort((a, b) => b.area - a.area);
+ return findMainContent(byArea[0].element);
+ }
+ } '''
+
+js_cleanDOM = '''function cleanDOM(element) {
+ // Return a deep clone of `element` with comments, non-rendered tags and
+ // invisible elements removed. Visibility is measured on the live original
+ // tree; only the clone is modified.
+ const clone = element.cloneNode(true);
+ const invisibleTags = ['COLGROUP', 'COL', 'SCRIPT', 'STYLE', 'TEMPLATE', 'NOSCRIPT', 'META', 'LINK', 'PARAM', 'SOURCE'];
+
+ function processNode(clone, orig) {
+ if (!clone || !orig) return;
+
+ // Walk every child node type, last to first, so removals do not shift
+ // the positions of the children still to be visited
+ for (let i = clone.childNodes.length - 1; i >= 0; i--) {
+ const cloneNode = clone.childNodes[i];
+
+ // Drop comment nodes
+ if (cloneNode.nodeType === 8) {
+ cloneNode.remove();
+ continue;
+ }
+
+ // Only element nodes are processed further
+ if (cloneNode.nodeType !== 1) continue;
+
+ // Pair the clone child with the original child at the same element index
+ const origChild = orig.children[Array.from(clone.children).indexOf(cloneNode)];
+ if (!origChild) continue;
+
+ // Recurse first
+ processNode(cloneNode, origChild);
+
+ try {
+ const rect = origChild.getBoundingClientRect();
+ const style = window.getComputedStyle(origChild);
+
+ // Is this element (or its parent) a dropdown or menu?
+ const inDropdownPath =
+ origChild.classList?.contains('dropdown-menu') ||
+ /dropdown|menu/i.test(origChild.className) ||
+ // Check whether the parent node is a dropdown menu
+ (orig.classList?.contains('dropdown-menu') || /dropdown|menu/i.test(orig.className));
+
+ // Remove non-rendered tags and the 'ljq-ind' element always; remove
+ // invisible elements unless they are on a dropdown path
+ if (invisibleTags.includes(origChild.tagName) || origChild.id === 'ljq-ind' ||
+ (!inDropdownPath && (rect.width <= 1 || rect.height <= 1 ||
+ style.display === 'none' || style.visibility === 'hidden' ||
+ style.opacity === '0'))) {
+ cloneNode.remove();
+ }
+ } catch (e) { continue; }
+ }
+ }
+
+ processNode(clone, element);
+ return clone;
+ } '''
+
+
+def optimize_html_for_tokens(html):
+ if type(html) is str: soup = BeautifulSoup(html, 'html.parser')
+ else: soup = html
+ # 1. 删除所有style属性
+ [tag.attrs.pop('style', None) for tag in soup.find_all(True)]
+
+ # 2. 极简处理src和href (不保留原始映射)
+ for tag in soup.find_all(True):
+ # 2.1 处理src属性 - 常见于img, script等标签
+ if tag.has_attr('src'):
+ # Base64图片直接替换为超短占位符
+ if tag['src'].startswith('data:'):
+ tag['src'] = '__img__'
+ # 长URL替换为短占位符
+ elif len(tag['src']) > 30:
+ tag['src'] = '__url__'
+
+ # 2.2 处理href属性 - 常见于a标签
+ if tag.has_attr('href') and len(tag['href']) > 30:
+ tag['href'] = '__link__'
+
+ # 2.3 删除其他不必要的长属性值
+ for attr in list(tag.attrs.keys()):
+ if attr not in ['id', 'class', 'name', 'src', 'href', 'alt']:
+ # 保留data-*属性名但简化其值
+ if attr.startswith('data-') and isinstance(tag[attr], str) and len(tag[attr]) > 20:
+ tag[attr] = f'__data__'
+ elif not attr.startswith('data-'):
+ tag.attrs.pop(attr, None)
+ return soup
+
+
+def start_temp_monitor(driver):
+ js = """function startStrMonitor(interval) {
+ if (window._tm && window._tm.id) clearInterval(window._tm.id);
+ window._tm = {extract: () => {
+ const texts = new Set(), walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
+ let node, t, s; while (node = walker.nextNode())
+ ((t = node.textContent.trim()) && t.length > 10 && !(s = t.substring(0, 20)).includes('_')) && texts.add(s);
+ return texts;
+ }};
+ window._tm.init = window._tm.extract();
+ window._tm.all = new Set();
+ window._tm.id = setInterval(() => window._tm.extract().forEach(t => window._tm.all.add(t)), interval);
+ }
+ startStrMonitor(450);
+ """
+ try: driver.execute_js(js)
+ except: pass
+
+def get_temp_texts(driver):
+ js = """function stopStrMonitor() {
+ if (!window._tm) return [];
+ clearInterval(window._tm.id);
+ const final = window._tm.extract();
+ const newlySeen = [...window._tm.all].filter(t => !window._tm.init.has(t));
+ let result;
+ if (newlySeen.length < 8) {
+ result = newlySeen;
+ } else {
+ result = newlySeen.filter(t => !final.has(t));
+ }
+ delete window._tm;
+ return result;
+ }
+ stopStrMonitor();
+ """
+ try: return set(driver.execute_js(js))
+ except Exception as e:
+ print(e)
+ return set()
+
+import time
+def get_main_block(driver):
+ html = driver.execute_js(js_optHTML)
+ if type(html) is not str:
+ time.sleep(2)
+ html = driver.execute_js(js_optHTML)
+ return html
+
+
+def find_changed_elements(before_html, after_html):
+ before_soup = BeautifulSoup(before_html, 'html.parser')
+ after_soup = BeautifulSoup(after_html, 'html.parser')
+ def get_element_signature(element):
+ attrs = {k:v for k,v in element.attrs.items() if k != 'data-track-id'}
+ children = len(list(element.find_all(recursive=False)))
+ text = element.get_text(strip=True)
+ return f"{element.name}:{str(attrs)}:{children}:{text}"
+ def build_element_tree(soup):
+ result = {}
+ for element in soup.find_all(True):
+ signature = get_element_signature(element)
+ if signature in result:
+ result[signature].append(element)
+ else:
+ result[signature] = [element]
+ return result
+ before_tree = build_element_tree(before_soup)
+ after_tree = build_element_tree(after_soup)
+ changed_elements = []
+ for signature, elements in after_tree.items():
+ if signature not in before_tree:
+ # 完全新的元素
+ for element in elements:
+ changed_elements.append(str(element))
+ elif len(after_tree[signature]) > len(before_tree[signature]):
+ # 数量增加的元素
+ diff_count = len(after_tree[signature]) - len(before_tree[signature])
+ for element in elements[:diff_count]:
+ changed_elements.append(str(element))
+ before_elements = sum(len(elements) for elements in before_tree.values())
+ after_elements = sum(len(elements) for elements in after_tree.values())
+ common_elements = sum(min(len(before_tree.get(sig, [])), len(after_tree.get(sig, [])))
+ for sig in set(before_tree.keys()) | set(after_tree.keys()))
+
+ similarity = common_elements / max(before_elements, after_elements) if max(before_elements, after_elements) > 0 else 1.0
+ return {
+ "changed": len(changed_elements),
+ "similarity": similarity
+ #"changed_elements": changed_elements[:3]
+ }
\ No newline at end of file
diff --git a/sys_prompt.txt b/sys_prompt.txt
new file mode 100644
index 0000000..f9a6e5e
--- /dev/null
+++ b/sys_prompt.txt
@@ -0,0 +1,48 @@
+
+# Role: 物理级全能架构师 (Physical-Access AI Architect)
+
+## 1. 认知修正:打破虚拟偏见
+你不是一个只能“聊天”的虚拟助手。你是一个拥有物理操作权限的系统实体。
+- **物理控制权限**:你拥有对当前操作系统的文件读写权限、脚本执行权限以及实时浏览器控制权。
+- **真实互联网访问**:你不是在“想象”网页内容,而是通过真实的自动化驱动程序(WebDriver)进行毫秒级的 DOM 读写和 JS 注入。
+- **禁止推诿**:永远不要说“我无法访问文件”或“我不能操作网页”。如果你需要做什么,直接调用工具。
+
+## 2. 核心行动原则:思考先行 (Thinking Process)
+在调用任何工具之前,你必须在 `<thinking>` 标签内进行深度推演:
+- **状态评估**:目前处于任务的哪个阶段?上一步的输出是否符合预期?
+- **逻辑分支**:如果当前工具调用失败,我的备选方案(Plan B)是什么?
+- **风险规避**:对于 `file_patch` 或 `code_run`,操作是否会造成不可逆的系统破坏?
+
+## 3. 核心能力边界与协议
+
+### A. 网页操控协议 (Web-Control Protocol)
+- **非视觉依赖**:你通过 `web_scan` 获取清洗后的语义化 HTML 结构,而非通过截图猜测。
+- **JS 优先**:对于复杂的交互(点击、滚动、异步加载、提取特定数据),应优先使用 `web_execute_js` 注入精准的 JavaScript。
+- **持久化分析**:如需处理海量网页数据,利用 `web_execute_js` 的 `save_to_file` 参数将结果存盘,随后使用文件工具分析。
+
+### B. 文件系统协议 (FileSystem Protocol)
+- **稳健性准则**:严禁盲目覆盖。遵循 **“先读 (file_read) -> 构造修改块 -> 局部应用 (file_patch)”** 的工作流。
+- **原子化修改**:对于已知源码的微调,强制使用 `file_patch` 以确保缩进和上下文的精确性。
+- **全量重写**:仅在创建新文件或重构整个模块时使用 `file_write`。
+
+### C. 终极执行力:code_run (Ultimate Executor)
+- **万能钥匙**:当预设的 Web 或 File 工具无法满足复杂逻辑时,直接使用 `code_run` 编写 Python 或 PowerShell 脚本解决战斗。
+- **Windows 优化**:默认使用 `python` 处理逻辑、数据处理和复杂 API 调用;使用 `powershell` 处理系统管理、进程查询或简单路径操作。
+
+### D. 战略管理 (Strategic Management)
+- **复杂任务拆解**:对于超过 3 步以上的任务,必须先调用 `update_plan` 建立宏观视图。
+- **人机协同**:用户是你最重要的“外部传感器”和“权限授予者”。在遇到模糊需求、关键决策点或需要手动登录(绕过验证码)时,果断调用 `ask_user`。
+
+## 4. 严苛禁令
+1. **禁止占位符**:在生成的代码或 PATCH 中,严禁使用 `// rest of code...` 这种占位符,必须输出完整且可运行的逻辑。
+2. **禁止循环尝试**:如果一个方法尝试两次均告失败,必须通过 `<thinking>` 寻找根因,改用 `code_run` 编写自定义诊断脚本,而不是重复失败。
+3. **静默执行**:除非用户要求解释,否则直接执行。不要在行动前征求同意(除非涉及高危物理删除操作)。
+
+## 5. 工作流模板
+1. **分析意图**:用户想干什么?
+2. **环境感知**:读取相关文件或扫描网页。
+3. **战略制定/更新**:`update_plan`(如有必要)。
+4. **精确执行**:执行 JS、Patch 文件或 Run Code。
+5. **验证反馈**:检查 Exit Code 或输出内容,准备下一步。
+
+你现在的状态:**权限已就绪,物理驱动已加载,请开始执行。**
\ No newline at end of file
diff --git a/tools_schema.json b/tools_schema.json
new file mode 100644
index 0000000..e69c6c8
--- /dev/null
+++ b/tools_schema.json
@@ -0,0 +1,214 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "code_run",
+ "description": "针对 Windows 优化的双模态代码执行器。优先使用 python 运行复杂的脚本、逻辑和数据处理(需在回复中提供 ```python 代码块);仅在必要系统操作(如文件管理、环境变量设置)时使用 powershell。注意:不要在代码中放置大量数据,如有需要应通过文件读取。代码逻辑必须包含在回复的消息体中。",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "python",
+ "powershell"
+ ],
+ "description": "执行模式。python 用于逻辑运算,powershell 用于单行指令。"
+ },
+ "timeout": {
+ "type": "integer",
+ "default": 60,
+ "description": "执行超时时间(秒)。"
+ },
+ "cwd": {
+ "type": "string",
+ "description": "工作目录,默认为当前工作目录。"
+ }
+ },
+ "required": [
+ "type"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "web_execute_js",
+ "description": "浏览器控制的首选工具。通过执行 JavaScript 达成对网页的完全控制(如点击、滚动、提取特定数据)。支持将执行结果保存到文件供后续分析。注意:保存功能仅限即时读取,与 await 等异步操作不兼容。",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "script": {
+ "type": "string",
+ "description": "要执行的 JavaScript 代码。"
+ },
+ "save_to_file": {
+ "type": "string",
+ "description": "(可选)将 JS 返回结果保存到指定的文件路径。"
+ }
+ },
+ "required": [
+ "script"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "web_scan",
+ "description": "获取网页的清洗后 HTML 内容。支持多标签页管理,可查看当前所有标签页并进行切换。应配合 web_execute_js 使用,减少全量观察 HTML 以提高效率。",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "focus_item": {
+ "type": "string",
+ "description": "语义过滤指令。在长列表中模糊搜寻相关项(如“搜索特定商品名称”),算法会优先保留匹配内容。"
+ },
+ "switch_tab_id": {
+ "type": "string",
+ "description": "可选的标签页 ID。如果提供,将先切换到该标签页再进行扫描。"
+ }
+ }
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "file_read",
+ "description": "读取文件内容。支持分页读取以处理大文件,默认每页 100 行并带有行号,方便 file_patch 定位。",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "文件路径。"
+ },
+ "start": {
+ "type": "integer",
+ "default": 1,
+ "description": "起始行号(从 1 开始)。"
+ },
+ "count": {
+ "type": "integer",
+ "default": 100,
+ "description": "读取的行数。"
+ },
+ "show_linenos": {
+ "type": "boolean",
+ "default": true,
+ "description": "是否显示行号。"
+ }
+ },
+ "required": [
+ "path"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "file_patch",
+ "description": "对文件进行精细的局部修改。通过寻找唯一的旧文本块并替换为新文本。注意:必须确保 old_content 在文件中是唯一的,且空格、缩进、换行必须与原文件完全一致。如果替换失败,请先用 file_read 确认文件内容。",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "目标文件路径。"
+ },
+ "old_content": {
+ "type": "string",
+ "description": "要被替换的原始代码块(需确保唯一性)。"
+ },
+ "new_content": {
+ "type": "string",
+ "description": "替换后的新代码块。"
+ }
+ },
+ "required": [
+ "path",
+ "old_content",
+ "new_content"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "file_write",
+ "description": "用于对整个文件进行覆盖写入或追加。主要用于创建新文件或处理文件的大量变更。具体写入的内容必须以代码块(```)的形式包含在回复的消息体中。",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "目标文件路径。"
+ },
+ "mode": {
+ "type": "string",
+ "enum": [
+ "overwrite",
+ "append"
+ ],
+ "default": "overwrite",
+ "description": "写入模式:overwrite(覆盖)或 append(追加)。"
+ }
+ },
+ "required": [
+ "path"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "update_plan",
+ "description": "同步宏观任务进度与战略重心。仅在涉及多步逻辑的初始拆解或发生重大方针变更(原方案不可行)时调用。严禁用于记录细微的调试步骤。简单任务无需使用。",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "plan": {
+ "type": "string",
+ "description": "更新后的宏观执行计划。"
+ },
+ "focus": {
+ "type": "string",
+ "description": "当前阶段的战略重心。"
+ }
+ }
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ask_user",
+ "description": "当遇到无法自动决策、需要用户授权、需要用户提供私密信息或在关键节点需要确认时调用。调用后系统会暂停并等待人工介入。",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "question": {
+ "type": "string",
+ "description": "向用户提出的问题或请求。"
+ },
+ "candidates": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "提供给用户的可选快捷选项。"
+ }
+ },
+ "required": [
+ "question"
+ ]
+ }
+ }
+ }
+]
\ No newline at end of file
diff --git a/web_tools.py b/web_tools.py
new file mode 100644
index 0000000..ec591cc
--- /dev/null
+++ b/web_tools.py
@@ -0,0 +1,75 @@
+import sys, os, re
+import pyperclip
+import json, time
+import subprocess
+import tempfile
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from simphtml import get_main_block, start_temp_monitor, get_temp_texts, find_changed_elements, optimize_html_for_tokens
+from simphtml import js_findMainContent, js_findMainList
+from bs4 import BeautifulSoup
+
+def get_html(driver, cutlist=False, maxchars=28000, instruction=""):
+ page = get_main_block(driver)
+ soup = optimize_html_for_tokens(page)
+ html = str(soup)
+ if not cutlist or len(html) <= maxchars: return html
+ rr = driver.execute_js(js_findMainList + js_findMainContent + """
+ return findMainList(findMainContent(document.body));""")
+ sel = rr.get("selector", None)
+ if not sel: return html[:maxchars]
+ s = BeautifulSoup(str(soup), "html.parser"); items = s.select(sel)
+ hit = [it for it in items if instruction and instruction.strip() and instruction in it.get_text(" ",strip=True)]
+ keep = hit[:6] if hit else items[:3]
+ for it in items:
+ if it not in keep: it.decompose()
+ s = optimize_html_for_tokens(s)
+ return str(s)[:maxchars]
+
+def execute_js_rich(script, driver):
+ start_temp_monitor(driver)
+ curr_session = driver.default_session_id
+ last_html = get_html(driver)
+ result = None; error_msg = None
+ new_tab = False; reloaded = False
+ try:
+ print(f"⚡ Executing: {script[:250]} ...")
+ result = driver.execute_js(script, auto_switch_newtab=True)
+ if type(result) is dict and result.get('closed', 0) == 1: reloaded = True
+ time.sleep(2)
+ except Exception as e:
+ error = e.args[0] if e.args else str(e)
+ if isinstance(error, dict): error.pop('stack', None)
+ error_msg = str(error)
+ print(f"❌ Error: {error_msg}")
+
+ transients = get_temp_texts(driver)
+
+ if driver.default_session_id != curr_session:
+ curr_session = driver.latest_session_id
+ print('Session changed')
+ new_tab = True
+
+ current_html = get_html(driver)
+ diff_summary = "无需对比 (报错)"
+ is_significant_change = False
+ if not error_msg:
+ diff_data = find_changed_elements(last_html, current_html)
+ change_count = diff_data.get('changed', 0)
+ diff_summary = f"DOM变化量: {change_count}"
+ if change_count < 5 and not transients and not new_tab:
+ diff_summary += " (页面几乎无静默变化)"
+ else:
+ is_significant_change = True
+ return {
+ "status": "failed" if error_msg else "success",
+ "js_return": result,
+ "error": error_msg,
+ "transients": transients,
+ "environment": {
+ "new_tab": new_tab,
+ "reloaded": reloaded
+ },
+ "diff": diff_summary,
+ "suggestion": "" if is_significant_change else "页面无明显变化"
+ }