diff --git a/.vs/GenericAgent/FileContentIndex/64d56503-36b1-4f61-8f06-cfc8adaca535.vsidx b/.vs/GenericAgent/FileContentIndex/64d56503-36b1-4f61-8f06-cfc8adaca535.vsidx new file mode 100644 index 0000000..f5a6b2c Binary files /dev/null and b/.vs/GenericAgent/FileContentIndex/64d56503-36b1-4f61-8f06-cfc8adaca535.vsidx differ diff --git a/.vs/GenericAgent/v17/.wsuo b/.vs/GenericAgent/v17/.wsuo new file mode 100644 index 0000000..df42d5d Binary files /dev/null and b/.vs/GenericAgent/v17/.wsuo differ diff --git a/.vs/GenericAgent/v17/DocumentLayout.json b/.vs/GenericAgent/v17/DocumentLayout.json new file mode 100644 index 0000000..033598c --- /dev/null +++ b/.vs/GenericAgent/v17/DocumentLayout.json @@ -0,0 +1,12 @@ +{ + "Version": 1, + "WorkspaceRootPath": "C:\\Users\\Ljq\\Documents\\mywork\\MyTools\\AutoOperation\\webagent\\GenericAgent\\", + "Documents": [], + "DocumentGroupContainers": [ + { + "Orientation": 0, + "VerticalTabListWidth": 256, + "DocumentGroups": [] + } + ] +} \ No newline at end of file diff --git a/.vs/ProjectSettings.json b/.vs/ProjectSettings.json new file mode 100644 index 0000000..f8b4888 --- /dev/null +++ b/.vs/ProjectSettings.json @@ -0,0 +1,3 @@ +{ + "CurrentProjectSetting": null +} \ No newline at end of file diff --git a/.vs/VSWorkspaceState.json b/.vs/VSWorkspaceState.json new file mode 100644 index 0000000..6b61141 --- /dev/null +++ b/.vs/VSWorkspaceState.json @@ -0,0 +1,6 @@ +{ + "ExpandedNodes": [ + "" + ], + "PreviewInSolutionExplorer": false +} \ No newline at end of file diff --git a/.vs/slnx.sqlite b/.vs/slnx.sqlite new file mode 100644 index 0000000..365ac73 Binary files /dev/null and b/.vs/slnx.sqlite differ diff --git a/agent_loop.py b/agent_loop.py index 673dd3e..b7d1e77 100644 --- a/agent_loop.py +++ b/agent_loop.py @@ -24,6 +24,8 @@ class BaseHandler: ret = yield from try_call_generator(getattr(self, method_name), args, response) _ = yield from try_call_generator(self.tool_after_callback, tool_name, args, response, ret) return 
ret + elif tool_name == 'bad_json': + return StepOutcome(None, next_prompt=args.get('msg', 'bad_json'), should_exit=False) else: yield f"❌ 未知工具: {tool_name}\n" return StepOutcome(None, next_prompt=f"未知工具 {tool_name}", should_exit=False) diff --git a/agentmain.py b/agentmain.py index 3d537f0..62f91ac 100644 --- a/agentmain.py +++ b/agentmain.py @@ -30,15 +30,14 @@ class GeneraticAgent: if not os.path.exists('temp'): os.makedirs('temp') from sidercall import sider_cookie, oai_configs, claude_configs llm_sessions = [] + for cfg in claude_configs.values(): + llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])] if sider_cookie: llm_sessions += [SiderLLMSession(default_model=x) for x in \ ["gemini-3.0-flash", "claude-haiku-4.5", "kimi-k2"]] for cfg in oai_configs.values(): llm_sessions += [LLMSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])] - for cfg in claude_configs.values(): - llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])] if len(llm_sessions) > 0: - llmclient = ToolClient(llm_sessions, auto_save_tokens=True) - self.llmclient = llmclient + self.llmclient = ToolClient(llm_sessions, auto_save_tokens=True) else: self.llmclient = None self.lock = threading.Lock() diff --git a/ga.py b/ga.py index 0c35b89..f62368b 100644 --- a/ga.py +++ b/ga.py @@ -84,20 +84,10 @@ def code_run(code, code_type="python", timeout=60, cwd=None, code_cwd=None, stop def ask_user(question: str, candidates: list = None): + """question: 向用户提出的问题。candidates: 可选的候选项列表。需要保证should_exit为True """ - 构造一个中断请求。 - question: 向用户提出的问题。 - candidates: 可选的候选项列表。 - 需要保证should_exit为True - """ - return { - "status": "INTERRUPT", - "intent": "HUMAN_INTERVENTION", - "data": { - "question": question, - "candidates": candidates or [] - } - } + return {"status": "INTERRUPT", "intent": "HUMAN_INTERVENTION", + "data": {"question": question, "candidates": candidates or []}} from simphtml import 
execute_js_rich, get_html diff --git a/restore_commit.txt b/restore_commit.txt new file mode 100644 index 0000000..556d1bf --- /dev/null +++ b/restore_commit.txt @@ -0,0 +1,2964 @@ +commit 9b20ca82972ec66622193846801630fd356ce231 +Author: Liang Jiaqing +Date: Fri Jan 16 23:50:19 2026 +0800 + + fix: restore files removed by mistake and keep zip ignored + +diff --git a/README.md b/README.md +new file mode 100644 +index 0000000..4b399e9 +--- /dev/null ++++ b/README.md +@@ -0,0 +1,52 @@ ++# pc-agent-loop ++ ++pc-agent-loop 是一个**极致简约**的 PC 级自主 AI Agent 框架。它通过不到 100 行的核心代码和约 200 行的工具实现,构筑了把整个pc给它(浏览器、终端、文件系统)的物理级自动化能力。 ++ ++## 🚀 核心特性 ++ ++- **极简设计**: 仅由 **7 个基本工具** 和一个高效的 **Agentic Loop** 构成,拒绝过度设计。 ++- **自主代码执行 (Code Execution)**: 能够根据任务需求自主编写并运行 Python 或 PowerShell 脚本,直接操控系统资源。 ++- **深度 Web 自动化 (Advanced Web Automation)**: ++ - **语义化扫描**: 自动清洗 HTML 内容,将复杂的 DOM 转化为 AI 易读的结构。 ++ - **JS 注入执行**: 在浏览器上下文中执行自定义 JavaScript,实现精准点击、滚动或数据抓取。 ++ - **TMWebDriver**: 支持通过 Tampermonkey 实现的持久化会话驱动。 ++- **精准文件编辑 (Smart File Patching)**: 并非盲目覆盖,而是支持通过 `file_patch` 以代码块匹配方式进行精确修改。 ++- **人机协作模式 (Human-in-the-loop)**: 在遇到验证码、关键权限或模糊决策时,主动请求用户介入。 ++ ++## 📂 项目结构 ++ ++- `agent_loop.py`: **核心引擎**,负责“感知-思考-行动”的自主循环逻辑。 ++- `ga.py`: **工具箱**,定义了 7 大核心原子工具的具体实现。 ++- `agentapp.py`: 基于 Streamlit 构建的轻量化交互式 Web 界面。 ++- `sidercall.py`: LLM 通信层,支持流式输出与 API 调用。 ++- `TMWebDriver.py`: 浏览器驱动模块(需配合 Tampermonkey 脚本使用)。 ++ ++## 🛠️ 快速开始 ++ ++### 1. 环境准备 ++- 安装 Python 3.8+。 ++- (可选)若需网页自动化,请在浏览器中安装 **Tampermonkey** 插件并导入本项目提供的对应脚本。 ++ ++### 2. 安装依赖 ++缺啥装啥 ++ ++### 3. 启动应用 ++在项目根目录下执行: ++```bash ++python launch.pyw ++``` ++ ++## 🧩 7 大核心工具 ++ ++Agent 仅依靠以下 7 个原子工具的组合来完成复杂任务: ++ ++1. **`code_run`**: 针对 Windows 优化的双模态代码执行器(Python/PowerShell)。 ++2. **`web_scan`**: 获取网页清洗后的语义化 HTML 结构,支持多标签管理。 ++3. **`web_execute_js`**: 网页 JS 脚本注入,支持将结果存盘分析。 ++4. **`file_read`**: 分页式文件读取,支持行号定位。 ++5. **`file_write`**: 文件全量写入或追加。 ++6. **`file_patch`**: 基于源码块匹配的精准局部修改,确保缩进一致性。 ++7. 
**`ask_user`**: 关键节点请求人类干预。 ++ ++--- ++**⚠️ 警告**: 本 Agent 具备执行本地代码和控制操作系统的物理权限。请务必在受信任的环境中运行,并在运行前仔细检查 Agent 的执行意图。 +\ No newline at end of file +diff --git a/TMWebDriver.py b/TMWebDriver.py +new file mode 100644 +index 0000000..0f58e79 +--- /dev/null ++++ b/TMWebDriver.py +@@ -0,0 +1,285 @@ ++import json, threading, time, uuid, queue, socket, requests ++from typing import Dict, Any, Optional, List ++from simple_websocket_server import WebSocketServer, WebSocket ++from bs4 import BeautifulSoup ++import bottle, random ++from bottle import route, template, request, response ++ ++class Session: ++ def __init__(self, session_id, info, client=None): ++ self.id = session_id ++ self.info = info ++ self.connect_at = time.time() ++ self.disconnect_at = None ++ self.type = info.get('type', 'ws') ++ self.ws_client = client if self.type == 'ws' else None ++ self.http_queue = client if self.type == 'http' else None ++ @property ++ def url(self): return self.info.get('url', '') ++ def is_active(self): ++ return self.disconnect_at is None ++ def reconnect(self, client, info): ++ self.info = info ++ self.type = info.get('type', 'ws') ++ if self.type == 'ws': ++ self.ws_client = client ++ self.http_queue = None ++ elif self.type == 'http': ++ self.http_queue = client ++ self.connect_at = time.time() ++ self.disconnect_at = None ++ def mark_disconnected(self): ++ self.disconnect_at = time.time() ++ ++ ++class TMWebDriver: ++ def __init__(self, host: str = 'localhost', port: int = 18765): ++ self.host = host ++ self.port = port ++ self.sessions = {} ++ self.results = {} ++ ++ self.default_session_id = None ++ self.latest_session_id = None ++ self.last_cmd_time = 0 ++ self.is_remote = socket.socket().connect_ex((host, port+1)) == 0 ++ if not self.is_remote: ++ self.start_ws_server() ++ self.start_http_server() ++ else: ++ self.remote = f'http://{self.host}:{self.port+1}/link' ++ ++ def start_http_server(self): ++ self.app = app = bottle.Bottle() ++ ++ @app.route('/api/longpoll', 
method=['GET', 'POST']) ++ def long_poll(): ++ data = request.json ++ session_id = data.get('sessionId') ++ session_info = {'url': data.get('url'), 'title': data.get('title', ''), 'type': 'http'} ++ if session_id not in self.sessions: ++ session = Session(session_id, session_info, queue.Queue()) ++ print(f"Browser http connected: {session.url} (Session: {session_id})") ++ self.sessions[session_id] = session ++ session = self.sessions[session_id] ++ if session.type == 'http': msgQ = session.http_queue ++ else: return json.dumps({"id": "", "ret": "use ws"}) ++ try: return msgQ.get(timeout=5) ++ except queue.Empty: return json.dumps({"id": "", "ret": "next long-poll"}) ++ ++ @app.route('/api/result', method=['GET','POST']) ++ def result(): ++ data = request.json ++ if data.get('type') == 'result': ++ self.results[data.get('id')] = {'success': True, 'data': data.get('result'), 'newTabs': data.get('newTabs', [])} ++ elif data.get('type') == 'error': ++ self.results[data.get('id')] = {'success': False, 'data': data.get('error')} ++ return 'ok' ++ ++ @app.route('/link', method=['GET','POST']) ++ def link(): ++ data = request.json ++ if data.get('cmd') == 'get_all_sessions': return json.dumps({'r': self.get_all_sessions()}, ensure_ascii=False) ++ if data.get('cmd') == 'find_session': ++ url_pattern = data.get('url_pattern', '') ++ return json.dumps({'r': self.find_session(url_pattern)}, ensure_ascii=False) ++ if data.get('cmd') == 'execute_js': ++ session_id = data.get('sessionId') ++ code = data.get('code') ++ timeout = float(data.get('timeout', 10.0)) ++ auto_switch_newtab = data.get('auto_switch_newtab', False) ++ try: ++ result = self.execute_js(code, timeout=timeout, session_id=session_id, auto_switch_newtab=auto_switch_newtab) ++ newTabs = result.get('newTabs', []) if isinstance(result, dict) else [] ++ return json.dumps({'result': result, 'newTabs': newTabs}, ensure_ascii=False) ++ except Exception as e: ++ return json.dumps({'error': str(e)}, ensure_ascii=False) ++ 
return 'ok' ++ ++ def run(): ++ import asyncio ++ loop = asyncio.new_event_loop() ++ asyncio.set_event_loop(loop) ++ bottle.run(app, host=self.host, port=self.port+1, server='tornado') ++ ++ http_thread = threading.Thread(target=run) ++ http_thread.daemon = True ++ http_thread.start() ++ ++ def clean_sessions(self): ++ sids = list(self.sessions.keys()) ++ for sid in sids: ++ session = self.sessions[sid] ++ if not session.is_active() and time.time() - session.disconnect_at > 600: ++ del self.sessions[sid] ++ ++ def start_ws_server(self) -> None: ++ driver = self ++ class JSExecutor(WebSocket): ++ def handle(self) -> None: ++ try: ++ data = json.loads(self.data) ++ if data.get('type') == 'ready': ++ session_id = data.get('sessionId') ++ session_info = {'url': data.get('url'), 'title': data.get('title', ''), ++ 'connected_at': time.time(), 'type': 'ws'} ++ driver._register_client(session_id, self, session_info) ++ elif data.get('type') in 'result': ++ driver.results[data.get('id')] = {'success': True, 'data': data.get('result'), 'newTabs': data.get('newTabs', [])} ++ elif data.get('type') == 'error': ++ driver.results[data.get('id')] = {'success': False, 'data': data.get('error')} ++ except Exception as e: ++ print(f"Error handling message: {e}") ++ if hasattr(self, 'data'): print(self.data) ++ def connected(self): (f"New connection from {self.address}") ++ def handle_close(self): driver._unregister_client(self) ++ ++ self.server = WebSocketServer(self.host, self.port, JSExecutor) ++ server_thread = threading.Thread(target=self.server.serve_forever) ++ server_thread.daemon = True ++ server_thread.start() ++ print(f"WebSocket server running on ws://{self.host}:{self.port}") ++ ++ def _register_client(self, session_id: str, client: WebSocket, session_info) -> None: ++ is_new_session = session_id not in self.sessions ++ ++ if is_new_session: ++ session = Session(session_id, session_info, client) ++ self.sessions[session_id] = session ++ print(f"New tab connected: 
{session.url} (Session: {session_id})") ++ else: ++ session = self.sessions[session_id] ++ session.reconnect(client, session_info) ++ print(f"Tab reconnected: {session.url} (Session: {session_id})") ++ ++ self.latest_session_id = session_id ++ if self.default_session_id is None: ++ self.default_session_id = session_id ++ elif is_new_session: ++ if time.time() - self.last_cmd_time < 5.0: ++ print(f"检测到脚本触发的新窗口,自动切换焦点: {session_id}") ++ self.default_session_id = session_id ++ ++ ++ def _unregister_client(self, client: WebSocket) -> None: ++ for session in self.sessions.values(): ++ if session.ws_client == client: ++ session.mark_disconnected() ++ break ++ ++ def execute_js(self, code, timeout=10.0, session_id=None, auto_switch_newtab=False) -> Any: ++ if session_id is None: session_id = self.default_session_id ++ if self.is_remote: ++ print('remote_execute_js') ++ response = self._remote_cmd({"cmd": "execute_js", "sessionId": session_id, ++ "code": code, "timeout": str(timeout), ++ "auto_switch_newtab": auto_switch_newtab}) ++ if response.get('error'): raise Exception(response['error']) ++ if auto_switch_newtab and 'newTabs' in response: ++ newtabs = response.get('newTabs', []) ++ if len(newtabs) > 0: ++ new_session_id = newtabs[0]['sessionId'] ++ self.default_session_id = new_session_id ++ print(f"自动切换到新标签会话: {new_session_id}") ++ return response.get('result', None) ++ ++ session = self.sessions.get(session_id) ++ if not session or not session.is_active(): ++ time.sleep(3) ++ session = self.sessions.get(session_id) ++ if not session or not session.is_active(): ++ alive_sessions = [s for s in self.sessions.values() if s.is_active()] ++ if alive_sessions: ++ session = alive_sessions[0] ++ print(f"会话 {session_id} 未连接,自动切换到最新活动会话: {session.id}") ++ session_id = self.default_session_id = session.id ++ if not session or not session.is_active(): ++ breakpoint() ++ raise ValueError(f"会话ID {session_id} 未连接") ++ ++ tp = session.type ++ assert tp in ['ws', 'http'], 
f"Unsupported session type: {tp}" ++ exec_id = str(uuid.uuid4()) ++ payload = json.dumps({'id': exec_id, 'code': code, 'auto_switch_newtab': auto_switch_newtab}) ++ ++ if tp == 'ws': ++ session.ws_client.send_message(payload) ++ elif tp == 'http': ++ session.http_queue.put(payload) ++ ++ start_time = time.time() ++ self.clean_sessions() ++ hasjump = False ++ ++ while exec_id not in self.results: ++ time.sleep(0.1) ++ if tp == 'ws': ++ if not session.is_active(): hasjump = True ++ if hasjump and session.is_active(): ++ if not self.is_remote and auto_switch_newtab: self.last_cmd_time = time.time() ++ return {"result": f"Session {session_id} reloaded.", "closed":1} ++ if time.time() - start_time > timeout: ++ if tp == 'ws': ++ return {"result": f"No response data in {timeout}s"} ++ elif tp == 'http': ++ return {"result": f"Session {session_id} no response."} ++ ++ result = self.results.pop(exec_id) ++ if not result['success']: raise Exception(result['data']) ++ if not self.is_remote and auto_switch_newtab: ++ newtabs = result.get('newTabs', []) ++ if len(newtabs) > 0: ++ new_session_id = newtabs[0]['sessionId'] ++ self.default_session_id = new_session_id ++ print(f"自动切换到新标签会话: {new_session_id}") ++ elif not self.is_remote: ++ self.last_cmd_time = time.time() ++ return result['data'] ++ ++ def _remote_cmd(self, cmd): ++ resp = requests.post(self.remote, ++ headers={"Content-Type": "application/json"}, ++ json=cmd).json() ++ return resp ++ ++ def get_all_sessions(self): ++ if self.is_remote: ++ return self._remote_cmd({"cmd": "get_all_sessions"}).get('r', []) ++ return [{'id': session.id, **session.info} for session in self.sessions.values() ++ if session.is_active()] ++ ++ def get_session_dict(self): ++ return {session.id: session.url for session in self.sessions.values() if session.is_active()} ++ ++ def find_session(self, url_pattern: str): ++ if url_pattern == '': ++ session = self.sessions.get(self.latest_session_id) ++ return [(session.id, session.info)] if 
session else [] ++ matching_sessions = [] ++ for session in self.sessions.values(): ++ if not session.is_active(): continue ++ if 'url' in session.info and url_pattern in session.info['url']: ++ matching_sessions.append((session.id, session.info)) ++ return matching_sessions ++ ++ def set_session(self, url_pattern: str) -> bool: ++ if self.is_remote: ++ matched = self._remote_cmd({"cmd": "find_session", "url_pattern": url_pattern}).get('r', []) ++ else: ++ matched = self.find_session(url_pattern) ++ if not matched: return print(f"警告: 未找到URL包含 '{url_pattern}' 的会话") ++ if len(matched) > 1: print(f"警告: 找到多个URL包含 '{url_pattern}' 的会话,选择第一个") ++ self.last_cmd_time = 0 ++ self.default_session_id, info = matched[0] ++ print(f"成功设置默认会话: {self.default_session_id}: {info['url']}") ++ return self.default_session_id ++ ++ def jump(self, url, timeout=10): self.execute_js(f"window.location.href='{url}'", timeout=timeout) ++ def page_source(self): return self.execute_js("document.documentElement.outerHTML") ++ def body(self): return self.execute_js("document.body.outerHTML") ++ def newtab(self, url=None): ++ if url is None: url = "http://www.baidu.com/robots.txt" ++ return self.execute_js(f'GM_openInTab("{url}");', auto_switch_newtab=True) ++ ++if __name__ == "__main__": ++ driver = TMWebDriver(host='localhost', port=18765) +\ No newline at end of file +diff --git a/agent_loop.py b/agent_loop.py +new file mode 100644 +index 0000000..e30eecb +--- /dev/null ++++ b/agent_loop.py +@@ -0,0 +1,67 @@ ++import json ++from dataclasses import dataclass ++from typing import Any, Optional ++@dataclass ++class StepOutcome: ++ data: Any ++ next_prompt: Optional[str] = None ++ should_exit: bool = False ++ ++ ++def try_call_generator(func, *args, **kwargs): ++ ret = func(*args, **kwargs) ++ if hasattr(ret, '__iter__') and not isinstance(ret, (str, bytes, dict, list)): ++ ret = yield from ret ++ return ret ++ ++class BaseHandler: ++ def tool_before_callback(self, tool_name, args, content): pass ++ 
def tool_after_callback(self, tool_name, args, content): pass ++ def dispatch(self, tool_name, args, response): ++ method_name = f"do_{tool_name}" ++ if hasattr(self, method_name): ++ _ = yield from try_call_generator(self.tool_before_callback, tool_name, args, response) ++ ret = yield from try_call_generator(getattr(self, method_name), args, response) ++ _ = yield from try_call_generator(self.tool_after_callback, tool_name, args, response) ++ return ret ++ else: ++ yield f"❌ 未知工具: {tool_name}\n" ++ return StepOutcome(None, "未知工具", "ERROR") ++ ++def json_default(o): ++ if isinstance(o, set): return list(o) ++ return str(o) ++ ++def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, max_turns=15): ++ messages = [ ++ {"role": "system", "content": system_prompt}, ++ {"role": "user", "content": user_input} ++ ] ++ for turn in range(max_turns): ++ yield f"\n[🤖 LLM Thinking (Turn {turn+1})] ..." ++ response = client.chat(messages=messages, tools=tools_schema) ++ ++ if response.thinking: yield '' + response.thinking + '\n' ++ yield response.content ++ ++ if not response.tool_calls: ++ tool_name, args = 'no_tool', {} ++ else: ++ tool_call = response.tool_calls[0] ++ tool_name = tool_call.function.name ++ args = json.loads(tool_call.function.arguments) ++ ++ if tool_name == 'no_tool': pass ++ else: yield f"\n\n正在调用工具: {tool_name},参数: {args}\n" ++ outcome = yield from handler.dispatch(tool_name, args, response) ++ ++ if outcome.next_prompt is None: return {'result': 'CURRENT_TASK_DONE', 'data': outcome.data} ++ if outcome.should_exit: return {'result': 'EXITED', 'data': outcome.data} ++ ++ next_prompt = "" ++ if outcome.data is not None: ++ datastr = json.dumps(outcome.data, ensure_ascii=False, default=json_default) if type(outcome.data) in [dict, list] else str(outcome.data) ++ next_prompt += f"\n{datastr}\n\n\n" ++ next_prompt += outcome.next_prompt ++ messages = [{"role": "user", "content": next_prompt}] ++ return {'result': 'MAX_TURNS_EXCEEDED'} 
+\ No newline at end of file +diff --git a/agentapp.py b/agentapp.py +new file mode 100644 +index 0000000..915921d +--- /dev/null ++++ b/agentapp.py +@@ -0,0 +1,94 @@ ++import os, sys ++if sys.stdout is None: sys.stdout = open(os.devnull, "w") ++if sys.stderr is None: sys.stderr = open(os.devnull, "w") ++sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) ++ ++ ++import streamlit as st ++import time, json, re ++ ++with open('tools_schema.json', 'r', encoding='utf-8') as f: ++ TOOLS_SCHEMA = json.load(f) ++ ++ ++st.set_page_config(page_title="Cowork", layout="wide") ++ ++from sidercall import SiderLLMSession, LLMSession, ToolClient ++from agent_loop import agent_runner_loop, StepOutcome, BaseHandler ++ ++@st.cache_resource ++def init(): ++ mainllm = SiderLLMSession(multiturns=6) ++ llmclient = ToolClient(mainllm.ask, auto_save_tokens=True) ++ return llmclient ++ ++llmclient = init() ++ ++from ga import GenericAgentHandler ++ ++def get_system_prompt(): ++ with open('sys_prompt.txt', 'r', encoding='utf-8') as f: ++ return f.read() ++ ++if "last_goal" not in st.session_state: ++ st.session_state.last_goal = "" ++ ++def refine_user_goal(raw_query, last_goal): ++ """通过 LLM 提炼用户真实意图""" ++ if not last_goal: ++ return raw_query ++ ++ decide_prompt = f""" ++用户之前的目标是: "{last_goal}" ++用户现在输入了: "{raw_query}" ++ ++请判断: ++1. 如果用户提供补充信息、或者是接续之前的任务,请输出合并后的【最终目标】。 ++2. 如果用户只是指出之前做法有错而非变更目标,那么请输出原目标不做修改。 ++3. 
如果用户开启了一个完全不相关的新话题,请直接输出用户现在的输入内容。 ++ ++请直接输出目标描述,不要包含任何多余的文字、解释或标点。 ++""" ++ try: ++ refined = llmclient.llm_func(decide_prompt).strip() ++ return refined if refined else raw_query ++ except: ++ return raw_query ++ ++def agent_backend_stream(raw_query): ++ final_goal = refine_user_goal(raw_query, st.session_state.last_goal) ++ ++ if final_goal != raw_query: ++ yield f"[Goal Refined] {final_goal}\n" ++ ++ sys_prompt = get_system_prompt() ++ handler = GenericAgentHandler(None, final_goal, './temp') ++ llmclient.last_tools = '' ++ ret = yield from agent_runner_loop(llmclient, ++ sys_prompt, raw_query, handler, ++ TOOLS_SCHEMA, max_turns=25) ++ st.session_state.last_goal = final_goal ++ return ret ++ ++st.title("🖥️ Cowork") ++ ++if "messages" not in st.session_state: ++ st.session_state.messages = [] ++ ++for msg in st.session_state.messages: ++ with st.chat_message(msg["role"]): ++ st.markdown(msg["content"]) ++ ++if prompt := st.chat_input("请输入指令"): ++ st.session_state.messages.append({"role": "user", "content": prompt}) ++ with st.chat_message("user"): ++ st.markdown(prompt) ++ ++ with st.chat_message("assistant"): ++ message_placeholder = st.empty() ++ full_response = "" ++ for chunk in agent_backend_stream(prompt): ++ full_response += chunk ++ message_placeholder.markdown(full_response + "▌") ++ message_placeholder.markdown(full_response) ++ st.session_state.messages.append({"role": "assistant", "content": full_response}) +\ No newline at end of file +diff --git a/ga.py b/ga.py +new file mode 100644 +index 0000000..446a7a3 +--- /dev/null ++++ b/ga.py +@@ -0,0 +1,379 @@ ++import sys, os, re ++import pyperclip ++import json, time ++from pathlib import Path ++import subprocess ++import tempfile ++if sys.stdout is None: sys.stdout = open(os.devnull, "w") ++if sys.stderr is None: sys.stderr = open(os.devnull, "w") ++sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) ++ ++from sidercall import LLMSession, ToolClient ++from agent_loop import 
BaseHandler, StepOutcome, agent_runner_loop ++ ++def code_run(code: str, code_type: str = "python", timeout: int = 60, cwd: str = None): ++ """ ++ 针对 Windows 优化的双模态执行器 ++ python: 运行复杂的 .py 脚本(文件模式) ++ powershell: 运行单行指令(命令模式) ++ 优先使用python,仅在必要系统操作时使用powershell。 ++ """ ++ # 统一路径处理 ++ preview = (code[:60].replace('\n', ' ') + '...') if len(code) > 60 else code.strip() ++ yield f"\n[Action] Running {code_type} in {os.path.basename(cwd)}: {preview}\n" ++ cwd = cwd or os.getcwd() ++ if code_type == "python": ++ # Python 依然建议走文件,因为模型生成的逻辑通常包含多行、import 和类定义 ++ tmp_file = tempfile.NamedTemporaryFile(suffix=".py", delete=False, mode='w', encoding='utf-8') ++ tmp_file.write(code) ++ tmp_path = tmp_file.name ++ tmp_file.close() ++ cmd = ["python", "-u", tmp_path] ++ elif code_type == "powershell": ++ cmd = ["powershell", "-NoProfile", "-NonInteractive", "-Command", code] ++ tmp_path = None ++ else: ++ return {"status": "error", "msg": f"不支持的类型: {code_type}"} ++ print("code run output:") ++ startupinfo = None ++ if os.name == 'nt': ++ startupinfo = subprocess.STARTUPINFO() ++ startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW ++ startupinfo.wShowWindow = 0 # SW_HIDE ++ full_stdout = [] ++ full_stderr = [] ++ try: ++ process = subprocess.Popen( ++ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ++ bufsize=0, cwd=cwd, startupinfo=startupinfo ++ ) ++ for line_bytes in iter(process.stdout.readline, b''): ++ try: ++ line = line_bytes.decode('utf-8') ++ except UnicodeDecodeError: ++ line = line_bytes.decode('gbk', errors='ignore') ++ print(line, end="") ++ full_stdout.append(line) ++ ++ stdout_rem, stderr_raw = process.communicate(timeout=timeout) ++ if stdout_rem: ++ try: rem_str = stdout_rem.decode('utf-8') ++ except UnicodeDecodeError: ++ rem_str = stdout_rem.decode('gbk', errors='ignore') ++ full_stdout.append(rem_str) ++ ++ if stderr_raw: ++ try: stderr_str = stderr_raw.decode('utf-8') ++ except UnicodeDecodeError: ++ stderr_str = stderr_raw.decode('gbk', 
errors='ignore') ++ full_stderr.append(stderr_str) ++ print(f"Error: {stderr_str}") ++ ++ status = "success" if process.returncode == 0 else "error" ++ stdout_str = "".join(full_stdout) ++ stderr_str = "".join(full_stderr) ++ status_icon = "✅" if process.returncode == 0 else "❌" ++ output_snippet = (stdout_str[:200] + '...') if len(stdout_str) > 200 else stdout_str ++ yield f"[Status] {status_icon} Exit Code: {process.returncode}\n[Stdout] {output_snippet}\n" ++ return { ++ "status": status, ++ "stdout": stdout_str[-2000:], ++ "stderr": stderr_str[-2000:], ++ "exit_code": process.returncode ++ } ++ except subprocess.TimeoutExpired: ++ return {"status": "error", "msg": "Timeout"} ++ except Exception as e: ++ return {"status": "error", "msg": str(e)} ++ finally: ++ if code_type == "python" and tmp_path and os.path.exists(tmp_path): os.remove(tmp_path) ++ ++ ++def ask_user(question: str, candidates: list = None): ++ """ ++ 构造一个中断请求。 ++ question: 向用户提出的问题。 ++ candidates: 可选的候选项列表。 ++ 需要保证should_exit为True ++ """ ++ return { ++ "status": "INTERRUPT", ++ "intent": "HUMAN_INTERVENTION", ++ "data": { ++ "question": question, ++ "candidates": candidates or [] ++ } ++ } ++ ++from web_tools import execute_js_rich, get_html ++ ++driver = None ++ ++def first_init_driver(): ++ global driver ++ from TMWebDriver import TMWebDriver ++ driver = TMWebDriver() ++ while True: ++ time.sleep(1) ++ sess = driver.get_all_sessions() ++ if len(sess) > 0: break ++ driver.newtab() ++ time.sleep(5) ++ ++def web_scan(focus_item="", switch_tab_id=None): ++ """ ++ 利用 get_html 获取清洗后的网页内容。 ++ focus_item: 语义过滤指令。如果用户在找特定内容(如“小米汽车”), ++ 算法会优先保留包含该关键词的列表项。 ++ switch_tab_id: 可选参数,如果提供,则在扫描前切换到该标签页。 ++ 应当多用execute_js,少全量观察html。 ++ """ ++ global driver ++ if driver is None: first_init_driver() ++ try: ++ tabs = [] ++ for sess in driver.get_all_sessions(): ++ sess.pop('connected_at', None) ++ sess.pop('type', None) ++ sess['url'] = sess.get('url', '')[:50] + ("..." 
if len(sess.get('url', '')) > 50 else "") ++ tabs.append(sess) ++ if switch_tab_id: driver.default_session_id = switch_tab_id ++ content = get_html(driver, cutlist=True, instruction=focus_item, maxchars=23000) ++ return { ++ "status": "success", ++ "metadata": { ++ "tabs_count": len(tabs), ++ "tabs": tabs, ++ "active_tab": driver.default_session_id ++ }, ++ "content": content ++ } ++ except Exception as e: ++ return {"status": "error", "msg": format_error(e)} ++ ++import traceback ++def format_error(e): ++ exc_type, exc_value, exc_traceback = sys.exc_info() ++ tb = traceback.extract_tb(exc_traceback) ++ if tb: ++ f = tb[-1] ++ fname = os.path.basename(f.filename) ++ return f"{exc_type.__name__}: {str(e)} @ {fname}:{f.lineno}, {f.name} -> `{f.line}`" ++ return f"{exc_type.__name__}: {str(e)}" ++ ++def web_execute_js(script: str): ++ """ ++ 执行 JS 脚本来控制浏览器,并捕获结果和页面变化。 ++ script: 要执行的 JavaScript 代码字符串。 ++ return { ++ "status": "failed" if error_msg else "success", ++ "js_return": result, ++ "error": error_msg, ++ "transients": transients, ++ "environment": { ++ "new_tab": new_tab, ++ "reloaded": reloaded ++ }, ++ "diff": diff_summary, ++ "suggestion": "" if is_significant_change else "页面无明显变化" ++ } ++ """ ++ global driver ++ if driver is None: first_init_driver() ++ try: ++ result = execute_js_rich(script, driver) ++ return result ++ except Exception as e: ++ return {"status": "error", "msg": format_error(e)} ++ ++def file_patch(path: str, old_content: str, new_content: str): ++ """ ++ 在文件中寻找唯一的 old_content 块并替换为 new_content。 ++ """ ++ path = str(Path(path).resolve()) ++ try: ++ if not os.path.exists(path): ++ return {"status": "error", "msg": "文件不存在"} ++ with open(path, 'r', encoding='utf-8') as f: ++ full_text = f.read() ++ # 检查唯一性 ++ count = full_text.count(old_content) ++ if count == 0: ++ return {"status": "error", "msg": "未找到匹配的旧文本块,请检查空格、缩进和换行是否完全一致。"} ++ if count > 1: ++ return {"status": "error", "msg": f"找到 {count} 处匹配,请提供更长的旧文本块以确保唯一性。"} ++ updated_text = 
full_text.replace(old_content, new_content) ++ with open(path, 'w', encoding='utf-8') as f: ++ f.write(updated_text) ++ return {"status": "success", "msg": "文件局部修改成功"} ++ except Exception as e: ++ return {"status": "error", "msg": str(e)} ++ ++def file_read(path, start=1, count=100, show_linenos=True): ++ try: ++ with open(path, 'r', encoding='utf-8', errors='replace') as f: ++ lines = f.readlines() ++ chunk = lines[start-1 : start-1+count] ++ if show_linenos: res = [f"{i+start}|{l[:200]}" for i, l in enumerate(chunk)] ++ else: res = [l for l in chunk] ++ return f"Total:{len(lines)} lines\n" + "".join(res) ++ except Exception as e: ++ return f"Error: {str(e)}" ++ ++class GenericAgentHandler(BaseHandler): ++ ''' ++ Generic Agent 工具库,包含多种工具的实现。工具函数自动加上了 do_ 前缀。实际工具名没有前缀。 ++ ''' ++ def __init__(self, parent, user_input, cwd): ++ self.parent = parent ++ self.user_input = user_input ++ self.plan = "" ++ self.focus = "" ++ self.cwd = cwd ++ ++ def _get_abs_path(self, path): ++ if not path: return "" ++ return os.path.abspath(os.path.join(self.cwd, path)) ++ ++ def do_code_run(self, args, response): ++ '''执行代码片段,有长度限制,不允许代码中放大量数据,如有需要应当通过文件读取进行。 ++ ''' ++ code_type = args.get("type", "python") ++ # 从 response.content 中提取代码块 ++ # 匹配 ```python ... ``` 或 ```powershell ... 
``` ++ pattern = rf"```{code_type}\n(.*?)\n```" ++ # 也可以更通用一点,不分类型提取最后一个代码块:rf"```(?:{code_type})?\n(.*?)\n```" ++ matches = re.findall(pattern, response.content, re.DOTALL) ++ if not matches: ++ return StepOutcome(None, next_prompt=f"【系统错误】:你调用了 code_run,但未在回复中提供 ```{code_type} 代码块。请重新输出代码并附带工具调用。") ++ # 提取最后一个代码块(通常是模型修正后的最终逻辑) ++ code = matches[-1].strip() ++ timeout = args.get("timeout", 60) ++ cwd = args.get("cwd", self.cwd) ++ result = yield from code_run(code, code_type, timeout, cwd) ++ return StepOutcome(result, next_prompt=self._get_anchor_prompt()) ++ ++ def do_ask_user(self, args, response): ++ question = args.get("question", "请提供输入:") ++ candidates = args.get("candidates", []) ++ result = ask_user(question, candidates) ++ return StepOutcome(result, next_prompt="", should_exit=True) ++ ++ def do_web_scan(self, args, response): ++ '''focus_item仅用于在长列表中模糊搜寻相关item ++ 此工具也提供标签页查看和标签页切换功能。 ++ ''' ++ focus_item = args.get("focus_item", "") ++ switch_tab_id = args.get("switch_tab_id", None) ++ result = web_scan(focus_item, switch_tab_id=switch_tab_id) ++ content = result.pop("content", None) ++ yield f'\n{str(result)}\n' ++ next_prompt = f"```html\n{content}\n```" ++ return StepOutcome(result, next_prompt=next_prompt) ++ ++ def do_web_execute_js(self, args, response): ++ '''web情况下的优先使用工具,执行任何js达成对浏览器的*完全*控制。 ++ 支持将结果保存到文件供后续读取分析,但保存功能仅限即时读取,与await等异步操作不兼容。 ++ ''' ++ script = args.get("script", "") ++ save_to_file = args.get("save_to_file", "") ++ result = web_execute_js(script) ++ if save_to_file and "js_return" in result: ++ content = str(result["js_return"] or '') ++ abs_path = self._get_abs_path(save_to_file) ++ with open(abs_path, 'w', encoding='utf-8') as f: f.write(str(content)) ++ result["js_return"] = content[:200] + ("..." 
def do_file_patch(self, args, response):
    '''Apply a local edit to a file by delegating to file_patch.

    Reads "path", "old_content" and "new_content" from args (each defaults
    to an empty string), resolves the path through self._get_abs_path,
    yields a progress message and the patch result as text, and returns a
    StepOutcome carrying the result plus the anchor prompt.
    '''
    target = self._get_abs_path(args.get("path", ""))
    yield "\n[Action] Patching file: " + f"{target}" + "\n"
    outcome = file_patch(target,
                         args.get("old_content", ""),
                         args.get("new_content", ""))
    yield f"{outcome!s}\n"
    return StepOutcome(outcome, next_prompt=self._get_anchor_prompt())
def do_update_plan(self, args, response):
    '''Synchronise the high-level task plan and the current strategic focus.

    Design intent:
    1. Use it for the initial breakdown only when the task involves
       multi-step logic (e.g. search first, then refactor, then test).
    2. Call it again only on a major change of approach (e.g. plan A turns
       out to be infeasible and the work must switch entirely to plan B).
    3. Never use it to record minor debugging steps or code corrections.
    Simple tasks do not need it.

    Non-empty "plan" / "focus" values in args overwrite self.plan /
    self.focus; empty or missing values leave the stored ones untouched.
    '''
    for field in ("plan", "focus"):
        incoming = args.get(field, "")
        if incoming:
            setattr(self, field, incoming)
    yield "\n[Info] Updated plan and focus.\n"
    yield "New Plan:\n" + f"{self.plan}" + "\n\n"
    yield "New Focus:\n" + f"{self.focus}" + "\n"
    outcome = {"status": "success"}
    return StepOutcome(outcome, next_prompt=self._get_anchor_prompt())
def get_screen_width():
    """Return the primary screen width in pixels, or 1920 as a fallback.

    The width is queried through the Win32 ``GetSystemMetrics(0)`` call.
    On platforms where ``ctypes.windll`` is unavailable, or if the call
    fails, the fallback value 1920 is returned instead.
    """
    try:
        # GetSystemMetrics(0) reports the width of the primary screen.
        return ctypes.windll.user32.GetSystemMetrics(0)
    except Exception:
        # Not Windows, or the call failed. `Exception` (rather than a bare
        # except) keeps KeyboardInterrupt/SystemExit propagating.
        return 1920
// Probe the WebSocket port (18765) with a plain HTTP GET and report the outcome
// through `callback`: true when the request completes with any response, false
// when the request errors. No timeout handler is registered here, so a request
// that times out does not invoke `callback`.
function isWebSocketServerAlive(callback) {
    GM_xmlhttpRequest({
        method: 'GET',
        url: 'http://localhost:18765/',
        onload: () => callback(true),
        onerror: () => callback(false)
    });
}
// Update the on-page status indicator.
// `status` is one of 'ok', 'disc', 'conn', 'err' or 'exec'; any other value
// leaves the indicator unchanged. `msg` is only shown for 'err'.
function updateStatus(status, msg) {
    // The indicator is attached to <body>; retry shortly if it is not there yet.
    if (!document.body) return setTimeout(() => updateStatus(status, msg), 100);

    const ind = getIndicator();
    if (!ind) return;

    let color;
    let label;
    switch (status) {
        case 'ok':
            color = '#4CAF50';
            label = '连接成功';
            break;
        case 'disc':
            color = '#f44336';
            label = '连接断开';
            break;
        case 'conn':
            color = '#2196F3';
            label = '正在连接(HTTP)';
            break;
        case 'err':
            color = '#FF9800';
            label = `发生错误 (${msg})`;
            break;
        case 'exec':
            color = '#2196F3';
            label = '正在执行指令...';
            break;
        default:
            return;
    }
    ind.style.backgroundColor = color;
    ind.innerText = log_prefix + label;
}
++ function smartProcessResult(result) { ++ // 处理 null 和原始类型 ++ if (result === null || result === undefined || typeof result !== 'object') { ++ return result; ++ } ++ ++ // 1. 处理 jQuery 对象 - 强制转换为HTML字符串数组 ++ if (typeof jQuery !== 'undefined' && result instanceof jQuery) { ++ const elements = []; ++ for (let i = 0; i < result.length; i++) { ++ if (result[i] && result[i].nodeType === 1) { ++ elements.push(result[i].outerHTML); ++ } ++ } ++ return elements; // 始终返回数组 ++ } ++ ++ // 2. 处理 NodeList 和 HTMLCollection ++ if (result instanceof NodeList || result instanceof HTMLCollection) { ++ const elements = []; ++ for (let i = 0; i < result.length; i++) { ++ if (result[i] && result[i].nodeType === 1) { ++ elements.push(result[i].outerHTML); ++ } ++ } ++ return elements; ++ } ++ ++ // 3. 处理单个 DOM 元素 ++ if (result.nodeType === 1) { ++ return result.outerHTML; ++ } ++ ++ // 4. 检查是否是具有数字索引和length属性的类数组对象 ++ if (!Array.isArray(result) && ++ typeof result === 'object' && ++ 'length' in result && ++ typeof result.length === 'number') { ++ ++ // 检查第一个元素是否是DOM节点 ++ const firstElement = result[0]; ++ if (firstElement && firstElement.nodeType === 1) { ++ const elements = []; ++ const length = Math.min(result.length, 100); ++ ++ for (let i = 0; i < length; i++) { ++ const elem = result[i]; ++ if (elem && elem.nodeType === 1) { ++ elements.push(elem.outerHTML); ++ } ++ } ++ ++ return elements; ++ } ++ } ++ ++ // 5. 
处理普通对象和数组 - 使用标准序列化 ++ try { ++ return JSON.parse(JSON.stringify(result, function(key, value) { ++ if (typeof value === 'object' && value !== null) { ++ if (value.nodeType === 1) { ++ return value.outerHTML; ++ } ++ if (value === window || value === document) { ++ return '[Object]'; ++ } ++ } ++ return value; ++ })); ++ } catch (e) { ++ console.error("序列化对象失败:", e); ++ return `[无法序列化的对象: ${e.message}]`; ++ } ++ } ++ ++ // 防止重复初始化 ++ if (window.ljq_init) return; ++ window.ljq_init = true; ++ ++ function connecthttp() { ++ if (window.use_ws) return; ++ updateStatus('conn'); ++ GM_xmlhttpRequest({ ++ method: "POST", ++ url: httpUrl + "api/longpoll", ++ headers: {"Content-Type": "application/json"}, ++ data: JSON.stringify({ ++ type: 'ready', ++ url: location.href, ++ sessionId: sid ++ }), ++ onload: function(resp) { ++ if (resp.status === 200) { ++ let data = JSON.parse(resp.responseText); ++ console.log(log_prefix + '接收到数据:', data); ++ if (data.id === "" && data.ret === "use ws") return; ++ if (data.id === "") return setTimeout(connecthttp, 100); ++ const response = executeCode(data); ++ ++ if (response.error) { ++ handleError(data.id, response.error, '执行代码'); ++ } else { ++ GM_xmlhttpRequest({ ++ method: "POST", ++ url: httpUrl + "api/result", ++ headers: {"Content-Type": "application/json"}, ++ data: JSON.stringify({ ++ type: 'result', ++ id: data.id, ++ sessionId: sid, ++ result: response.result ++ }) ++ }); ++ } ++ } else { ++ console.error(log_prefix + '请求失败,状态码:', resp.status); ++ updateStatus('err', '请求失败'); ++ } ++ setTimeout(connecthttp, 1000); ++ }, ++ onerror: function(err) { ++ console.error(log_prefix + '请求错误', err); ++ updateStatus('err', '请求失败'); ++ setTimeout(connecthttp, 5000); ++ }, ++ ontimeout: function() { ++ console.log(log_prefix + '请求超时'); ++ updateStatus('err', '请求超时'); ++ setTimeout(connecthttp, 5000); ++ } ++ }); ++ } ++ ++ function executeCode(data) { ++ let id = data.id || 'unknown'; // 获取 ID ++ let result; ++ ++ if (!data.code) { ++ 
// Return true when `e` is the SyntaxError raised for a top-level `return`
// (i.e. the code must be run inside a function body instead of eval).
function isIllegalReturnError(e) {
    if (!(e instanceof SyntaxError)) return false;
    const illegalReturnPatterns = [
        /Illegal return statement/i,    // common in Chrome
        /return not in function/i,      // common in Firefox
        /Illegal 'return' statement/i   // older wording
    ];
    return illegalReturnPatterns.some(pattern => pattern.test(e.message));
}
for (let i = 0; i < 10; i++) { ++ await new Promise(r => setTimeout(r, 150)); ++ let latestReport = GM_getValue('new_tab_report'); ++ if (latestReport && latestReport.ts >= startTime) { ++ console.log(`%c[Detected] 轮询第 ${i+1} 次抓到新标签!`, "color: green"); ++ newTabs.push(latestReport); ++ break; ++ } ++ } ++ } ++ updateStatus('ok'); ++ ws.send(JSON.stringify({ ++ type: 'result', ++ id: data.id, ++ sessionId: sid, ++ result: response.result, ++ newTabs: newTabs ++ })); ++ } ++ } catch (parseError) { ++ handleError('unknown', parseError, '解析消息'); ++ } ++ }; ++ ++ } ++ ++ // 初始化 ++ function init() { ++ if (document.body) { ++ getIndicator(); ++ connect(); ++ } else { ++ setTimeout(init, 50); ++ } ++ } ++ ++ // 监控DOM变化 ++ const observer = new MutationObserver(() => getIndicator()); ++ ++ if (document.readyState !== 'loading') { ++ init(); ++ observer.observe(document.body, { childList: true, subtree: true }); ++ } else { ++ document.addEventListener('DOMContentLoaded', () => { ++ init(); ++ observer.observe(document.body, { childList: true, subtree: true }); ++ }); ++ } ++ ++ // 清理 ++ window.addEventListener('beforeunload', () => { ++ observer.disconnect(); ++ if (ws && ws.readyState === WebSocket.OPEN) { ++ ws.close(); ++ } ++ }); ++})(); +\ No newline at end of file +diff --git a/make_prompts.py b/make_prompts.py +new file mode 100644 +index 0000000..5905083 +--- /dev/null ++++ b/make_prompts.py +@@ -0,0 +1,137 @@ ++import sys, os, re ++import pyperclip ++import json, time ++from pathlib import Path ++import subprocess ++import tempfile ++sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) ++from sidercall import SiderLLMSession, LLMSession, ToolClient ++ ++ ++ask = SiderLLMSession().ask ++ ++ ++def generate_tool_schema(): ++ """ ++ 通过代码内省,将 Handler 的逻辑映射为高语义的工具描述。 ++ """ ++ with open('ga.py', 'r', encoding='utf-8') as f: ++ ga_code = f.read() ++ # 极简且具备高度概括能力的元 Prompt ++ meta_prompt = f""" ++# Role ++你是一个具备深度推理能力的 AI 系统架构师。你将通过阅读 
`GenericAgentHandler` 源码,构建其对应的工具能力矩阵。 ++ ++# Task ++分析下方的源码,并输出 OpenAI Tool Schema。在输出 JSON 之前,你必须进行内部思考(Thinking Process)。 ++ ++# Thinking Process Requirements ++在 `` 标签中,请按顺序分析: ++1. **核心工具链识别**:识别所有 `do_xxx` 方法,并分析它们依赖的底层 Utility 函数。 ++2. **内容溯源审计**:重点分析哪些工具是从 `response.content` 提取核心逻辑(如代码块)的。对于这些工具,确认在 Schema 参数中排除掉对应的字段。 ++3. **调用策略推导**:分析工具间的协作关系(例如 `file_read` 如何为 `file_patch` 提供定位)。 ++4. **兜底逻辑确认**:明确某些特殊万能工具在系统中的保底角色,快速工具无法执行的操作由保底工具执行,但正常应优先使用方便的工具。 ++5. **注释审阅**:结合函数注释,理解每个工具的使用限制,其中的重要信息务必反映在工具描述中(如长度限制等)。 ++注释中的重要信息务必反映在工具描述中。 ++注释中的重要信息务必反映在工具描述中。 ++ ++# Tool Schema Formatting Rules ++- **参数对齐**:仅包含 `do_xxx` 方法中通过 `args.get()` 显式获取的参数。 ++- **高引导性描述**:描述应包含“何时调用”以及“如何根据反馈修正”,需要注意函数的注释事项。 ++- **输出格式**:先输出 `` 块,然后输出 ```json 块。 ++ ++# Source Code ++{ga_code} ++ ++# Output ++请开始思考并生成: ++""" ++ ++ # 假设 ask 是你已经封装好的 LLM 调用接口 ++ raw_response = ask(meta_prompt, model="gemini-3.0-flash") ++ print(raw_response) ++ ++ # --- 健壮的 JSON 解析逻辑 --- ++ try: ++ # 1. 清除 Markdown 围栏 ++ clean_json = raw_response.strip() ++ if clean_json.startswith("```"): ++ # 兼容 ```json 和 ``` ++ clean_json = re.sub(r'^```(?:json)?\s*', '', clean_json) ++ clean_json = re.sub(r'\s*```$', '', clean_json) ++ ++ # 2. 
移除可能的非 JSON 前导/后缀文字(如果有的话) ++ start_idx = clean_json.find('[') ++ end_idx = clean_json.rfind(']') + 1 ++ if start_idx != -1 and end_idx != -1: ++ clean_json = clean_json[start_idx:end_idx] ++ ++ final_schema = json.loads(clean_json) ++ ++ if final_schema: ++ with open('tools_schema.json', 'w', encoding='utf-8') as f: ++ json.dump(final_schema, f, indent=2, ensure_ascii=False) ++ print("✅ 成功从代码内省生成 Schema 并持久化。") ++ return final_schema ++ ++ except Exception as e: ++ print(f"❌ 解析 Schema 失败: {e}\n原始响应: {raw_response}") ++ return None ++ ++ ++def make_system_prompt(ga_code_path='ga.py'): ++ with open(ga_code_path, 'r', encoding='utf-8') as f: ++ ga_code = f.read() ++ ++ # 这个元 Prompt 的目标是生成“世界观”而非“说明书” ++ meta_prompt = f""" ++# Role ++你是一个 AI 架构师。请阅读下方的工具库源码,为 Agent 生成一份【系统级认知指令 (System Prompt)】。 ++ ++# Task ++基于代码逻辑,定义 Agent 的“能力边界”和“行动协议”。 ++ ++# Requirements (Crucial) ++1. **打破预训练偏见**:针对模型常说的“我只是 AI,不能操作网页/文件”进行修正。明确告诉它:你现在拥有物理操作权限,工具设计保证了所有权限做所有事情。 ++2. **避开冗余**:不要重复 Tool Schema 里的参数细节。 ++3. **能力边界定义**: ++ - 网页操作:它不是通过“想象”上网,而是通过实时的浏览器读写。 ++ - 文件操作:它拥有物理文件读写权限,且遵循“先读后写”的稳健性原则。 ++ - 保底逻辑:当专用工具失效时,使用 `code_run` 编写脚本解决一切。 ++ - 特殊的update_plan(仅在复杂任务时使用)和ask_user(用户也是有效资源)工具。 ++4. 
class SiderLLMSession:
    """Thin wrapper that sends single prompts through a sider_ai_api Session."""

    def __init__(self, multiturns=6):
        # `multiturns` is accepted but not used by this class.
        proxy_map = {'https':'127.0.0.1:2082'}
        self._core = Session(cookie=sider_cookie, proxies=proxy_map)

    def ask(self, prompt, model="gemini-3.0-flash"):
        """Send `prompt` to `model` and return the joined response pieces.

        Prompts longer than 30000 characters are cut down to their last
        29500 characters before being sent.
        """
        too_long = len(prompt) > 30000
        text = prompt[-29500:] if too_long else prompt
        pieces = self._core.chat(text, model)
        return ''.join(pieces)
class MockResponse:
    """Parsed model reply, shaped like a chat-completion response object."""

    def __init__(self, thinking, content, tool_calls, raw):
        self.thinking = thinking      # the model's internal reasoning text
        self.content = content        # plain-text reply with the tags removed
        self.tool_calls = tool_calls  # list of MockToolCall, or None
        self.raw = raw                # value passed in unchanged as `raw`

    def __repr__(self):
        # `raw` is left out to keep the representation short.
        return (f"MockResponse(thinking={self.thinking!r}, "
                f"content={self.content!r}, tool_calls={self.tool_calls!r})")
'system'] ++ ++ # 构造工具描述 ++ tool_instruction = "" ++ if tools: ++ tools_json = json.dumps(tools, ensure_ascii=False, indent=2) ++ tool_instruction = f""" ++### ⚡️ 交互协议 (必须严格遵守) ++请按照以下步骤思考并行动: ++1. **思考**: 在 `` 标签中分析现状和策略。 ++2. **行动**: 如果需要调用工具,请紧接着输出一个 **块**,然后结束,我会稍后给你返回块。 ++ 格式: ```\n{{"function": "工具名", "arguments": {{参数}}}}\n\n``` ++ ++### 🛠️ 可用工具库 ++{tools_json} ++""" ++ if self.auto_save_tokens and self.last_tools == tools_json: ++ tool_instruction = "\n### ⚡️ 交互协议保持不变,继续使用之前的工具库。\n" ++ else: ++ self.total_cd_tokens = 0 ++ self.last_tools = tools_json ++ ++ prompt = f"=== SYSTEM ===\n{system_content}\n{tool_instruction}\n\n" ++ for m in history_msgs: ++ role = "USER" if m['role'] == 'user' else "ASSISTANT" ++ prompt += f"=== {role} ===\n{m['content']}\n\n" ++ ++ self.total_cd_tokens += len(prompt) ++ if self.total_cd_tokens > 6000: self.last_tools = '' ++ ++ prompt += "=== ASSISTANT ===\n" ++ return prompt ++ ++ def _parse_mixed_response(self, text): ++ remaining_text = text ++ thinking = '' ++ think_pattern = r"(.*?)" ++ think_match = re.search(think_pattern, text, re.DOTALL) ++ ++ if think_match: ++ thinking = think_match.group(1).strip() ++ remaining_text = re.sub(think_pattern, "", remaining_text, flags=re.DOTALL) ++ ++ tool_calls = None ++ tool_pattern = r"(.*?)" ++ tool_match = re.search(tool_pattern, text, re.DOTALL) ++ ++ json_str = "" ++ if tool_match: ++ json_str = tool_match.group(1).strip() ++ remaining_text = re.sub(tool_pattern, "", remaining_text, flags=re.DOTALL) ++ elif '' in remaining_text: ++ weaktoolstr = remaining_text.split('')[-1].strip() ++ json_str = weaktoolstr if weaktoolstr.endswith('}') else '' ++ remaining_text = remaining_text.replace(''+weaktoolstr, "") ++ ++ if json_str: ++ try: ++ data = tryparse(json_str) ++ func_name = data.get('function') or data.get('tool') ++ args = data.get('arguments') or data.get('args') ++ if args is None: args = {} ++ if func_name: tool_calls = [MockToolCall(func_name, args)] ++ except 
def tryparse(json_str):
    """Parse a JSON string, tolerating one stray trailing character.

    The string is parsed as-is first. If that fails, it is parsed again
    with its last character removed (for example an extra closing brace).

    Args:
        json_str: The JSON text to parse.

    Returns:
        The decoded Python object.

    Raises:
        json.JSONDecodeError: If the text is invalid both as given and
            with the last character removed.
    """
    try:
        return json.loads(json_str)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass. Catching ValueError
        # instead of using a bare except lets KeyboardInterrupt and
        # unrelated errors propagate instead of triggering the retry.
        return json.loads(json_str[:-1])
[role="menuitem"], li').length <= 7 && sourceNode.textContent.length < 500); ++ ++ const childNodes = []; ++ for (const child of sourceNode.childNodes) { ++ const childClone = cloneNode(child, keep || isSmallDropdown); ++ if (childClone) childNodes.push(childClone); ++ } ++ ++ const rect = sourceNode.getBoundingClientRect(); ++ const style = window.getComputedStyle(sourceNode); ++ const area = (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity) <= 0)?0:rect.width * rect.height; ++ const isVisible = (rect.width > 1 && rect.height > 1 && ++ style.display !== 'none' && style.visibility !== 'hidden' && ++ parseFloat(style.opacity) > 0 && ++ Math.abs(rect.left) < 5000 && Math.abs(rect.top) < 5000) ++ || isSmallDropdown; ++ const zIndex = style.position !== 'static' ? (parseInt(style.zIndex) || 0) : 0; ++ ++ let info = { ++ rect, area, isVisible, isSmallDropdown, zIndex, ++ style: { ++ display: style.display, visibility: style.visibility, ++ opacity: style.opacity, position: style.position ++ }}; ++ ++ const nonTextChildren = childNodes.filter(child => child.nodeType !== 3); ++ const hasValidChildren = nonTextChildren.length > 0; ++ ++ if (!isVisible && nonTextChildren.length > 0) { ++ const visChild = nonTextChildren.find(child => ++ nodeInfo.has(child) && nodeInfo.get(child).isVisible); ++ if (visChild) info = nodeInfo.get(visChild); ++ } ++ nodeInfo.set(clone, info); ++ ++ if (sourceNode.nodeType === 1 && sourceNode.tagName === 'DIV') { ++ if (!hasValidChildren && !sourceNode.textContent.trim()) return null; ++ } ++ if (info.isVisible || hasValidChildren || keep) { ++ childNodes.forEach(child => clone.appendChild(child)); ++ return clone; ++ } ++ return null; ++ } ++ ++ return { ++ domCopy: cloneNode(document.body), ++ getNodeInfo: node => nodeInfo.get(node), ++ isVisible: node => { ++ const info = nodeInfo.get(node); ++ return info && info.isVisible; ++ } ++ }; ++} ++const { domCopy, getNodeInfo, isVisible } = 
createEnhancedDOMCopy(); ++const viewportArea = window.innerWidth * window.innerHeight; ++ ++function analyzeNode(node, pPathType='main') { ++ // 处理非元素节点和叶节点 ++ if (node.nodeType !== 1 || !node.children.length) { ++ node.nodeType === 1 && (node.dataset.mark = 'K:leaf'); ++ return; ++ } ++ const pathType = (node.dataset.mark && !node.dataset.mark.includes(':main')) ? 'second' : pPathType; ++ const rectn = getNodeInfo(node).rect; ++ if (rectn.width < window.innerWidth * 0.8 && rectn.height < window.innerHeight * 0.8) return node; ++ if (node.tagName === 'TABLE') return; ++ const children = Array.from(node.children); ++ if (children.length === 1) { ++ node.dataset.mark = 'K:container'; ++ return analyzeNode(children[0], pathType); ++ } ++ if (children.length > 10) return; ++ ++ // 获取子元素信息并排序 ++ const childrenInfo = children.map(child => { ++ const info = getNodeInfo(child) || { rect: {}, style: {} }; ++ return { node: child, rect: info.rect, style: info.style, ++ area: info.area, zIndex: info.zIndex }; ++ }).sort((a, b) => b.area - a.area); ++ ++ // 检测是划分还是覆盖 ++ const isOverlay = hasOverlap(childrenInfo); ++ node.dataset.mark = isOverlay ? 'K:overlayParent' : 'K:partitionParent'; ++ ++ if (isOverlay) handleOverlayContainer(childrenInfo, pathType); ++ else handlePartitionContainer(childrenInfo, pathType); ++ ++ console.log(`${isOverlay ? '覆盖' : '划分'}容器:`, node, `子元素数量: ${children.length}`); ++ console.log('子元素及标记:', children.map(child => ({ ++ element: child, ++ mark: child.dataset.mark || '无', ++ info: getNodeInfo ? 
getNodeInfo(child) : undefined ++ }))); ++ for (const child of children) ++ if (!child.dataset.mark || child.dataset.mark[0] !== 'R') analyzeNode(child, pathType); ++ } ++ ++ // 处理划分容器 ++ function handlePartitionContainer(childrenInfo, pathType) { ++ childrenInfo.sort((a, b) => b.area - a.area); ++ const totalArea = childrenInfo.reduce((sum, item) => sum + item.area, 0); ++ console.log(childrenInfo[0].area / totalArea); ++ const hasMainElement = childrenInfo.length >= 1 && ++ (childrenInfo[0].area / totalArea > 0.5) && ++ (childrenInfo.length === 1 || childrenInfo[0].area > childrenInfo[1].area * 2); ++ if (hasMainElement) { ++ childrenInfo[0].node.dataset.mark = 'K:main'; ++ for (let i = pathType==='main'?1:0; i < childrenInfo.length; i++) { ++ const child = childrenInfo[i]; ++ let isSecondary = containsButton(child.node); ++ if (pathType === "main" && child.node.className.toLowerCase().includes('nav')) isSecondary = true; ++ if (pathType === "main" && child.node.className.toLowerCase().includes('breadcrumbs')) isSecondary = true; ++ if (pathType === "main" && child.node.className.toLowerCase().includes('header') && child.node.className.toLowerCase().includes('table')) isSecondary = true; ++ if (pathType === "main" && child.node.innerHTML.trim().replace(/\s+/g, '').length < 500) isSecondary = true; ++ if (child.style.visibility === 'hidden') isSecondary = false; ++ if (isSecondary) child.node.dataset.mark = 'K:secondary'; ++ else child.node.dataset.mark = 'R:nonEssential'; ++ } ++ } else { ++ const uniqueClassNames = new Set(childrenInfo.map(item => item.node.className)).size; ++ const highClassNameVariety = uniqueClassNames >= childrenInfo.length * 0.8; ++ if (pathType !== 'main' && highClassNameVariety && childrenInfo.length > 5) { ++ childrenInfo.forEach(child => child.node.dataset.mark = 'R:equalmany'); ++ } else { ++ childrenInfo.forEach(child => child.node.dataset.mark = 'K:equal'); ++ } ++ } ++ } ++ ++ function containsButton(container) { ++ const 
// Classify the children of a container whose children overlap each other.
//
// The child with the highest z-index is judged first. If it looks like a
// centred interactive panel, everything strictly below it is marked as covered.
// Otherwise it is labelled as a top bar, a message or a floating ad, and the
// remaining children are classified recursively.
//
// childrenInfo: entries shaped like { node, rect, style, area, zIndex }.
// pathType:     forwarded unchanged to the nested classification calls.
function handleOverlayContainer(childrenInfo, pathType) {
  const byZIndex = childrenInfo.slice().sort((a, b) => b.zIndex - a.zIndex);
  console.log('排序后的子元素:', byZIndex);
  if (!byZIndex.length) return;

  const [top, ...rest] = byZIndex;
  const topNode = top.node;
  const rect = top.rect;
  const viewportW = window.innerWidth;
  const viewportH = window.innerHeight;

  // Content heuristics of the topmost child.
  const isComplex =
    topNode.querySelectorAll('input, select, textarea, button, a, [role="button"]').length >= 1;
  const textLength = (topNode.textContent?.trim() || '').length;
  const hasLinks = topNode.querySelectorAll('a').length > 0;
  const isMostlyText = textLength > 7 && !hasLinks;

  // Geometry heuristics, all relative to the viewport.
  const widthRatio = rect.width / viewportW;
  const heightRatio = rect.height / viewportH;
  const centerDiff = Math.abs((rect.left + rect.width / 2) - viewportW / 2) / viewportW;
  const minDimensionRatio = Math.min(widthRatio, heightRatio);
  const maxDimensionRatio = Math.max(widthRatio, heightRatio);
  const isNearTop = rect.top < 50;
  const isDialog = topNode.querySelector('iframe') && centerDiff < 0.3;

  const looksLikePanel = minDimensionRatio > 0.2 && widthRatio < 0.98;
  const fillsViewport = minDimensionRatio > 0.95;

  if (isComplex && centerDiff < 0.2 && (looksLikePanel || fillsViewport)) {
    topNode.dataset.mark = 'K:mainInteractive';
    // Siblings sharing the top z-index are not considered covered.
    for (const entry of rest) {
      entry.node.dataset.mark = entry.zIndex < top.zIndex ? 'R:covered' : 'K:noncovered';
    }
    return;
  }

  // NOTE(review): `top.isVisible` is read here, but the visible code that builds
  // these entries only sets node/rect/style/area/zIndex, so this branch looks
  // unreachable unless another producer adds the field. Confirm the intent.
  let topMark;
  if (isComplex && isNearTop && maxDimensionRatio > 0.4 && top.isVisible) {
    topMark = 'K:topBar';
  } else if (isMostlyText || isComplex || isDialog) {
    topMark = 'K:messageContent';
  } else {
    topMark = 'R:floatingAd';
  }
  topNode.dataset.mark = topMark;

  // Classify whatever lies underneath with the strategy that fits it.
  if (rest.length === 0) return;
  if (hasOverlap(rest)) {
    handleOverlayContainer(rest, pathType);
  } else {
    handlePartitionContainer(rest, pathType);
  }
}
(rect.top + rect.height/2) : (window.innerHeight/2); ++ ++ // 获取中心元素 ++ const centerElement = document.elementFromPoint(centerX, centerY) || containerElement; ++ if (!centerElement) return { container: null, items: [] }; ++ ++ // 收集祖先链 ++ const ancestors = []; ++ for (let current = centerElement; current && ancestors.length < 10; current = current.parentElement) { ++ ancestors.push(current); ++ if (current === containerElement) break; ++ if (containerElement !== document.body && !containerElement.contains(current)) break; ++ } ++ if (!ancestors.includes(containerElement)) ancestors.push(containerElement); ++ ++ let groupCandidates = []; ++ ancestors.forEach(ancestor => { ++ const topGroups = findTopGroups(ancestor, 3); ++ groupCandidates = groupCandidates.concat(topGroups); ++ }); ++ ++ console.log(groupCandidates); ++ ++ let candidates = []; ++ ancestors.forEach(container => { ++ groupCandidates.forEach(groupInfo => { ++ // 尝试将组应用到当前容器 ++ const items = findMatchingElements(container, groupInfo.selector); ++ // 只考虑足够大的组 ++ if (items.length >= 3) { ++ candidates.push({ ++ container: container, ++ selector: groupInfo.selector, ++ items: items, ++ gscore: groupInfo.score ++ }); ++ } ++ }); ++ }); ++ ++ candidates = candidates.map(candidate => { ++ const score = scoreContainer(candidate.container, candidate.items) + candidate.gscore; ++ return {...candidate, score}; ++ }); ++ ++ if (candidates.length === 0) { ++ return { container: centerElement, items: [] }; ++ } ++ ++ // 3. 
// Find the most promising groups of similar direct children of `container`.
//
// Direct children are bucketed by tag name and by class token. Every bucket
// holding at least max(3, 20% of the children) elements becomes a candidate
// selector: `tag`, `.class`, `tag.class` or `.classA.classB`.
//
// Returns at most `limit` entries of { selector, elements, score }, best first.
// Returns [] when the container has fewer than three children.
function findTopGroups(container, limit) {
  const children = Array.from(container.children);
  const totalChildren = children.length;
  if (totalChildren < 3) return [];

  const minGroupSize = Math.max(3, Math.floor(totalChildren * 0.2));

  // Class tokens are taken from `classList`, not `className`: on SVG elements
  // `className` is an SVGAnimatedString object and has no trim()/split().
  // Computed once per child so the combination filters below do not re-parse.
  const classesOf = new Map(children.map(el => [el, new Set(el.classList)]));

  // Maps (rather than plain objects) keep insertion order and cannot collide
  // with inherited keys such as a class literally named "constructor".
  const byTag = new Map();
  const byClass = new Map();
  const addTo = (buckets, key, el) => {
    if (!buckets.has(key)) buckets.set(key, []);
    buckets.get(key).push(el);
  };

  children.forEach(child => {
    const tag = child.tagName.toLowerCase();
    if (tag === 'td') return; // table cells are not counted as list items
    addTo(byTag, tag, child);
    classesOf.get(child).forEach(cls => addTo(byClass, cls, child));
  });

  // Half of the score is coverage of the children, half is selector specificity.
  const scoreGroup = (selector, elements) => {
    const coverage = elements.length / totalChildren;
    const dots = (selector.match(/\./g) || []).length;
    let specificity;
    if (selector.startsWith('.')) specificity = 0.6 + (dots - 1) * 0.1; // class selector
    else if (dots > 0) specificity = 0.7 + dots * 0.1;                  // tag + class
    else specificity = 0.3;                                             // bare tag
    return coverage * 0.5 + specificity * 0.5;
  };

  const frequentTags = [...byTag.keys()].filter(t => byTag.get(t).length >= minGroupSize);
  const frequentClasses = [...byClass.keys()].filter(c => byClass.get(c).length >= minGroupSize);
  const groups = [];

  // Bare tags are penalised, and <div> is too generic to be a group on its own.
  frequentTags.forEach(tag => {
    if (tag === 'div') return;
    const elements = byTag.get(tag);
    groups.push({ selector: tag, elements, score: scoreGroup(tag, elements) - 0.5 });
  });

  frequentClasses.forEach(cls => {
    const selector = '.' + cls;
    const elements = byClass.get(cls);
    groups.push({ selector, elements, score: scoreGroup(selector, elements) });
  });

  // Combinations are only built from the first three tags and the three most
  // frequent classes to keep the candidate count bounded.
  const topTags = frequentTags.slice(0, 3);
  const topClasses = [...frequentClasses]
    .sort((a, b) => byClass.get(b).length - byClass.get(a).length)
    .slice(0, 3);

  const pushIfLargeEnough = (selector, elements) => {
    if (elements.length >= minGroupSize) {
      groups.push({ selector, elements, score: scoreGroup(selector, elements) });
    }
  };

  // tag + class
  topTags.forEach(tag => {
    topClasses.forEach(cls => {
      pushIfLargeEnough(
        tag + '.' + cls,
        children.filter(el => el.tagName.toLowerCase() === tag && classesOf.get(el).has(cls))
      );
    });
  });

  // class + class
  for (let i = 0; i < topClasses.length; i++) {
    for (let j = i + 1; j < topClasses.length; j++) {
      pushIfLargeEnough(
        '.' + topClasses[i] + '.' + topClasses[j],
        children.filter(el => classesOf.get(el).has(topClasses[i]) && classesOf.get(el).has(topClasses[j]))
      );
    }
  }

  return groups.sort((a, b) => b.score - a.score).slice(0, limit);
}
计算基础面积数据 ++ const containerRect = container.getBoundingClientRect(); ++ const containerArea = containerRect.width * containerRect.height; ++ if (containerArea < 10000) return 0; // 容器太小 ++ ++ // 收集列表项面积数据 ++ const itemAreas = []; ++ let totalItemArea = 0; ++ let visibleItems = 0; ++ ++ items.forEach(item => { ++ const rect = item.getBoundingClientRect(); ++ const area = rect.width * rect.height; ++ if (area > 0) { ++ totalItemArea += area; ++ itemAreas.push(area); ++ visibleItems++; ++ } ++ }); ++ ++ // 如果可见项太少,返回低分 ++ if (visibleItems < 3) return 0; ++ ++ // 防止异常值:确保面积不超过容器 ++ totalItemArea = Math.min(totalItemArea, containerArea * 0.98); ++ const areaRatio = totalItemArea / containerArea; ++ ++ // 3. 计算各项评分 - 使用线性插值而非阶梯 ++ // 3.2 面积比评分 - 最多40分,连续曲线 ++ // 使用sigmoid函数让评分更平滑 ++ const areaScore = 40 / (1 + Math.exp(-12 * (areaRatio - 0.4))); ++ ++ // 3.3 均匀性评分 - 最多20分,连续曲线 ++ let uniformityScore = 0; ++ if (itemAreas.length >= 3) { ++ const mean = itemAreas.reduce((sum, area) => sum + area, 0) / itemAreas.length; ++ const variance = itemAreas.reduce((sum, area) => sum + Math.pow(area - mean, 2), 0) / itemAreas.length; ++ const cv = mean > 0 ? 
// Heuristically decide whether `element` is a small action toolbar (buttons,
// in-page links, icons) rather than site navigation.
//
// Returns true only when the element covers less than 15% of the viewport and
// collects more than one "operation" signal, and more of those than
// "navigation" signals.
function isLikelyOperationMenu(element) {
  const rect = element.getBoundingClientRect();
  const viewportW = window.innerWidth;
  const viewportH = window.innerHeight;

  // Large elements are never treated as operation menus.
  const isCompact = rect.width * rect.height < viewportW * viewportH * 0.15;
  if (!isCompact) return false;

  // Touching any viewport edge counts as being "at the edge".
  const isAtEdge =
    rect.top < 100 ||
    rect.left < 50 ||
    viewportW - rect.right < 50 ||
    viewportH - rect.bottom < 100;

  const links = Array.from(element.querySelectorAll('a'));
  const buttons = Array.from(element.querySelectorAll('button, [role="button"]'));
  const interactiveCount = links.length + buttons.length;

  // Quick reject: a sizeable edge element with several links is navigation.
  if (isAtEdge && rect.width > 150 && rect.height > 50 && links.length > 3) {
    return false;
  }

  // Bucket every link by the kind of target it points to.
  const linkTypes = { hash: 0, js: 0, external: 0, internal: 0 };
  for (const link of links) {
    const href = link.getAttribute('href') || '';
    if (href.startsWith('#')) {
      linkTypes.hash++;
    } else if (href.startsWith('javascript:')) {
      linkTypes.js++;
    } else if (href.includes('://') && !href.includes(location.hostname)) {
      linkTypes.external++;
    } else {
      linkTypes.internal++;
    }
  }

  const hasInPageLinks = linkTypes.hash > 0 || linkTypes.js > 0;
  const hasButton = buttons.length > 0;
  const hasSeveralButtons = buttons.length > 1;
  const isShortHorizontalBar = rect.width > rect.height * 1.5 && interactiveCount <= 6;
  const hasIcons = element.querySelectorAll('svg, img, i, [class*="icon"]').length > 0;
  const isPositionedInside = getComputedStyle(element).position !== 'static' && !isAtEdge;

  const hasManyInternalLinks = linkTypes.internal > 3;
  const isLinksOnly = links.length === interactiveCount && links.length > 3;

  const countTrue = flags => flags.reduce((n, flag) => (flag ? n + 1 : n), 0);
  const opScore = countTrue([
    hasInPageLinks, hasButton, hasSeveralButtons,
    isShortHorizontalBar, hasIcons, isPositionedInside,
  ]);
  const navScore = countTrue([isAtEdge, hasManyInternalLinks, isLinksOnly]);

  return opScore > 1 && opScore > navScore;
}
// Descend from `node` towards the element that holds the page's main content.
//
// At each level the visible children with some text are inspected:
//   - a single child is followed unconditionally;
//   - non-overlapping children are followed only when one clearly dominates
//     by area, otherwise the current node is the answer;
//   - overlapping children prefer a centred, prominent, interactive layer
//     (dialog-like), falling back to the largest child.
//
// Returns `node` itself when it has no children, covers less than 40% of the
// viewport, has more than 10 candidate children, or is a content + operation
// menu pair.
function findMainContent(node) {
  if (!node?.children?.length) return node;

  const rectn = node.getBoundingClientRect();
  const viewportArea = window.innerWidth * window.innerHeight;
  if (rectn.width * rectn.height < viewportArea * 0.4) return node;

  // Only rendered children that carry some text are candidates.
  const children = [...node.children].filter(child => {
    const style = window.getComputedStyle(child);
    const hasTextContent = child.textContent.trim().length > 5;
    return style.display !== 'none' && style.visibility !== 'hidden' &&
           style.opacity !== '0' && hasTextContent;
  });
  if (!children.length) return node;
  if (children.length === 1) return findMainContent(children[0]);
  if (children.length > 10) return node;

  // A content block paired with an operation menu belongs together. Either
  // child may be the menu, so both are checked.
  if (children.length === 2 && children.some(child => isLikelyOperationMenu(child))) return node;

  // Geometry and style of every candidate, largest first.
  const elemInfo = children.map(child => {
    const rect = getFirstVisibleRect(child);
    const style = window.getComputedStyle(child);
    return {
      element: child, area: rect.width * rect.height, rect, style,
      zIndex: rect.zIndex || 0, position: style.position
    };
  }).sort((a, b) => b.area - a.area);

  const isOverlapping = (r1, r2) =>
    !(r1.right <= r2.left || r1.left >= r2.right || r1.bottom <= r2.top || r1.top >= r2.bottom);

  const hasOverlap = elemInfo.some((e1, i) =>
    elemInfo.slice(i + 1).some(e2 => isOverlapping(e1.rect, e2.rect))
  );

  console.log(hasOverlap, elemInfo);

  // Side-by-side layout: descend only into a clearly dominant child.
  if (!hasOverlap) {
    const totalArea = elemInfo.reduce((sum, item) => sum + item.area, 0);
    const [main, second] = elemInfo;
    return (main.area / totalArea > 0.6 && (!second || main.area > second.area * 2))
      ? findMainContent(main.element) : node;
  }

  // Stacked layout: positioned elements first, then higher z-index first.
  const sorted = [...elemInfo].sort((a, b) => {
    if (a.position !== 'static' && b.position === 'static') return -1;
    if (a.position === 'static' && b.position !== 'static') return 1;
    return b.zIndex - a.zIndex;
  });

  // First layer that is roughly centred, not transparent, visually raised
  // (z-index or shadow) and contains something to interact with.
  const suitable = sorted.find(x => {
    const { element: el, rect, style } = x;
    return Math.abs((rect.left + rect.width / 2) - window.innerWidth / 2) < window.innerWidth * 0.3 &&
           parseFloat(style.opacity) > 0.1 &&
           (x.zIndex > 30 || style.boxShadow !== 'none') &&
           el.querySelector('button, a, input') !== null;
  });

  // elemInfo is already sorted by area, so its head is the largest child.
  return findMainContent((suitable || elemInfo[0]).element);
}
def start_temp_monitor(driver):
    """Start polling the page for short-lived text such as toasts.

    Injects a script that snapshots the page's text nodes once, then re-scans
    every 450 ms and accumulates every text prefix it sees under ``window._tm``.
    Only trimmed texts longer than 10 characters whose first 20 characters
    contain no underscore are recorded.

    Monitoring is best-effort: a failure to inject the script is reported on
    stdout and otherwise ignored. ``KeyboardInterrupt`` and ``SystemExit`` are
    deliberately not swallowed.

    Args:
        driver: object exposing ``execute_js(script)``.
    """
    js = """function startStrMonitor(interval) {
        if (window._tm && window._tm.id) clearInterval(window._tm.id);
        window._tm = {extract: () => {
            const texts = new Set(), walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
            let node, t, s; while (node = walker.nextNode())
                ((t = node.textContent.trim()) && t.length > 10 && !(s = t.substring(0, 20)).includes('_')) && texts.add(s);
            return texts;
        }};
        window._tm.init = window._tm.extract();
        window._tm.all = new Set();
        window._tm.id = setInterval(() => window._tm.extract().forEach(t => window._tm.all.add(t)), interval);
    }
    startStrMonitor(450);
    """
    try:
        driver.execute_js(js)
    except Exception as e:
        # Best-effort: the caller can proceed without transient-text capture.
        print(f"[Warn] start_temp_monitor failed: {e}")
def find_changed_elements(before_html, after_html):
    """Estimate how much a page changed between two HTML snapshots.

    Every element is reduced to a signature built from its tag name, its
    attributes (ignoring ``data-track-id``), its number of direct child
    elements and its stripped text. Elements of the "after" snapshot whose
    signature is new, or occurs more often than before, count as changed.

    Args:
        before_html: markup of the earlier snapshot.
        after_html: markup of the later snapshot.

    Returns:
        A dict with ``"changed"`` (number of new or surplus elements) and
        ``"similarity"`` (shared signature occurrences divided by the larger
        element count; 1.0 when both snapshots are empty).
    """
    def signature_of(element):
        attrs = {
            key: value
            for key, value in element.attrs.items()
            if key != 'data-track-id'
        }
        child_count = len(element.find_all(recursive=False))
        text = element.get_text(strip=True)
        return f"{element.name}:{str(attrs)}:{child_count}:{text}"

    def group_by_signature(markup):
        groups = {}
        for element in BeautifulSoup(markup, 'html.parser').find_all(True):
            groups.setdefault(signature_of(element), []).append(element)
        return groups

    before_tree = group_by_signature(before_html)
    after_tree = group_by_signature(after_html)

    changed_elements = []
    for signature, elements in after_tree.items():
        previous = before_tree.get(signature)
        if previous is None:
            # Signature never seen before: every occurrence is new.
            surplus = len(elements)
        else:
            # Known signature: only the additional occurrences count.
            surplus = max(len(elements) - len(previous), 0)
        changed_elements.extend(str(element) for element in elements[:surplus])

    before_elements = sum(map(len, before_tree.values()))
    after_elements = sum(map(len, after_tree.values()))
    # Signatures missing from either side contribute nothing to the overlap.
    common_elements = sum(
        min(len(before_tree[signature]), len(after_tree[signature]))
        for signature in before_tree.keys() & after_tree.keys()
    )

    largest = max(before_elements, after_elements)
    similarity = common_elements / largest if largest > 0 else 1.0
    return {
        "changed": len(changed_elements),
        "similarity": similarity
    }
**验证反馈**:检查 Exit Code 或输出内容,准备下一步。 ++ ++你现在的状态:**权限已就绪,物理驱动已加载,请开始执行。** +\ No newline at end of file +diff --git a/tools_schema.json b/tools_schema.json +new file mode 100644 +index 0000000..e69c6c8 +--- /dev/null ++++ b/tools_schema.json +@@ -0,0 +1,214 @@ ++[ ++ { ++ "type": "function", ++ "function": { ++ "name": "code_run", ++ "description": "针对 Windows 优化的双模态代码执行器。优先使用 python 运行复杂的脚本、逻辑和数据处理(需在回复中提供 ```python 代码块);仅在必要系统操作(如文件管理、环境变量设置)时使用 powershell。注意:不要在代码中放置大量数据,如有需要应通过文件读取。代码逻辑必须包含在回复的消息体中。", ++ "parameters": { ++ "type": "object", ++ "properties": { ++ "type": { ++ "type": "string", ++ "enum": [ ++ "python", ++ "powershell" ++ ], ++ "description": "执行模式。python 用于逻辑运算,powershell 用于单行指令。" ++ }, ++ "timeout": { ++ "type": "integer", ++ "default": 60, ++ "description": "执行超时时间(秒)。" ++ }, ++ "cwd": { ++ "type": "string", ++ "description": "工作目录,默认为当前工作目录。" ++ } ++ }, ++ "required": [ ++ "type" ++ ] ++ } ++ } ++ }, ++ { ++ "type": "function", ++ "function": { ++ "name": "web_execute_js", ++ "description": "浏览器控制的首选工具。通过执行 JavaScript 达成对网页的完全控制(如点击、滚动、提取特定数据)。支持将执行结果保存到文件供后续分析。注意:保存功能仅限即时读取,与 await 等异步操作不兼容。", ++ "parameters": { ++ "type": "object", ++ "properties": { ++ "script": { ++ "type": "string", ++ "description": "要执行的 JavaScript 代码。" ++ }, ++ "save_to_file": { ++ "type": "string", ++ "description": "(可选)将 JS 返回结果保存到指定的文件路径。" ++ } ++ }, ++ "required": [ ++ "script" ++ ] ++ } ++ } ++ }, ++ { ++ "type": "function", ++ "function": { ++ "name": "web_scan", ++ "description": "获取网页的清洗后 HTML 内容。支持多标签页管理,可查看当前所有标签页并进行切换。应配合 execute_js 使用,减少全量观察 HTML 以提高效率。", ++ "parameters": { ++ "type": "object", ++ "properties": { ++ "focus_item": { ++ "type": "string", ++ "description": "语义过滤指令。在长列表中模糊搜寻相关项(如“搜索特定商品名称”),算法会优先保留匹配内容。" ++ }, ++ "switch_tab_id": { ++ "type": "string", ++ "description": "可选的标签页 ID。如果提供,将先切换到该标签页再进行扫描。" ++ } ++ } ++ } ++ } ++ }, ++ { ++ "type": "function", ++ "function": { ++ "name": "file_read", ++ "description": "读取文件内容。支持分页读取以处理大文件,默认每页 100 
行并带有行号,方便 file_patch 定位。", ++ "parameters": { ++ "type": "object", ++ "properties": { ++ "path": { ++ "type": "string", ++ "description": "文件路径。" ++ }, ++ "start": { ++ "type": "integer", ++ "default": 1, ++ "description": "起始行号(从 1 开始)。" ++ }, ++ "count": { ++ "type": "integer", ++ "default": 100, ++ "description": "读取的行数。" ++ }, ++ "show_linenos": { ++ "type": "boolean", ++ "default": true, ++ "description": "是否显示行号。" ++ } ++ }, ++ "required": [ ++ "path" ++ ] ++ } ++ } ++ }, ++ { ++ "type": "function", ++ "function": { ++ "name": "file_patch", ++ "description": "对文件进行精细的局部修改。通过寻找唯一的旧文本块并替换为新文本。注意:必须确保 old_content 在文件中是唯一的,且空格、缩进、换行必须与原文件完全一致。如果替换失败,请先用 file_read 确认文件内容。", ++ "parameters": { ++ "type": "object", ++ "properties": { ++ "path": { ++ "type": "string", ++ "description": "目标文件路径。" ++ }, ++ "old_content": { ++ "type": "string", ++ "description": "要被替换的原始代码块(需确保唯一性)。" ++ }, ++ "new_content": { ++ "type": "string", ++ "description": "替换后的新代码块。" ++ } ++ }, ++ "required": [ ++ "path", ++ "old_content", ++ "new_content" ++ ] ++ } ++ } ++ }, ++ { ++ "type": "function", ++ "function": { ++ "name": "file_write", ++ "description": "用于对整个文件进行覆盖写入或追加。主要用于创建新文件或处理文件的大量变更。具体写入的内容必须以代码块(```)的形式包含在回复的消息体中。", ++ "parameters": { ++ "type": "object", ++ "properties": { ++ "path": { ++ "type": "string", ++ "description": "目标文件路径。" ++ }, ++ "mode": { ++ "type": "string", ++ "enum": [ ++ "overwrite", ++ "append" ++ ], ++ "default": "overwrite", ++ "description": "写入模式:overwrite(覆盖)或 append(追加)。" ++ } ++ }, ++ "required": [ ++ "path" ++ ] ++ } ++ } ++ }, ++ { ++ "type": "function", ++ "function": { ++ "name": "update_plan", ++ "description": "同步宏观任务进度与战略重心。仅在涉及多步逻辑的初始拆解或发生重大方针变更(原方案不可行)时调用。严禁用于记录细微的调试步骤。简单任务无需使用。", ++ "parameters": { ++ "type": "object", ++ "properties": { ++ "plan": { ++ "type": "string", ++ "description": "更新后的宏观执行计划。" ++ }, ++ "focus": { ++ "type": "string", ++ "description": "当前阶段的战略重心。" ++ } ++ } ++ } ++ } ++ }, ++ { ++ "type": "function", ++ 
"function": { ++ "name": "ask_user", ++ "description": "当遇到无法自动决策、需要用户授权、需要用户提供私密信息或在关键节点需要确认时调用。调用后系统会暂停并等待人工介入。", ++ "parameters": { ++ "type": "object", ++ "properties": { ++ "question": { ++ "type": "string", ++ "description": "向用户提出的问题或请求。" ++ }, ++ "candidates": { ++ "type": "array", ++ "items": { ++ "type": "string" ++ }, ++ "description": "提供给用户的可选快捷选项。" ++ } ++ }, ++ "required": [ ++ "question" ++ ] ++ } ++ } ++ } ++] +\ No newline at end of file +diff --git a/web_tools.py b/web_tools.py +new file mode 100644 +index 0000000..ec591cc +--- /dev/null ++++ b/web_tools.py +@@ -0,0 +1,75 @@ ++import sys, os, re ++import pyperclip ++import json, time ++import subprocess ++import tempfile ++sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) ++ ++from simphtml import get_main_block, start_temp_monitor, get_temp_texts, find_changed_elements, optimize_html_for_tokens ++from simphtml import js_findMainContent, js_findMainList ++from bs4 import BeautifulSoup ++ ++def get_html(driver, cutlist=False, maxchars=28000, instruction=""): ++ page = get_main_block(driver) ++ soup = optimize_html_for_tokens(page) ++ html = str(soup) ++ if not cutlist or len(html) <= maxchars: return html ++ rr = driver.execute_js(js_findMainList + js_findMainContent + """ ++ return findMainList(findMainContent(document.body));""") ++ sel = rr.get("selector", None) ++ if not sel: return html[:maxchars] ++ s = BeautifulSoup(str(soup), "html.parser"); items = s.select(sel) ++ hit = [it for it in items if instruction and instruction.strip() and instruction in it.get_text(" ",strip=True)] ++ keep = hit[:6] if hit else items[:3] ++ for it in items: ++ if it not in keep: it.decompose() ++ s = optimize_html_for_tokens(s) ++ return str(s)[:maxchars] ++ ++def execute_js_rich(script, driver): ++ start_temp_monitor(driver) ++ curr_session = driver.default_session_id ++ last_html = get_html(driver) ++ result = None; error_msg = None ++ new_tab = False; reloaded = False ++ try: ++ 
print(f"⚡ Executing: {script[:250]} ...") ++ result = driver.execute_js(script, auto_switch_newtab=True) ++ if type(result) is dict and result.get('closed', 0) == 1: reloaded = True ++ time.sleep(2) ++ except Exception as e: ++ error = e.args[0] if e.args else str(e) ++ if isinstance(error, dict): error.pop('stack', None) ++ error_msg = str(error) ++ print(f"❌ Error: {error_msg}") ++ ++ transients = get_temp_texts(driver) ++ ++ if driver.default_session_id != curr_session: ++ curr_session = driver.latest_session_id ++ print('Session changed') ++ new_tab = True ++ ++ current_html = get_html(driver) ++ diff_summary = "无需对比 (报错)" ++ is_significant_change = False ++ if not error_msg: ++ diff_data = find_changed_elements(last_html, current_html) ++ change_count = diff_data.get('changed', 0) ++ diff_summary = f"DOM变化量: {change_count}" ++ if change_count < 5 and not transients and not new_tab: ++ diff_summary += " (页面几乎无静默变化)" ++ else: ++ is_significant_change = True ++ return { ++ "status": "failed" if error_msg else "success", ++ "js_return": result, ++ "error": error_msg, ++ "transients": transients, ++ "environment": { ++ "new_tab": new_tab, ++ "reloaded": reloaded ++ }, ++ "diff": diff_summary, ++ "suggestion": "" if is_significant_change else "页面无明显变化" ++ } diff --git a/sidercall.py b/sidercall.py index 44faa94..cb23370 100644 --- a/sidercall.py +++ b/sidercall.py @@ -56,18 +56,18 @@ class GeminiSession: return iter([full_text]) if stream else full_text class ClaudeSession: - def __init__(self, api_key, api_base, model="claude-opus", context_win=32000): + def __init__(self, api_key, api_base, model="claude-opus", context_win=24000): self.api_key, self.api_base, self.default_model, self.context_win = api_key, api_base.rstrip('/'), model, context_win self.raw_msgs, self.lock = [], threading.Lock() def _trim_messages(self, messages): total = sum(len(m['prompt'])//4 for m in messages) if total <= self.context_win: return messages - trimmed = [] + target, current, result 
= self.context_win * 0.9, 0, [] for msg in reversed(messages): - if sum(len(m['prompt'])//4 for m in trimmed) + len(msg['prompt'])//4 <= self.context_win * 0.9: - trimmed.insert(0, msg) + if (msg_len := len(msg['prompt'])//4) + current <= target: + result.append(msg); current += msg_len else: break - return trimmed if trimmed else messages[-2:] + return result[::-1] or messages[-2:] def raw_ask(self, messages, model=None, temperature=0.5, max_tokens=4096): model = model or self.default_model headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} @@ -315,9 +315,9 @@ class ToolClient: args = data.get('arguments') or data.get('args') or data.get('params') or data.get('parameters') if args is None: args = data if func_name: tool_calls = [MockToolCall(func_name, args)] - except json.JSONDecodeError: + except json.JSONDecodeError as e: print("[Warn] Failed to parse tool_use JSON:", json_str) - remaining_text += f"[Warning] JSON 解析失败,模型输出了无效的 JSON." + tool_calls = [MockToolCall('bad_json', {'msg': f'Failed to parse tool_use JSON: {str(e)}'})] except Exception as e: print("[Error] Exception during tool_use parsing:", str(e), data)