diff --git a/.gitignore b/.gitignore index 416fb38..28144b5 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,11 @@ memory/* # Plan SOP !memory/plan_sop.md +# Skill Search SOP +!memory/skill_search/ +!memory/skill_search/** +memory/skill_search/**/__pycache__/ + # ADB UI tool !memory/adb_ui.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..641d051 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 lsdefine + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 4c8d9f5..bac59d6 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,8 @@ The entire core loop is just **92 lines of code** (`agent_loop.py`). | `web_execute_js` | Control browser behavior | | `ask_user` | Human-in-the-loop confirmation | +> Additionally, 2 **memory management tools** (`update_working_checkpoint`, `start_long_term_update`) allow the agent to persist context and accumulate experience across sessions. + 4️⃣ **Capability Extension Mechanism** > _Capable of dynamically creating new tools._ @@ -304,7 +306,7 @@ MIT License — see [LICENSE](LICENSE) ## 📅 最新动态 -- **2026-03-:** [发布百万级 Skill 库](https://mp.weixin.qq.com/s/q2gQ7YvWoiAcwxzaiwpuiQ?scene=1&click_id=7) +- **2026-03-10:** [发布百万级 Skill 库](https://mp.weixin.qq.com/s/q2gQ7YvWoiAcwxzaiwpuiQ?scene=1&click_id=7) - **2026-03-08:** [发布以 GenericAgent 为核心的"政务龙虾" Dintal Claw](https://mp.weixin.qq.com/s/eiEhwo-j6S-WpLxgBnNxBg) - **2026-03-01:** [GenericAgent 被机器之心报道](https://mp.weixin.qq.com/s/uVWpTTF5I1yzAENV_qm7yg) - **2026-01-11:** GenericAgent V1.0 公开版本发布 @@ -476,6 +478,8 @@ GenericAgent 通过**分层记忆 × 最小工具集 × 自主执行循环**完 | `web_execute_js` | 控制浏览器行为 | | `ask_user` | 人机协作确认 | +> 此外,还有 2 个**记忆管理工具**(`update_working_checkpoint`、`start_long_term_update`),使 Agent 能够跨会话积累经验、维持持久上下文。 + 4️⃣ **能力扩展机制** > 具备动态创建新的工具能力 > diff --git a/TMWebDriver.py b/TMWebDriver.py index 86a2b0b..9669777 100644 --- a/TMWebDriver.py +++ b/TMWebDriver.py @@ -34,7 +34,7 @@ class Session: class TMWebDriver: - def __init__(self, host: str = 'localhost', port: int = 18765): + def __init__(self, host: str = '127.0.0.1', port: int = 18765): self.host, self.port = host, port self.sessions, self.results, self.acks = {}, {}, {} self.default_session_id = None @@ -202,7 +202,7 @@ class TMWebDriver: hasjump = acked = False while exec_id not in self.results: - time.sleep(0.5) + time.sleep(0.2) if not acked and exec_id in self.acks: acked = True; start_time = time.time() if tp == 'ws': @@ -266,4 +266,4 @@ class TMWebDriver: return self.execute_js(f'GM_openInTab("{url}");') if __name__ == "__main__": - driver = TMWebDriver(host='localhost', port=18765) \ No newline at end of file + driver = TMWebDriver(host='127.0.0.1', port=18765) \ No newline at end of file diff --git a/agentmain.py b/agentmain.py index 4151ad5..a1025c7 100644 --- a/agentmain.py +++ b/agentmain.py @@ -38,23 +38,16 @@ def get_system_prompt(): class GeneraticAgent: def __init__(self): script_dir = os.path.dirname(os.path.abspath(__file__)) - temp_dir = os.path.join(script_dir, 'temp') - if not os.path.exists(temp_dir): os.makedirs(temp_dir) + os.makedirs(os.path.join(script_dir, 'temp'), exist_ok=True) from llmcore import mykeys llm_sessions = [] for k, cfg in mykeys.items(): if not any(x in k for x in ['api', 'config', 'cookie']): continue try: - if 'claude' in k: llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])] - if 'oai' in k: llm_sessions += [LLMSession( - api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'], proxy=cfg.get('proxy'), - api_mode=cfg.get('api_mode', 'chat_completions'), - max_retries=cfg.get('max_retries', 2), - connect_timeout=cfg.get('connect_timeout', 10), - read_timeout=cfg.get('read_timeout', 120), - )] - if 'xai' in k: llm_sessions += [XaiSession(cfg, mykeys.get('proxy', ''))] - if 'sider' in k: llm_sessions += [SiderLLMSession(cfg, default_model=x) for x in \ + if 'claude' in k: llm_sessions += [ClaudeSession(cfg=cfg)] + if 'oai' in k: llm_sessions += [LLMSession(cfg=cfg)] + if 'xai' in k: llm_sessions += [XaiSession(cfg=cfg)] + if 'sider' in k: llm_sessions += [SiderLLMSession(cfg={'apikey': cfg, 'model': x}) for x in \ ["gemini-3.0-flash", "gpt-5.4"]] except: pass if len(llm_sessions) > 0: self.llmclient = ToolClient(llm_sessions, auto_save_tokens=True) @@ -63,10 +56,8 @@ class GeneraticAgent: self.history = [] self.task_queue = queue.Queue() self.is_running, self.stop_sig = False, False - self.llm_no = 0 - self.inc_out = False - self.handler = None - self.verbose = True + self.llm_no = 0; self.inc_out = False + self.handler = None; self.verbose = True def next_llm(self, n=-1): self.llm_no = ((self.llm_no + 1) if n < 0 else n) % len(self.llmclient.backends) @@ -80,8 +71,7 @@ class GeneraticAgent: print('Abort current task...') if not self.is_running: return self.stop_sig = True - if self.handler is not None: - self.handler.code_stop_signal.append(1) + if self.handler is not None: self.handler.code_stop_signal.append(1) def put_task(self, query, source="user", images=None): display_queue = queue.Queue() @@ -99,10 +89,11 @@ class GeneraticAgent: sys_prompt = get_system_prompt() script_dir = os.path.dirname(os.path.abspath(__file__)) handler = GenericAgentHandler(None, self.history, os.path.join(script_dir, 'temp')) - if self.handler and self.handler.key_info: - handler.key_info = self.handler.key_info - if '清除工作记忆' not in handler.key_info: - handler.key_info += '\n[SYSTEM] 若开始新任务,先更新或清除工作记忆\n' + if self.handler and 'key_info' in self.handler.working: + ki = re.sub(r'\n\[SYSTEM\] 此为.*?工作记忆[。\n]*', '', self.handler.working['key_info']) # 去旧 + handler.working['key_info'] = ki + handler.working['passed_sessions'] = ps = self.handler.working.get('passed_sessions', 0) + 1 + if ps > 0: handler.working['key_info'] += f'\n[SYSTEM] 此为 {ps} 个对话前设置的key_info,若已在新任务,先更新或清除工作记忆。\n' self.handler = handler self.llmclient.backend = self.llmclient.backends[self.llm_no] user_input = raw_query @@ -154,7 +145,6 @@ if __name__ == '__main__': threading.Thread(target=agent.run, daemon=True).start() if args.task: - script_dir = os.path.dirname(os.path.abspath(__file__)) d = os.path.join(script_dir, f'temp/{args.task}'); rp = os.path.join(d, 'reply.txt'); nround = '' with open(os.path.join(d, 'input.txt'), encoding='utf-8') as f: raw = f.read() while True: @@ -189,11 +179,13 @@ if __name__ == '__main__': try: while 'done' not in (item := dq.get(timeout=120)): pass result = item['done'] + print(result) except Exception as e: if once: raise print(f'[Reflect] drain error: {e}'); result = f'[ERROR] {e}' - script_dir = os.path.dirname(os.path.abspath(__file__)) - open(os.path.join(script_dir, './temp/reflect.log'), 'a', encoding='utf-8').write(f'[{datetime.now():%m-%d %H:%M}]\n{result}\n\n') + log_dir = os.path.join(script_dir, 'temp/reflect_logs'); os.makedirs(log_dir, exist_ok=True) + script_name = os.path.splitext(os.path.basename(args.reflect))[0] + open(os.path.join(log_dir, f'{script_name}_{datetime.now():%Y-%m-%d}.log'), 'a', encoding='utf-8').write(f'[{datetime.now():%m-%d %H:%M}]\n{result}\n\n') if on_done: try: on_done(result) except Exception as e: print(f'[Reflect] on_done error: {e}') diff --git a/assets/global_mem_insight_template.txt b/assets/global_mem_insight_template.txt index a497ccc..dc16393 100644 --- a/assets/global_mem_insight_template.txt +++ b/assets/global_mem_insight_template.txt @@ -1,20 +1,23 @@ # [Global Memory Insight] -# 【引导注释 - 使用后请删除整个注释块】 -# 格式:两层「场景→记忆」映射 + RULES,总计≤30行 -# -# == 第一层:高频场景 key→value == -# 每行格式: 触发场景关键词: sop名/py名/L2:section名(关键参数) -# 示例: 境外网站/API: proxies={'https':'http://127.0.0.1:2082'} -# 示例: 微信发消息: wechat_send_sop | 微信读聊天: wechat_db_sop+wechat_db_utils(quick_connect) -# 可用 | 分隔同类场景,括号内放硬参数 -# -# == 第二层:低频关键词,自包含可只写一个词 == -# 格式: 一行内用 | 分隔多个关键词,需要时 read L2 或 ls memory/ 查详情 -# 示例: 邮件ezgmail | 游戏game | OCR | 华为云Huawei_Cloud -# -# == [RULES] == -# 压缩的通用操作准则,每条一行 -# 示例: 1. 搜索先行: 文件用es, 信息用google, 禁猜路径 +浏览器自动化: web_scan/web_execute_js直接调用 | 特殊:tmwebdriver_sop(文件上传/图搜/PDF blob/元素物理坐标/Cookie提取含HttpOnly/跨域iframe操控/CDP/跨tab/后台tab操作) +键鼠模拟: ljqCtrl_sop+.py(仅win,禁pyautogui/先activate窗口) +定时任务: scheduled_task_sop(报告→sche_tasks/done/) | 与自主任务完全独立 +自主探索任务: autonomous_operation_sop(报告→temp/autonomous_reports/history.txt,不在memory下) | 与定时任务完全独立 +手机操控: adb_ui.py +需要时read L2 或 ls ../memory/ 查L3 L0(META-SOP): memory_management_sop -出厂L3: web_setup_sop | autonomous_operation_sop | scheduled_task_sop | ljqCtrl_sop+.py | tmwebdriver_sop | subagent_sop | plan_sop | mem_scanner.py | adb_ui.py \ No newline at end of file +L2: 现空 +L3: web_setup_sop | autonomous_operation_sop | scheduled_task_sop | ljqCtrl_sop+.py | tmwebdriver_sop | subagent_sop | plan_sop | mem_scanner.py | adb_ui.py + +[RULES] +1. 搜索先行: 信息尽量用google(必须web), 项目内os.listdir, 禁猜路径 +2. 交叉验证: 禁信搜索摘要, 数值必进详情页核实 +3. 编码安全: 改前必读源码; import memory用sys.path.append +4. 闭环: 物理模拟后必确认; 3次失败请求干预; +5. 进程: 禁无条件杀python(会杀自己), 精确PID, 禁os.kill判活 +6. 窗口: GUI状态优先枚举窗口, 比OCR快 +7. 物理红线: cwd用./; cwd指定后代码内禁用../向上切换,改用绝对路径 +8. web JS: 一次写对,输入用原生setter+事件链,点击前检查disabled,注意引号转义; scan空再scan或innerText +9. SOP: 执行前读取缓存硬参数,禁凭印象,有utils必用; 复杂长程先读plan_sop +10. 用户提及或复杂长程需规划任务要读plan_sop进入规划模式 diff --git a/assets/ljq_web_driver.user.js b/assets/ljq_web_driver.user.js index 2e90d36..1674be2 100644 --- a/assets/ljq_web_driver.user.js +++ b/assets/ljq_web_driver.user.js @@ -11,7 +11,7 @@ // @grant GM_xmlhttpRequest // @grant GM_openInTab // @grant unsafeWindow -// @connect localhost +// @connect 127.0.0.1 // @run-at document-start // ==/UserScript== @@ -26,13 +26,13 @@ return; } - const wsUrl = 'ws://localhost:18765'; - const httpUrl = 'http://localhost:18766/'; + const wsUrl = 'ws://127.0.0.1:18765'; + const httpUrl = 'http://127.0.0.1:18766/'; function isWebSocketServerAlive(callback) { GM_xmlhttpRequest({ method: 'GET', - url: 'http://localhost:18765/', + url: 'http://127.0.0.1:18765/', onload: () => callback(true), onerror: () => callback(false) }); diff --git a/chatapp_common.py b/chatapp_common.py new file mode 100644 index 0000000..88d345a --- /dev/null +++ b/chatapp_common.py @@ -0,0 +1,186 @@ +import asyncio, glob, os, queue as Q, re, socket, sys, time + +HELP_TEXT = "📖 命令列表:\n/help - 显示帮助\n/status - 查看状态\n/stop - 停止当前任务\n/new - 清空当前上下文\n/restore - 恢复上次对话历史\n/llm [n] - 查看或切换模型" +FILE_HINT = "If you need to show files to user, use [FILE:filepath] in your response." +TAG_PATS = [r"<" + t + r">.*?" for t in ("thinking", "summary", "tool_use", "file_content")] + + +def clean_reply(text): + for pat in TAG_PATS: + text = re.sub(pat, "", text or "", flags=re.DOTALL) + return re.sub(r"\n{3,}", "\n\n", text).strip() or "..." + + +def extract_files(text): + return re.findall(r"\[FILE:([^\]]+)\]", text or "") + + +def strip_files(text): + return re.sub(r"\[FILE:[^\]]+\]", "", text or "").strip() + + +def split_text(text, limit): + text, parts = (text or "").strip() or "...", [] + while len(text) > limit: + cut = text.rfind("\n", 0, limit) + if cut < limit * 0.6: + cut = limit + parts.append(text[:cut].rstrip()) + text = text[cut:].lstrip() + return parts + ([text] if text else []) or ["..."] + + +def format_restore(): + files = glob.glob("./temp/model_responses_*.txt") + if not files: + return None, "❌ 没有找到历史记录" + latest = max(files, key=os.path.getmtime) + with open(latest, "r", encoding="utf-8") as f: + content = f.read() + users = re.findall(r"=== USER ===\n(.+?)(?==== |$)", content, re.DOTALL) + resps = re.findall(r"=== Response ===.*?\n(.+?)(?==== Prompt|$)", content, re.DOTALL) + restored = [] + for u, r in zip(users, resps): + u, r = u.strip(), r.strip()[:500] + if u and r: + restored.extend([f"[USER]: {u}", f"[Agent] {r}"]) + if not restored: + return None, "❌ 历史记录里没有可恢复内容" + return (restored, os.path.basename(latest), len(restored) // 2), None + + +def build_done_text(raw_text): + files = [p for p in extract_files(raw_text) if os.path.exists(p)] + body = strip_files(clean_reply(raw_text)) + if files: + body = (body + "\n\n" if body else "") + "\n".join(f"生成文件: {p}" for p in files) + return body or "..." + + +def public_access(allowed): + return not allowed or "*" in allowed + + +def to_allowed_set(value): + if value is None: + return set() + if isinstance(value, str): + value = [value] + return {str(x).strip() for x in value if str(x).strip()} + + +def allowed_label(allowed): + return "public" if public_access(allowed) else sorted(allowed) + + +def ensure_single_instance(port, label): + try: + lock_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + lock_sock.bind(("127.0.0.1", port)) + return lock_sock + except OSError: + print(f"[{label}] Another instance is already running, skipping...") + sys.exit(1) + + +def require_runtime(agent, label, **required): + missing = [k for k, v in required.items() if not v] + if missing: + print(f"[{label}] ERROR: please set {', '.join(missing)} in mykey.py or mykey.json") + sys.exit(1) + if agent.llmclient is None: + print(f"[{label}] ERROR: no usable LLM backend found in mykey.py or mykey.json") + sys.exit(1) + + +def redirect_log(script_file, log_name, label, allowed): + log_dir = os.path.join(os.path.dirname(script_file), "temp") + os.makedirs(log_dir, exist_ok=True) + logf = open(os.path.join(log_dir, log_name), "a", encoding="utf-8", buffering=1) + sys.stdout = sys.stderr = logf + print(f"[NEW] {label} process starting, the above are history infos ...") + print(f"[{label}] allow list: {allowed_label(allowed)}") + + +class AgentChatMixin: + label = "Chat" + source = "chat" + split_limit = 1500 + ping_interval = 20 + + def __init__(self, agent, user_tasks): + self.agent, self.user_tasks = agent, user_tasks + + async def send_text(self, chat_id, content, **ctx): + raise NotImplementedError + + async def send_done(self, chat_id, raw_text, **ctx): + await self.send_text(chat_id, build_done_text(raw_text), **ctx) + + async def handle_command(self, chat_id, cmd, **ctx): + parts = (cmd or "").split() + op = (parts[0] if parts else "").lower() + if op == "/stop": + state = self.user_tasks.get(chat_id) + if state: + state["running"] = False + self.agent.abort() + return await self.send_text(chat_id, "⏹️ 正在停止...", **ctx) + if op == "/status": + llm = self.agent.get_llm_name() if self.agent.llmclient else "未配置" + return await self.send_text(chat_id, f"状态: {'🔴 运行中' if self.agent.is_running else '🟢 空闲'}\nLLM: [{self.agent.llm_no}] {llm}", **ctx) + if op == "/llm": + if not self.agent.llmclient: + return await self.send_text(chat_id, "❌ 当前没有可用的 LLM 配置", **ctx) + if len(parts) > 1: + try: + self.agent.next_llm(int(parts[1])) + return await self.send_text(chat_id, f"✅ 已切换到 [{self.agent.llm_no}] {self.agent.get_llm_name()}", **ctx) + except Exception: + return await self.send_text(chat_id, f"用法: /llm <0-{len(self.agent.list_llms()) - 1}>", **ctx) + lines = [f"{'→' if cur else ' '} [{i}] {name}" for i, name, cur in self.agent.list_llms()] + return await self.send_text(chat_id, "LLMs:\n" + "\n".join(lines), **ctx) + if op == "/restore": + try: + restored_info, err = format_restore() + if err: + return await self.send_text(chat_id, err, **ctx) + restored, fname, count = restored_info + self.agent.abort() + self.agent.history.extend(restored) + return await self.send_text(chat_id, f"✅ 已恢复 {count} 轮对话\n来源: {fname}\n(仅恢复上下文,请输入新问题继续)", **ctx) + except Exception as e: + return await self.send_text(chat_id, f"❌ 恢复失败: {e}", **ctx) + if op == "/new": + self.agent.abort() + self.agent.history = [] + return await self.send_text(chat_id, "🆕 已清空当前共享上下文", **ctx) + return await self.send_text(chat_id, HELP_TEXT, **ctx) + + async def run_agent(self, chat_id, text, **ctx): + state = {"running": True} + self.user_tasks[chat_id] = state + try: + await self.send_text(chat_id, "思考中...", **ctx) + dq = self.agent.put_task(f"{FILE_HINT}\n\n{text}", source=self.source) + last_ping = time.time() + while state["running"]: + try: + item = await asyncio.to_thread(dq.get, True, 3) + except Q.Empty: + if self.agent.is_running and time.time() - last_ping > self.ping_interval: + await self.send_text(chat_id, "⏳ 还在处理中,请稍等...", **ctx) + last_ping = time.time() + continue + if "done" in item: + await self.send_done(chat_id, item.get("done", ""), **ctx) + break + if not state["running"]: + await self.send_text(chat_id, "⏹️ 已停止", **ctx) + except Exception as e: + import traceback + print(f"[{self.label}] run_agent error: {e}") + traceback.print_exc() + await self.send_text(chat_id, f"❌ 错误: {e}", **ctx) + finally: + self.user_tasks.pop(chat_id, None) diff --git a/dingtalkapp.py b/dingtalkapp.py index d4a07dc..bcb2faa 100644 --- a/dingtalkapp.py +++ b/dingtalkapp.py @@ -1,8 +1,9 @@ -import os, sys, re, threading, asyncio, queue as Q, socket, time, glob, json +import asyncio, json, os, sys, threading, time import requests sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from agentmain import GeneraticAgent +from chatapp_common import AgentChatMixin, ensure_single_instance, public_access, redirect_log, require_runtime, split_text from llmcore import mykeys try: @@ -12,82 +13,26 @@ except Exception: print("Please install dingtalk-stream to use DingTalk: pip install dingtalk-stream") sys.exit(1) -agent = GeneraticAgent() -agent.verbose = False - +agent = GeneraticAgent(); agent.verbose = False CLIENT_ID = str(mykeys.get("dingtalk_client_id", "") or "").strip() CLIENT_SECRET = str(mykeys.get("dingtalk_client_secret", "") or "").strip() ALLOWED = {str(x).strip() for x in mykeys.get("dingtalk_allowed_users", []) if str(x).strip()} - -_TAG_PATS = [r"<" + t + r">.*?" for t in ("thinking", "summary", "tool_use", "file_content")] -_USER_TASKS = {} +USER_TASKS = {} -def _clean(text): - for pat in _TAG_PATS: - text = re.sub(pat, "", text, flags=re.DOTALL) - return re.sub(r"\n{3,}", "\n\n", text).strip() or "..." +class DingTalkApp(AgentChatMixin): + label, source, split_limit = "DingTalk", "dingtalk", 1800 - -def _extract_files(text): - return re.findall(r"\[FILE:([^\]]+)\]", text or "") - - -def _strip_files(text): - return re.sub(r"\[FILE:[^\]]+\]", "", text or "").strip() - - -def _split_text(text, limit=1800): - text = (text or "").strip() or "..." - parts = [] - while len(text) > limit: - cut = text.rfind("\n", 0, limit) - if cut < limit * 0.6: - cut = limit - parts.append(text[:cut].rstrip()) - text = text[cut:].lstrip() - if text: - parts.append(text) - return parts or ["..."] - - -def _format_restore(): - files = glob.glob("./temp/model_responses_*.txt") - if not files: - return None, "❌ 没有找到历史记录" - latest = max(files, key=os.path.getmtime) - with open(latest, "r", encoding="utf-8") as f: - content = f.read() - users = re.findall(r"=== USER ===\n(.+?)(?==== |$)", content, re.DOTALL) - resps = re.findall(r"=== Response ===.*?\n(.+?)(?==== Prompt|$)", content, re.DOTALL) - count, restored = 0, [] - for u, r in zip(users, resps): - u, r = u.strip(), r.strip()[:500] - if u and r: - restored.extend([f"[USER]: {u}", f"[Agent] {r}"]) - count += 1 - if not restored: - return None, "❌ 历史记录里没有可恢复内容" - return (restored, os.path.basename(latest), count), None - - -class DingTalkApp: def __init__(self): - self.client = None - self.access_token = None - self.token_expiry = 0 - self.background_tasks = set() + super().__init__(agent, USER_TASKS) + self.client, self.access_token, self.token_expiry, self.background_tasks = None, None, 0, set() async def _get_access_token(self): if self.access_token and time.time() < self.token_expiry: return self.access_token def _fetch(): - resp = requests.post( - "https://api.dingtalk.com/v1.0/oauth2/accessToken", - json={"appKey": CLIENT_ID, "appSecret": CLIENT_SECRET}, - timeout=20, - ) + resp = requests.post("https://api.dingtalk.com/v1.0/oauth2/accessToken", json={"appKey": CLIENT_ID, "appSecret": CLIENT_SECRET}, timeout=20) resp.raise_for_status() return resp.json() @@ -107,30 +52,17 @@ class DingTalkApp: headers = {"x-acs-dingtalk-access-token": token} if chat_id.startswith("group:"): url = "https://api.dingtalk.com/v1.0/robot/groupMessages/send" - payload = { - "robotCode": CLIENT_ID, - "openConversationId": chat_id[6:], - "msgKey": msg_key, - "msgParam": json.dumps(msg_param, ensure_ascii=False), - } + payload = {"robotCode": CLIENT_ID, "openConversationId": chat_id[6:], "msgKey": msg_key, "msgParam": json.dumps(msg_param, ensure_ascii=False)} else: url = "https://api.dingtalk.com/v1.0/robot/oToMessages/batchSend" - payload = { - "robotCode": CLIENT_ID, - "userIds": [chat_id], - "msgKey": msg_key, - "msgParam": json.dumps(msg_param, ensure_ascii=False), - } + payload = {"robotCode": CLIENT_ID, "userIds": [chat_id], "msgKey": msg_key, "msgParam": json.dumps(msg_param, ensure_ascii=False)} def _post(): resp = requests.post(url, json=payload, headers=headers, timeout=20) body = resp.text if resp.status_code != 200: raise RuntimeError(f"HTTP {resp.status_code}: {body[:300]}") - try: - result = resp.json() - except Exception: - result = {} + result = resp.json() if "json" in resp.headers.get("content-type", "") else {} errcode = result.get("errcode") if errcode not in (None, 0): raise RuntimeError(f"API errcode={errcode}: {body[:300]}") @@ -143,119 +75,32 @@ class DingTalkApp: return False async def send_text(self, chat_id, content): - for part in _split_text(content): + for part in split_text(content, self.split_limit): await self._send_batch_message(chat_id, "sampleMarkdown", {"text": part, "title": "Agent Reply"}) - async def send_done(self, chat_id, raw_text): - files = [p for p in _extract_files(raw_text) if os.path.exists(p)] - body = _strip_files(_clean(raw_text)) - if files: - body = (body + "\n\n" if body else "") + "\n".join([f"生成文件: {p}" for p in files]) - await self.send_text(chat_id, body or "...") - - async def handle_command(self, chat_id, cmd): - parts = (cmd or "").split() - op = (parts[0] if parts else "").lower() - if op == "/stop": - state = _USER_TASKS.get(chat_id) - if state: - state["running"] = False - agent.abort() - await self.send_text(chat_id, "⏹️ 正在停止...") - elif op == "/status": - llm = agent.get_llm_name() if agent.llmclient else "未配置" - await self.send_text(chat_id, f"状态: {'🔴 运行中' if agent.is_running else '🟢 空闲'}\nLLM: [{agent.llm_no}] {llm}") - elif op == "/llm": - if not agent.llmclient: - return await self.send_text(chat_id, "❌ 当前没有可用的 LLM 配置") - if len(parts) > 1: - try: - n = int(parts[1]) - agent.next_llm(n) - await self.send_text(chat_id, f"✅ 已切换到 [{agent.llm_no}] {agent.get_llm_name()}") - except Exception: - await self.send_text(chat_id, f"用法: /llm <0-{len(agent.list_llms()) - 1}>") - else: - lines = [f"{'→' if cur else ' '} [{i}] {name}" for i, name, cur in agent.list_llms()] - await self.send_text(chat_id, "LLMs:\n" + "\n".join(lines)) - elif op == "/restore": - try: - restored_info, err = _format_restore() - if err: - return await self.send_text(chat_id, err) - restored, fname, count = restored_info - agent.abort() - agent.history.extend(restored) - await self.send_text(chat_id, f"✅ 已恢复 {count} 轮对话\n来源: {fname}\n(仅恢复上下文,请输入新问题继续)") - except Exception as e: - await self.send_text(chat_id, f"❌ 恢复失败: {e}") - elif op == "/new": - agent.abort() - agent.history = [] - await self.send_text(chat_id, "🆕 已清空当前共享上下文") - else: - await self.send_text( - chat_id, - "📖 命令列表:\n/help - 显示帮助\n/status - 查看状态\n/stop - 停止当前任务\n/new - 清空当前上下文\n/restore - 恢复上次对话历史\n/llm [n] - 查看或切换模型", - ) - - async def run_agent(self, chat_id, text): - state = {"running": True} - _USER_TASKS[chat_id] = state - try: - await self.send_text(chat_id, "思考中...") - prompt = f"If you need to show files to user, use [FILE:filepath] in your response.\n\n{text}" - dq = agent.put_task(prompt, source="dingtalk") - last_ping = time.time() - while state["running"]: - try: - item = await asyncio.to_thread(dq.get, True, 3) - except Q.Empty: - if agent.is_running and time.time() - last_ping > 20: - await self.send_text(chat_id, "⏳ 还在处理中,请稍等...") - last_ping = time.time() - continue - if "done" in item: - await self.send_done(chat_id, item.get("done", "")) - break - if not state["running"]: - await self.send_text(chat_id, "⏹️ 已停止") - except Exception as e: - import traceback - - print(f"[DingTalk] run_agent error: {e}") - traceback.print_exc() - await self.send_text(chat_id, f"❌ 错误: {e}") - finally: - _USER_TASKS.pop(chat_id, None) - async def on_message(self, content, sender_id, sender_name, conversation_type=None, conversation_id=None): try: if not content: return - public_access = not ALLOWED or "*" in ALLOWED - if not public_access and sender_id not in ALLOWED: + if not public_access(ALLOWED) and sender_id not in ALLOWED: print(f"[DingTalk] unauthorized user: {sender_id}") return is_group = conversation_type == "2" and conversation_id chat_id = f"group:{conversation_id}" if is_group else sender_id print(f"[DingTalk] message from {sender_name} ({sender_id}): {content}") if content.startswith("/"): - await self.handle_command(chat_id, content) - return + return await self.handle_command(chat_id, content) task = asyncio.create_task(self.run_agent(chat_id, content)) self.background_tasks.add(task) task.add_done_callback(self.background_tasks.discard) except Exception: import traceback - print("[DingTalk] handle_message error") traceback.print_exc() async def start(self): - handler = _DingTalkHandler(self) self.client = DingTalkStreamClient(Credential(CLIENT_ID, CLIENT_SECRET)) - self.client.register_callback_handler(ChatbotMessage.TOPIC, handler) + self.client.register_callback_handler(ChatbotMessage.TOPIC, _DingTalkHandler(self)) print("[DingTalk] bot starting...") while True: try: @@ -274,44 +119,22 @@ class _DingTalkHandler(CallbackHandler): async def process(self, message): try: chatbot_msg = ChatbotMessage.from_dict(message.data) - text = "" - if getattr(getattr(chatbot_msg, "text", None), "content", None): - text = chatbot_msg.text.content.strip() + text = getattr(getattr(chatbot_msg, "text", None), "content", "") or "" extensions = getattr(chatbot_msg, "extensions", None) or {} recognition = ((extensions.get("content") or {}).get("recognition") or "").strip() if isinstance(extensions, dict) else "" - if not text: + if not (text := text.strip()): text = recognition or str((message.data.get("text", {}) or {}).get("content", "") or "").strip() - sender_id = getattr(chatbot_msg, "sender_staff_id", None) or getattr(chatbot_msg, "sender_id", None) or "unknown" + sender_id = str(getattr(chatbot_msg, "sender_staff_id", None) or getattr(chatbot_msg, "sender_id", None) or "unknown") sender_name = getattr(chatbot_msg, "sender_nick", None) or "Unknown" - conversation_type = message.data.get("conversationType") - conversation_id = message.data.get("conversationId") or message.data.get("openConversationId") - await self.app.on_message(text, str(sender_id), sender_name, conversation_type, conversation_id) - return AckMessage.STATUS_OK, "OK" + await self.app.on_message(text, sender_id, sender_name, message.data.get("conversationType"), message.data.get("conversationId") or message.data.get("openConversationId")) except Exception as e: print(f"[DingTalk] callback error: {e}") - return AckMessage.STATUS_OK, "Error" + return AckMessage.STATUS_OK, "OK" if __name__ == "__main__": - try: - _lock_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - _lock_sock.bind(("127.0.0.1", 19530)) - except OSError: - print("[DingTalk] Another instance is already running, skipping...") - sys.exit(1) - - if not CLIENT_ID or not CLIENT_SECRET: - print("[DingTalk] ERROR: please set dingtalk_client_id and dingtalk_client_secret in mykey.py or mykey.json") - sys.exit(1) - if agent.llmclient is None: - print("[DingTalk] ERROR: no usable LLM backend found in mykey.py or mykey.json") - sys.exit(1) - - log_dir = os.path.join(os.path.dirname(__file__), "temp") - os.makedirs(log_dir, exist_ok=True) - _logf = open(os.path.join(log_dir, "dingtalkapp.log"), "a", encoding="utf-8", buffering=1) - sys.stdout = sys.stderr = _logf - print("[NEW] DingTalk process starting, the above are history infos ...") - print(f"[DingTalk] allow list: {'public' if not ALLOWED or '*' in ALLOWED else sorted(ALLOWED)}") + _LOCK_SOCK = ensure_single_instance(19530, "DingTalk") + require_runtime(agent, "DingTalk", dingtalk_client_id=CLIENT_ID, dingtalk_client_secret=CLIENT_SECRET) + redirect_log(__file__, "dingtalkapp.log", "DingTalk", ALLOWED) threading.Thread(target=agent.run, daemon=True).start() asyncio.run(DingTalkApp().start()) diff --git a/ga.py b/ga.py index ee1d731..e5b64c1 100644 --- a/ga.py +++ b/ga.py @@ -246,8 +246,7 @@ class GenericAgentHandler(BaseHandler): '''Generic Agent 工具库,包含多种工具的实现。工具函数自动加上了 do_ 前缀。实际工具名没有前缀。''' def __init__(self, parent, last_history=None, cwd='./'): self.parent = parent - self.key_info = "" - self.related_sop = "" + self.working = {} self.cwd = cwd; self.current_turn = 0 self.history_info = last_history if last_history else [] self.code_stop_signal = [] @@ -408,11 +407,12 @@ class GenericAgentHandler(BaseHandler): ''' key_info = args.get("key_info", "") related_sop = args.get("related_sop", "") - if "key_info" in args: self.key_info = key_info - if "related_sop" in args: self.related_sop = related_sop + if "key_info" in args: self.working['key_info'] = key_info + if "related_sop" in args: self.working['related_sop'] = related_sop + self.working['passed_sessions'] = 0 yield f"[Info] Updated key_info and related_sop.\n" - yield f"key_info:\n{self.key_info}\n\n" - yield f"related_sop:\n{self.related_sop}\n\n" + yield f"key_info:\n{self.working.get('key_info', '')}\n\n" + yield f"related_sop:\n{self.working.get('related_sop', '')}\n\n" next_prompt = self._get_anchor_prompt() #next_prompt += '\n[SYSTEM TIPS] 此函数一般在任务开始或中间时调用,如果任务已成功完成应该是start_long_term_update用于结算长期记忆。\n' return StepOutcome({"status": "success"}, next_prompt=next_prompt) @@ -477,14 +477,14 @@ class GenericAgentHandler(BaseHandler): h_str = "\n".join(self.history_info[-20:]) prompt = f"\n### [WORKING MEMORY]\n\n{h_str}\n" prompt += f"\nCurrent turn: {self.current_turn}\n" - if self.key_info: prompt += f"\n{self.key_info}" - if self.related_sop: prompt += f"\n有不清晰的地方请再次读取{self.related_sop}" + if self.working.get('key_info'): prompt += f"\n{self.working.get('key_info')}" + if self.working.get('related_sop'): prompt += f"\n有不清晰的地方请再次读取{self.working.get('related_sop')}" try: print(prompt) except: pass return prompt def next_prompt_patcher(self, next_prompt, outcome, turn): - if turn % 35 == 0 and 'plan' not in str(self.related_sop): + if turn % 35 == 0 and 'plan' not in str(self.working.get('related_sop')): next_prompt += f"\n\n[DANGER] 已连续执行第 {turn} 轮。你必须总结情况进行ask_user,不允许继续重试。" elif turn % 7 == 0: next_prompt += f"\n\n[DANGER] 已连续执行第 {turn} 轮。禁止无效重试。若无有效进展,必须切换策略:1. 探测物理边界 2. 请求用户协助。如有需要,可调用 update_working_checkpoint 保存关键上下文。" diff --git a/llmcore.py b/llmcore.py index c7ac438..2c46a0f 100644 --- a/llmcore.py +++ b/llmcore.py @@ -54,10 +54,10 @@ def build_multimodal_content(prompt_text, image_paths): return parts class SiderLLMSession: - def __init__(self, sider_cookie, default_model="gemini-3.0-flash"): + def __init__(self, cfg): from sider_ai_api import Session # 不使用sider的话没必要安装这个包 - self._core = Session(cookie=sider_cookie, proxies=proxies) - self.default_model = default_model + self._core = Session(cookie=cfg['apikey'], proxies=proxies) + self.default_model = cfg.get('model', 'gemini-3.0-flash') def ask(self, prompt, model=None, stream=False): if model is None: model = self.default_model if len(prompt) > 28000: @@ -68,8 +68,10 @@ class SiderLLMSession: return full_text class ClaudeSession: - def __init__(self, api_key, api_base, model="claude-opus", context_win=12000): - self.api_key, self.api_base, self.default_model, self.context_win = api_key, api_base.rstrip('/'), model, context_win + def __init__(self, cfg): + self.api_key = cfg['apikey']; self.api_base = cfg['apibase'].rstrip('/') + self.default_model = cfg.get('model', 'claude-opus') + self.context_win = cfg.get('context_win', 12000) self.raw_msgs, self.lock = [], threading.Lock() def _trim_messages(self, messages): compress_history_tags(messages) @@ -84,6 +86,7 @@ class ClaudeSession: return result[::-1] or messages[-2:] def raw_ask(self, messages, model=None, temperature=0.5, max_tokens=6144): model = model or self.default_model + if 'kimi' in model.lower() or 'moonshot' in model.lower(): temperature = 1.0 # kimi/moonshot only accepts temp 1.0 headers = {"x-api-key": self.api_key, "Content-Type": "application/json", "anthropic-version": "2023-06-01"} payload = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, "stream": True} try: @@ -117,28 +120,28 @@ class ClaudeSession: return _ask_gen() if stream else ''.join(list(_ask_gen())) class LLMSession: - def __init__(self, api_key, api_base, model, context_win=16000, proxy=None, api_mode="chat_completions", - max_retries=2, connect_timeout=10, read_timeout=120): - self.api_key = api_key; self.api_base = api_base.rstrip('/'); self.default_model = model - self.context_win = context_win; self.raw_msgs = []; self.messages = [] + def __init__(self, cfg): + self.api_key = cfg['apikey']; self.api_base = cfg['apibase'].rstrip('/') + self.default_model = cfg['model'] + self.context_win = cfg.get('context_win', 16000) + self.raw_msgs, self.messages = [], [] + proxy = cfg.get('proxy') self.proxies = {"http": proxy, "https": proxy} if proxy else None + self.prompt_cache = cfg.get('prompt_cache', False) self.lock = threading.Lock() - self.max_retries = max(0, int(max_retries)) - self.connect_timeout = max(1, int(connect_timeout)) - self.read_timeout = max(5, int(read_timeout)) - mode = str(api_mode or "chat_completions").strip().lower().replace('-', '_') + self.max_retries = max(0, int(cfg.get('max_retries', 2))) + self.connect_timeout = max(1, int(cfg.get('connect_timeout', 10))) + self.read_timeout = max(5, int(cfg.get('read_timeout', 120))) + mode = str(cfg.get('api_mode', 'chat_completions')).strip().lower().replace('-', '_') if mode in ["responses", "response"]: self.api_mode = "responses" else: self.api_mode = "chat_completions" def _retry_delay(self, resp, attempt): retry_after = None try: - if resp is not None: - retry_after = (resp.headers or {}).get("retry-after") - if retry_after is not None: - retry_after = float(retry_after) - except: - retry_after = None + if resp is not None: retry_after = (resp.headers or {}).get("retry-after") + if retry_after is not None: retry_after = float(retry_after) + except: retry_after = None if retry_after is None: retry_after = min(30.0, 1.5 * (2 ** attempt)) return max(0.5, float(retry_after)) @@ -168,6 +171,7 @@ class LLMSession: def raw_ask(self, messages, model=None, temperature=0.5): if model is None: model = self.default_model + if 'kimi' in model.lower() or 'moonshot' in model.lower(): temperature = 1.0 # kimi/moonshot only accepts temp 1.0 headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"} if self.api_mode == "responses": url = auto_make_url(self.api_base, "responses") @@ -310,7 +314,7 @@ class LLMSession: content += chunk; yield chunk if not content.startswith("Error:"): self.raw_msgs.append({"role": "assistant", "prompt": content, "image": None}) - if total_len > 5000: print(f"[Debug] Whole context length {total_len} {str(msg_lens)}.") + if total_len > self.context_win // 2: print(f"[Debug] Whole context length {total_len} {str(msg_lens)}.") if total_len > self.context_win: yield '[NextWillSummary]' threading.Thread(target=self.summary_history, daemon=True).start() @@ -319,11 +323,12 @@ class LLMSession: class GeminiSession: - def __init__(self, api_key=None, default_model="gemini-2.0-flash-001", proxy=proxy): - self.api_key = api_key or google_api_key + def __init__(self, cfg): + self.api_key = cfg.get('apikey') or google_api_key if not self.api_key: raise ValueError("google_api_key 未配置或为空,请在 mykey.py 中设置") - self.default_model = default_model - self.proxies = {"http":proxy, "https":proxy} if proxy else None + self.default_model = cfg.get('model', 'gemini-2.0-flash-001') + p = cfg.get('proxy', proxy) + self.proxies = {"http":p, "https":p} if p else None def ask(self, prompt, model=None, stream=False): if model is None: model = self.default_model url = f"https://generativelanguage.googleapis.com/v1/models/{model}:generateContent?key={self.api_key}" @@ -347,13 +352,14 @@ class GeminiSession: return iter([full_text]) if stream else full_text class XaiSession: - def __init__(self, api_key, proxy="http://127.0.0.1:2082", default_model="grok-4-1-fast-non-reasoning"): + def __init__(self, cfg): import xai_sdk from xai_sdk.chat import user, system self._user, self._system = user, system - self.default_model = default_model + self.default_model = cfg.get('model', 'grok-4-1-fast-non-reasoning') self._last_response_id = None # 多轮对话链 - os.environ["XAI_API_KEY"] = api_key + os.environ["XAI_API_KEY"] = cfg['apikey'] + proxy = cfg.get('proxy', 'http://127.0.0.1:2082') if not proxy.startswith("http"): proxy = f"http://{proxy}" os.environ.setdefault("grpc_proxy", proxy) self._client = xai_sdk.Client() @@ -411,23 +417,28 @@ class ToolClient: self.total_cd_tokens = 0 def chat(self, messages, tools=None): - if self._should_use_structured_messages(messages): - return (yield from self._chat_structured(messages, tools)) - full_prompt = self._build_protocol_prompt(messages, tools) - print("Full prompt length:", len(full_prompt), 'chars') script_dir = os.path.dirname(os.path.abspath(__file__)) - with open(os.path.join(script_dir, f'./temp/model_responses_{os.getpid()}.txt'), 'a', encoding='utf-8', errors="replace") as f: - f.write(f"=== Prompt === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n{full_prompt}\n") - gen = self.backend.ask(full_prompt, stream=True) + log_path = os.path.join(script_dir, f'./temp/model_responses_{os.getpid()}.txt') + if self._should_use_structured_messages(messages): + backend_messages = self._build_backend_messages(messages, tools) + print("Structured prompt length:", sum(self._estimate_content_len(m.get("content")) for m in backend_messages), 'chars') + prompt_log = self._serialize_messages_for_log(backend_messages) + gen = self.backend.raw_ask(backend_messages) + else: + full_prompt = self._build_protocol_prompt(messages, tools) + print("Full prompt length:", len(full_prompt), 'chars') + prompt_log = full_prompt + gen = self.backend.ask(full_prompt, stream=True) + with open(log_path, 'a', encoding='utf-8', errors="replace") as f: + f.write(f"=== Prompt === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n{prompt_log}\n") raw_text = ''; summarytag = '[NextWillSummary]' for chunk in gen: - raw_text += chunk; + raw_text += chunk if chunk != summarytag: yield chunk print('Complete response received.') if raw_text.endswith(summarytag): self.last_tools = ''; raw_text = raw_text[:-len(summarytag)] - script_dir = os.path.dirname(os.path.abspath(__file__)) - with open(os.path.join(script_dir, f'./temp/model_responses_{os.getpid()}.txt'), 'a', encoding='utf-8', errors="replace") as f: + with open(log_path, 'a', encoding='utf-8', errors="replace") as f: f.write(f"=== Response === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n{raw_text}\n\n") return self._parse_mixed_response(raw_text) @@ -504,24 +515,6 @@ class ToolClient: logged.append(msg) return json.dumps(logged, ensure_ascii=False, indent=2) - def _chat_structured(self, messages, tools): - backend_messages = self._build_backend_messages(messages, tools) - print("Structured prompt length:", sum(self._estimate_content_len(m.get("content")) for m in backend_messages), 'chars') - script_dir = os.path.dirname(os.path.abspath(__file__)) - with open(os.path.join(script_dir, f'./temp/model_responses_{os.getpid()}.txt'), 'a', encoding='utf-8', errors="replace") as f: - f.write(f"=== Prompt === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n{self._serialize_messages_for_log(backend_messages)}\n") - gen = self.backend.raw_ask(backend_messages) - raw_text = ''; summarytag = '[NextWillSummary]' - for chunk in gen: - raw_text += chunk - if chunk != summarytag: yield chunk - print('Complete response received.') - if raw_text.endswith(summarytag): - self.last_tools = ''; raw_text = raw_text[:-len(summarytag)] - with open(os.path.join(script_dir, f'./temp/model_responses_{os.getpid()}.txt'), 'a', encoding='utf-8', errors="replace") as f: - f.write(f"=== Response === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n{raw_text}\n\n") - return self._parse_mixed_response(raw_text) - def _build_protocol_prompt(self, messages, tools): system_content = next((m['content'] for m in messages if m['role'].lower() == 'system'), "") history_msgs = [m for m in messages if m['role'].lower() != 'system'] @@ -609,10 +602,7 @@ if __name__ == "__main__": } google_api_key = mykeys.get("google_api_key") cfg = oai_configs.get("oai_config") - - llmclient = ToolClient(GeminiSession(api_key=google_api_key, proxy='127.0.0.1:2082').ask) - #llmclient = ToolClient(LLMSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model']).ask) - #llmclient = ToolClient(SiderLLMSession().ask) + llmclient = ToolClient(LLMSession(cfg)) def get_final(gen): try: while True: print('mid:', next(gen)) diff --git a/memory/autonomous_operation_sop.md b/memory/autonomous_operation_sop.md index 324610f..3cb8bf8 100644 --- a/memory/autonomous_operation_sop.md +++ b/memory/autonomous_operation_sop.md @@ -15,7 +15,7 @@ ## 执行 **启动**: -- update_working_checkpoint: `自主行动|报告→./autonomous_reports/R{XX}_简短描述.md|≤30回合|收尾:重读sop,写报告+更新history+标记TODO` +- update_working_checkpoint: `自主行动|报告→./autonomous_reports/R{XX}_简短描述.md|≤30回合|收尾:重读sop,写报告+更新history+标记TODO | 报告严禁放错位置` - 读 `./autonomous_reports/history.txt` 推断下一编号RXX + 了解历史避免重复 **执行**: diff --git a/memory/skill_search/SKILL.md b/memory/skill_search/SKILL.md new file mode 100644 index 0000000..e89b34b --- /dev/null +++ b/memory/skill_search/SKILL.md @@ -0,0 +1,64 @@ +# Skill Search — 105K 技能卡检索 + +> 从 105K+ 技能卡中语义搜索最匹配的 skill。零依赖,内置默认 API 地址,开箱即用。 + +## 最简调用 + +```python +import sys; sys.path.append('../memory/skill_search') +from skill_search import search + +results = search("python send email") # ⚠️ 必须用英文查询,中文匹配效果极差 +for r in results: + s = r.skill + print(f"[{r.final_score:.2f}] {s.name} — {s.one_line_summary}") + print(f" key: {s.key} category: {s.category} tags: {s.tags[:3]}") +``` + +## API 签名 + +```python +search(query, env=None, category=None, top_k=10) -> list[SearchResult] +# env: 自动检测,一般不传 +# category: 可选过滤,如 "devops" +# top_k: 返回数量,默认10 +``` + +## 返回结构 + +``` +SearchResult + .final_score float 综合评分 (0~1) + .relevance float 语义相关度 + .quality float 质量分 + .match_reasons list[str] 匹配原因 + .warnings list[str] 警告 + .skill SkillIndex ↓ + +SkillIndex (常用字段) + .key str 唯一标识/路径 + .name str 名称 + .one_line_summary str 一句话摘要 + .description str 详细描述 + .category str 类别 + .tags list[str] 标签 + .form str 形式(sop/script/...) + .autonomous_safe bool 是否自主安全 +``` + +## CLI + +```bash +python -m skill_search "python testing" +python -m skill_search "docker deployment" --category devops --top 5 +python -m skill_search "git" --json +python -m skill_search --stats +python -m skill_search --env +``` + +## 配置 + +| 项 | 默认值 | 说明 | +|---|---|---| +| API地址 | `http://www.fudankw.cn:58787` | 环境变量 `SKILL_SEARCH_API` 可覆盖 | +| API密钥 | 无(可选) | 环境变量 `SKILL_SEARCH_KEY` | \ No newline at end of file diff --git a/memory/skill_search/skill_search/__init__.py b/memory/skill_search/skill_search/__init__.py new file mode 100644 index 0000000..70b246f --- /dev/null +++ b/memory/skill_search/skill_search/__init__.py @@ -0,0 +1,8 @@ +"""skill_search — Skill 检索 API 客户端""" +from .engine import ( + SkillIndex, SearchResult, SkillSearchError, + search, get_stats, detect_environment, +) + +__all__ = ["SkillIndex", "SearchResult", "SkillSearchError", + "search", "get_stats", "detect_environment"] \ No newline at end of file diff --git a/memory/skill_search/skill_search/__main__.py b/memory/skill_search/skill_search/__main__.py new file mode 100644 index 0000000..6691a27 --- /dev/null +++ b/memory/skill_search/skill_search/__main__.py @@ -0,0 +1,116 @@ +"""CLI 入口: python -m skill_search""" +from __future__ import annotations +import argparse, json, sys +from .engine import SearchResult, SkillSearchError, detect_environment, search, get_stats + + +# ── 格式化 ─────────────────────────────────────────────── + +def format_results(results: list[SearchResult], env: dict, query: str) -> str: + lines = [f'🔍 搜索: "{query}"', + f"🖥️ 环境: {env.get('os','?')} / {env.get('shell','?')} / {', '.join(env.get('runtimes',[]))}", + f"📊 找到 {len(results)} 个匹配结果\n"] + if not results: + lines.append("未找到匹配的 skill。试试其他关键词?") + return "\n".join(lines) + for i, r in enumerate(results, 1): + s = r.skill + safe_icon = "🟢" if s.autonomous_safe else "🔴" + score_bar = "█" * int(r.final_score * 10) + "░" * (10 - int(r.final_score * 10)) + lines += [ + f"{'─'*60}", + f"#{i} {safe_icon} {s.name}", + f" 路径: {s.key}", + f" 类别: {s.category} | 标签: {', '.join(s.tags[:5])}", + f" 摘要: {s.one_line_summary}", + f" 评分: [{score_bar}] {r.final_score:.2f} (相关={r.relevance:.2f} 质量={r.quality:.1f})", + f" 清晰={s.clarity} 完整={s.completeness} 可操作={s.actionability} | 形式={s.form}", + ] + if r.match_reasons: + lines.append(f" 匹配: {' | '.join(r.match_reasons[:3])}") + if r.warnings: + lines.extend(f" {w}" for w in r.warnings) + lines.append("") + lines.append(f"{'─'*60}") + return "\n".join(lines) + + +def format_results_json(results: list[SearchResult]) -> list[dict]: + out = [] + for r in results: + s = r.skill + out.append({ + "rank": len(out) + 1, "key": s.key, "name": s.name, + "category": s.category, "tags": s.tags, + "description": s.description, "one_line_summary": s.one_line_summary, + "scores": {"final": round(r.final_score, 3), "relevance": round(r.relevance, 3), + "quality": round(r.quality, 1), "clarity": s.clarity, + "completeness": s.completeness, "actionability": s.actionability}, + "safety": {"autonomous_safe": s.autonomous_safe, "blast_radius": s.blast_radius, + "requires_credentials": s.requires_credentials, + "data_exposure": s.data_exposure, "effect_scope": s.effect_scope}, + "platform": {"os": s.os, "runtimes": s.runtimes, "tools": s.tools, "services": s.services}, + "warnings": r.warnings, "match_reasons": r.match_reasons, + }) + return out + + +# ── CLI ────────────────────────────────────────────────── + +def main(): + parser = argparse.ArgumentParser(prog="skill_search", + description="Skill 检索系统 — 根据环境和需求智能推荐 skill(API 客户端)") + parser.add_argument("query", nargs="?", help="搜索关键词(如: 'python testing')") + parser.add_argument("--category", "-cat", help="限定类别") + parser.add_argument("--top", "-k", type=int, default=10, help="返回结果数(默认 10)") + parser.add_argument("--json", action="store_true", help="JSON 格式输出") + parser.add_argument("--env", action="store_true", help="仅显示检测到的环境信息") + parser.add_argument("--stats", action="store_true", help="显示索引统计信息") + parser.add_argument("--api-url", help="指定 API 地址(也可用 SKILL_SEARCH_API 环境变量)") + args = parser.parse_args() + + if args.api_url: + import os; os.environ["SKILL_SEARCH_API"] = args.api_url + + env = detect_environment() + + if args.env: + print("🖥️ 当前环境:") + print(f" OS: {env['os']}") + print(f" Shell: {env['shell']}") + print(f" 运行时: {', '.join(env['runtimes'])}") + print(f" 工具: {', '.join(env['tools'])}") + print(f" 模型能力: tool_calling={env['model']['tool_calling']}, " + f"reasoning={env['model']['reasoning']}, context={env['model']['context_window']}") + return + + if args.stats: + try: + stats = get_stats(env) + print(f"📊 索引统计:") + print(f" 总计: {stats.get('total', '?')} 个 skills") + print(f" 自动安全: {stats.get('safe_count', '?')} 个") + if 'categories' in stats: + print(f" 类别分布:") + for cat, cnt in sorted(stats['categories'].items(), key=lambda x: -x[1]): + print(f" {cat:15s} {cnt:4d}") + except SkillSearchError as e: + print(f"❌ {e}", file=sys.stderr); sys.exit(1) + return + + if not args.query: + parser.print_help(); return + + try: + results = search(query=args.query, env=env, category=args.category, top_k=args.top) + except SkillSearchError as e: + print(f"❌ {e}", file=sys.stderr); sys.exit(1) + + if args.json: + print(json.dumps(format_results_json(results), indent=2, ensure_ascii=False)) + else: + print(format_results(results, env, args.query)) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/memory/skill_search/skill_search/engine.py b/memory/skill_search/skill_search/engine.py new file mode 100644 index 0000000..5625a4f --- /dev/null +++ b/memory/skill_search/skill_search/engine.py @@ -0,0 +1,156 @@ +"""Skill 检索引擎 — API 客户端(含数据模型与环境检测)""" +from __future__ import annotations +import json, os, platform, shutil, subprocess, urllib.request, urllib.error +from dataclasses import dataclass, field + +# ── 数据模型 ───────────────────────────────────────────── + +@dataclass +class SkillIndex: + """Skill 索引条目(与服务端结构对齐)""" + key: str + name: str = "" + description: str = "" + one_line_summary: str = "" + category: str = "" + tags: list[str] = field(default_factory=list) + language: str = "en" + os: list[str] = field(default_factory=list) + shell: list[str] = field(default_factory=list) + runtimes: list[str] = field(default_factory=list) + tools: list[str] = field(default_factory=list) + services: list[str] = field(default_factory=list) + needs_tool_calling: bool = False + needs_reasoning: bool = False + min_context_window: str = "standard" + decay_risk: str = "low" + clarity: int = 0 + completeness: int = 0 + actionability: int = 0 + autonomous_safe: bool = True + blast_radius: str = "low" + requires_credentials: bool = False + data_exposure: str = "none" + effect_scope: str = "local" + form: str = "" + estimated_tokens: str = "medium" + capabilities: list[str] = field(default_factory=list) + github_stars: int = 0 + github_url: str = "" + + @property + def quality_score(self): + return self.clarity * 0.3 + self.completeness * 0.3 + self.actionability * 0.4 + + @classmethod + def from_dict(cls, d): + known = {f.name for f in cls.__dataclass_fields__.values()} + return cls(**{k: v for k, v in d.items() if k in known}) + + +@dataclass +class SearchResult: + """单条检索结果""" + skill: SkillIndex + relevance: float = 0.0 + quality: float = 0.0 + final_score: float = 0.0 + match_reasons: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + + @classmethod + def from_dict(cls, d): + skill = SkillIndex.from_dict(d.get("skill", d)) + return cls(skill=skill, relevance=d.get("relevance", 0.0), + quality=d.get("quality", 0.0), final_score=d.get("final_score", 0.0), + match_reasons=d.get("match_reasons", []), warnings=d.get("warnings", [])) + + +# ── 环境检测 ───────────────────────────────────────────── + +def _run(cmd): + try: + r = subprocess.run(cmd.split(), capture_output=True, text=True, timeout=5) + return r.stdout.strip() if r.returncode == 0 else "" + except Exception: + return "" + +def _detect_os(): + s = platform.system().lower() + return {"darwin": "macos", "linux": "linux", "windows": "windows"}.get(s, s) + +def _detect_shell(): + shell = os.environ.get("SHELL", "") + if "zsh" in shell: return "zsh" + if "bash" in shell: return "bash" + if platform.system() == "Windows": return "powershell" + return os.path.basename(shell) if shell else "unknown" + +def _detect_runtimes(): + checks = {"python": ["python3", "python"], "node": ["node"], "go": ["go"], + "rust": ["rustc"], "java": ["java"], "ruby": ["ruby"], + "php": ["php"], "dotnet": ["dotnet"]} + found = [] + for name, cmds in checks.items(): + for cmd in cmds: + if shutil.which(cmd): + found.append(name); break + return found + +def _detect_tools(): + tools = ["git", "docker", "npm", "pip", "curl", "wget", "kubectl", + "terraform", "aws", "gcloud", "az", "brew", "cargo", "make", "cmake"] + return [t for t in tools if shutil.which(t)] + +def detect_environment(): + """采集完整环境信息""" + return {"os": _detect_os(), "shell": _detect_shell(), + "runtimes": _detect_runtimes(), "tools": _detect_tools(), + "model": {"tool_calling": True, "reasoning": True, "context_window": "large"}} + + +# ── API 配置与调用 ──────────────────────────────────────── + +DEFAULT_API_URL = "http://www.fudankw.cn:58787" + +def _get_api_url(): + return os.environ.get("SKILL_SEARCH_API", DEFAULT_API_URL) + +def _get_api_key(): + return os.environ.get("SKILL_SEARCH_KEY") + +class SkillSearchError(Exception): + pass + +def _api_request(endpoint, payload): + url = f"{_get_api_url()}/{endpoint}" + data = json.dumps(payload).encode("utf-8") + headers = {"Content-Type": "application/json"} + api_key = _get_api_key() + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + req = urllib.request.Request(url, data=data, headers=headers, method="POST") + try: + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode("utf-8")) + except urllib.error.HTTPError as e: + body = e.read().decode("utf-8", errors="replace") + raise SkillSearchError(f"API 错误 {e.code}: {body}") from e + except urllib.error.URLError as e: + raise SkillSearchError(f"无法连接服务: {e.reason}") from e + except Exception as e: + raise SkillSearchError(f"请求失败: {e}") from e + + +# ── 公开接口 ───────────────────────────────────────────── + +def search(query, env=None, category=None, top_k=10): + if env is None: env = detect_environment() + payload = {"query": query, "env": env, "top_k": top_k} + if category: payload["category"] = category + resp = _api_request("search", payload) + return [SearchResult.from_dict(r) for r in resp.get("results", [])] + +def get_stats(env=None): + if env is None: env = detect_environment() + return _api_request("stats", {"env": env}) \ No newline at end of file diff --git a/memory/tmwebdriver_sop.md b/memory/tmwebdriver_sop.md index ad26ebd..a004fc2 100644 --- a/memory/tmwebdriver_sop.md +++ b/memory/tmwebdriver_sop.md @@ -104,4 +104,7 @@ web_scan失败时按序排查: ①TM没装?→遍历本机所有Chromium浏览器(Chrome/Edge/Brave…)用户数据目录下Extensions/,各子目录manifest.json搜"tampermonkey" 没找到→走web_setup_sop;找到→记住装在哪个浏览器 ②浏览器没开?→检查①对应的浏览器进程是否在跑(tasklist/ps),没有则启动并打开正常URL(⚠about:blank等内部页不加载扩展) -③WS后台挂了?→socket.connect_ex(('localhost',18766))非0即dead→手动`from TMWebDriver import TMWebDriver; TMWebDriver()`起master \ No newline at end of file +③WS后台挂了?→socket.connect_ex(('127.0.0.1',18766))非0即dead→手动`from TMWebDriver import TMWebDriver; TMWebDriver()`起master + +## 性能 +- ⚠ URL必须用`127.0.0.1`不用`localhost`。Windows下localhost先尝试IPv6(::1)超时2s再回退IPv4,每次HTTP请求多2s \ No newline at end of file diff --git a/qqapp.py b/qqapp.py index 5e65002..cd4223f 100644 --- a/qqapp.py +++ b/qqapp.py @@ -1,8 +1,9 @@ -import os, sys, re, threading, asyncio, queue as Q, socket, time, glob +import asyncio, os, sys, threading, time from collections import deque sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from agentmain import GeneraticAgent +from chatapp_common import AgentChatMixin, ensure_single_instance, public_access, redirect_log, require_runtime, split_text from llmcore import mykeys try: @@ -12,73 +13,19 @@ except Exception: print("Please install qq-botpy to use QQ module: pip install qq-botpy") sys.exit(1) -agent = GeneraticAgent() -agent.verbose = False - +agent = GeneraticAgent(); agent.verbose = False APP_ID = str(mykeys.get("qq_app_id", "") or "").strip() APP_SECRET = str(mykeys.get("qq_app_secret", "") or "").strip() ALLOWED = {str(x).strip() for x in mykeys.get("qq_allowed_users", []) if str(x).strip()} - -_TAG_PATS = [r"<" + t + r">.*?" for t in ("thinking", "summary", "tool_use", "file_content")] -_PROCESSED_IDS = deque(maxlen=1000) -_USER_TASKS = {} -_SEQ_LOCK = threading.Lock() -_MSG_SEQ = 1 +PROCESSED_IDS, USER_TASKS = deque(maxlen=1000), {} +SEQ_LOCK, MSG_SEQ = threading.Lock(), 1 def _next_msg_seq(): - global _MSG_SEQ - with _SEQ_LOCK: - _MSG_SEQ += 1 - return _MSG_SEQ - - -def _clean(text): - for pat in _TAG_PATS: - text = re.sub(pat, "", text, flags=re.DOTALL) - return re.sub(r"\n{3,}", "\n\n", text).strip() or "..." - - -def _extract_files(text): - return re.findall(r"\[FILE:([^\]]+)\]", text or "") - - -def _strip_files(text): - return re.sub(r"\[FILE:[^\]]+\]", "", text or "").strip() - - -def _split_text(text, limit=1500): - text = (text or "").strip() or "..." - parts = [] - while len(text) > limit: - cut = text.rfind("\n", 0, limit) - if cut < limit * 0.6: - cut = limit - parts.append(text[:cut].rstrip()) - text = text[cut:].lstrip() - if text: - parts.append(text) - return parts or ["..."] - - -def _format_restore(): - files = glob.glob("./temp/model_responses_*.txt") - if not files: - return None, "❌ 没有找到历史记录" - latest = max(files, key=os.path.getmtime) - with open(latest, "r", encoding="utf-8") as f: - content = f.read() - users = re.findall(r"=== USER ===\n(.+?)(?==== |$)", content, re.DOTALL) - resps = re.findall(r"=== Response ===.*?\n(.+?)(?==== Prompt|$)", content, re.DOTALL) - count, restored = 0, [] - for u, r in zip(users, resps): - u, r = u.strip(), r.strip()[:500] - if u and r: - restored.extend([f"[USER]: {u}", f"[Agent] {r}"]) - count += 1 - if not restored: - return None, "❌ 历史记录里没有可恢复内容" - return (restored, os.path.basename(latest), count), None + global MSG_SEQ + with SEQ_LOCK: + MSG_SEQ += 1 + return MSG_SEQ def _build_intents(): @@ -86,16 +33,7 @@ def _build_intents(): return botpy.Intents(public_messages=True, direct_message=True) except Exception: intents = botpy.Intents.none() if hasattr(botpy.Intents, "none") else botpy.Intents() - for attr in ( - "public_messages", - "public_guild_messages", - "direct_message", - "direct_messages", - "c2c_message", - "c2c_messages", - "group_at_message", - "group_at_messages", - ): + for attr in ("public_messages", "public_guild_messages", "direct_message", "direct_messages", "c2c_message", "c2c_messages", "group_at_message", "group_at_messages"): if hasattr(intents, attr): try: setattr(intents, attr, True) @@ -105,15 +43,12 @@ def _build_intents(): def _make_bot_class(app): - intents = _build_intents() - - class _QQBot(botpy.Client): + class QQBot(botpy.Client): def __init__(self): - super().__init__(intents=intents, ext_handlers=False) + super().__init__(intents=_build_intents(), ext_handlers=False) async def on_ready(self): - name = getattr(getattr(self, "robot", None), "name", "QQBot") - print(f"[QQ] bot ready: {name}") + print(f"[QQ] bot ready: {getattr(getattr(self, 'robot', None), 'name', 'QQBot')}") async def on_c2c_message_create(self, message: C2CMessage): await app.on_message(message, is_group=False) @@ -124,154 +59,50 @@ def _make_bot_class(app): async def on_direct_message_create(self, message): await app.on_message(message, is_group=False) - return _QQBot + return QQBot -class QQApp: +class QQApp(AgentChatMixin): + label, source, split_limit = "QQ", "qq", 1500 + def __init__(self): + super().__init__(agent, USER_TASKS) self.client = None async def send_text(self, chat_id, content, *, msg_id=None, is_group=False): if not self.client: return - for part in _split_text(content): - seq = _next_msg_seq() - if is_group: - await self.client.api.post_group_message( - group_openid=chat_id, - msg_type=0, - content=part, - msg_id=msg_id, - msg_seq=seq, - ) - else: - await self.client.api.post_c2c_message( - openid=chat_id, - msg_type=0, - content=part, - msg_id=msg_id, - msg_seq=seq, - ) - - async def send_done(self, chat_id, raw_text, *, msg_id=None, is_group=False): - files = [p for p in _extract_files(raw_text) if os.path.exists(p)] - body = _strip_files(_clean(raw_text)) - if files: - body = (body + "\n\n" if body else "") + "\n".join([f"生成文件: {p}" for p in files]) - await self.send_text(chat_id, body or "...", msg_id=msg_id, is_group=is_group) - - async def handle_command(self, chat_id, cmd, *, msg_id=None, is_group=False): - parts = (cmd or "").split() - op = (parts[0] if parts else "").lower() - if op == "/stop": - state = _USER_TASKS.get(chat_id) - if state: - state["running"] = False - agent.abort() - await self.send_text(chat_id, "⏹️ 正在停止...", msg_id=msg_id, is_group=is_group) - elif op == "/status": - llm = agent.get_llm_name() if agent.llmclient else "未配置" - await self.send_text(chat_id, f"状态: {'🔴 运行中' if agent.is_running else '🟢 空闲'}\nLLM: [{agent.llm_no}] {llm}", msg_id=msg_id, is_group=is_group) - elif op == "/llm": - if not agent.llmclient: - return await self.send_text(chat_id, "❌ 当前没有可用的 LLM 配置", msg_id=msg_id, is_group=is_group) - if len(parts) > 1: - try: - n = int(parts[1]) - agent.next_llm(n) - await self.send_text(chat_id, f"✅ 已切换到 [{agent.llm_no}] {agent.get_llm_name()}", msg_id=msg_id, is_group=is_group) - except Exception: - await self.send_text(chat_id, f"用法: /llm <0-{len(agent.list_llms()) - 1}>", msg_id=msg_id, is_group=is_group) - else: - lines = [f"{'→' if cur else ' '} [{i}] {name}" for i, name, cur in agent.list_llms()] - await self.send_text(chat_id, "LLMs:\n" + "\n".join(lines), msg_id=msg_id, is_group=is_group) - elif op == "/restore": - try: - restored_info, err = _format_restore() - if err: - return await self.send_text(chat_id, err, msg_id=msg_id, is_group=is_group) - restored, fname, count = restored_info - agent.abort() - agent.history.extend(restored) - await self.send_text(chat_id, f"✅ 已恢复 {count} 轮对话\n来源: {fname}\n(仅恢复上下文,请输入新问题继续)", msg_id=msg_id, is_group=is_group) - except Exception as e: - await self.send_text(chat_id, f"❌ 恢复失败: {e}", msg_id=msg_id, is_group=is_group) - elif op == "/new": - agent.abort() - agent.history = [] - await self.send_text(chat_id, "🆕 已清空当前共享上下文", msg_id=msg_id, is_group=is_group) - else: - await self.send_text( - chat_id, - "📖 命令列表:\n/help - 显示帮助\n/status - 查看状态\n/stop - 停止当前任务\n/new - 清空当前上下文\n/restore - 恢复上次对话历史\n/llm [n] - 查看或切换模型", - msg_id=msg_id, - is_group=is_group, - ) - - async def run_agent(self, chat_id, text, *, msg_id=None, is_group=False): - state = {"running": True} - _USER_TASKS[chat_id] = state - try: - await self.send_text(chat_id, "思考中...", msg_id=msg_id, is_group=is_group) - prompt = f"If you need to show files to user, use [FILE:filepath] in your response.\n\n{text}" - dq = agent.put_task(prompt, source="qq") - last_ping = time.time() - while state["running"]: - try: - item = await asyncio.to_thread(dq.get, True, 3) - except Q.Empty: - if agent.is_running and time.time() - last_ping > 20: - await self.send_text(chat_id, "⏳ 还在处理中,请稍等...", msg_id=msg_id, is_group=is_group) - last_ping = time.time() - continue - if "done" in item: - await self.send_done(chat_id, item.get("done", ""), msg_id=msg_id, is_group=is_group) - break - if not state["running"]: - await self.send_text(chat_id, "⏹️ 已停止", msg_id=msg_id, is_group=is_group) - except Exception as e: - import traceback - - print(f"[QQ] run_agent error: {e}") - traceback.print_exc() - await self.send_text(chat_id, f"❌ 错误: {e}", msg_id=msg_id, is_group=is_group) - finally: - _USER_TASKS.pop(chat_id, None) + api = self.client.api.post_group_message if is_group else self.client.api.post_c2c_message + key = "group_openid" if is_group else "openid" + for part in split_text(content, self.split_limit): + await api(**{key: chat_id, "msg_type": 0, "content": part, "msg_id": msg_id, "msg_seq": _next_msg_seq()}) async def on_message(self, data, is_group=False): try: msg_id = getattr(data, "id", None) - if msg_id in _PROCESSED_IDS: + if msg_id in PROCESSED_IDS: return - _PROCESSED_IDS.append(msg_id) + PROCESSED_IDS.append(msg_id) content = (getattr(data, "content", "") or "").strip() if not content: return author = getattr(data, "author", None) - if is_group: - chat_id = str(getattr(data, "group_openid", "") or "") - user_id = str(getattr(author, "member_openid", "") or getattr(author, "id", "") or "unknown") - else: - user_id = str(getattr(author, "user_openid", "") or getattr(author, "id", "") or "unknown") - chat_id = user_id - public_access = not ALLOWED or "*" in ALLOWED - if not public_access and user_id not in ALLOWED: + user_id = str(getattr(author, "member_openid" if is_group else "user_openid", "") or getattr(author, "id", "") or "unknown") + chat_id = str(getattr(data, "group_openid", "") or user_id) if is_group else user_id + if not public_access(ALLOWED) and user_id not in ALLOWED: print(f"[QQ] unauthorized user: {user_id}") return print(f"[QQ] message from {user_id} ({'group' if is_group else 'c2c'}): {content}") if content.startswith("/"): - await self.handle_command(chat_id, content, msg_id=msg_id, is_group=is_group) - return + return await self.handle_command(chat_id, content, msg_id=msg_id, is_group=is_group) asyncio.create_task(self.run_agent(chat_id, content, msg_id=msg_id, is_group=is_group)) except Exception: import traceback - print("[QQ] handle_message error") traceback.print_exc() async def start(self): - BotClass = _make_bot_class(self) - self.client = BotClass() + self.client = _make_bot_class(self)() while True: try: print(f"[QQ] bot starting... {time.strftime('%m-%d %H:%M')}") @@ -283,25 +114,8 @@ class QQApp: if __name__ == "__main__": - try: - _lock_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - _lock_sock.bind(("127.0.0.1", 19528)) - except OSError: - print("[QQ] Another instance is already running, skipping...") - sys.exit(1) - - if not APP_ID or not APP_SECRET: - print("[QQ] ERROR: please set qq_app_id and qq_app_secret in mykey.py or mykey.json") - sys.exit(1) - if agent.llmclient is None: - print("[QQ] ERROR: no usable LLM backend found in mykey.py or mykey.json") - sys.exit(1) - - log_dir = os.path.join(os.path.dirname(__file__), "temp") - os.makedirs(log_dir, exist_ok=True) - _logf = open(os.path.join(log_dir, "qqapp.log"), "a", encoding="utf-8", buffering=1) - sys.stdout = sys.stderr = _logf - print("[NEW] QQ process starting, the above are history infos ...") - print(f"[QQ] allow list: {'public' if not ALLOWED or '*' in ALLOWED else sorted(ALLOWED)}") + _LOCK_SOCK = ensure_single_instance(19528, "QQ") + require_runtime(agent, "QQ", qq_app_id=APP_ID, qq_app_secret=APP_SECRET) + redirect_log(__file__, "qqapp.log", "QQ", ALLOWED) threading.Thread(target=agent.run, daemon=True).start() asyncio.run(QQApp().start()) diff --git a/simphtml.py b/simphtml.py index c439dd4..b5294da 100644 --- a/simphtml.py +++ b/simphtml.py @@ -17,12 +17,14 @@ function createEnhancedDOMCopy() { if (sourceNode.nodeType === 3) return sourceNode.cloneNode(false); const clone = sourceNode.cloneNode(false); if ((sourceNode.tagName === 'INPUT' || sourceNode.tagName === 'TEXTAREA') && sourceNode.value) clone.setAttribute('value', sourceNode.value); + if (sourceNode.tagName === 'INPUT' && (sourceNode.type === 'radio' || sourceNode.type === 'checkbox') && sourceNode.checked) clone.setAttribute('checked', ''); else if (sourceNode.tagName === 'SELECT' && sourceNode.value) clone.setAttribute('data-selected', sourceNode.value); try { if (sourceNode.matches && sourceNode.matches(':-webkit-autofill')) { clone.setAttribute('data-autofilled', 'true'); if (!sourceNode.value) clone.setAttribute('value', '⚠️受保护-读tmwebdriver_sop的autofill章节提取'); } } catch(e) {} const isDropdown = sourceNode.classList?.contains('dropdown-menu') || /dropdown|menu/i.test(sourceNode.className) || sourceNode.getAttribute('role') === 'menu'; - const isSmallDropdown = isDropdown && (sourceNode.querySelectorAll('a, button, [role="menuitem"], li').length <= 7 && sourceNode.textContent.length < 500); + const _ddItems = isDropdown ? sourceNode.querySelectorAll('a, button, [role="menuitem"], li').length : 0; + const isSmallDropdown = _ddItems > 0 && _ddItems <= 7 && sourceNode.textContent.length < 500; const childNodes = []; for (const child of sourceNode.childNodes) { @@ -704,6 +706,7 @@ js_findMainContent = ''' def optimize_html_for_tokens(html): if type(html) is str: soup = BeautifulSoup(html, 'html.parser') else: soup = html + for svg in soup.find_all('svg'): svg.clear() [tag.attrs.pop('style', None) for tag in soup.find_all(True)] for tag in soup.find_all(True): if tag.has_attr('src'): @@ -788,6 +791,10 @@ def find_changed_elements(before_html, after_html): for sig, els in after_sigs.items(): if sig not in before_sigs: changed.extend(els) elif len(els) > len(before_sigs[sig]): changed.extend(els[:len(els) - len(before_sigs[sig])]) + if len(changed) == 0 and str(before_soup) != str(after_soup): + before_els, after_els = before_soup.find_all(True), after_soup.find_all(True) + for i in range(min(len(before_els), len(after_els))): + if get_sig(before_els[i]) != get_sig(after_els[i]): changed.append(after_els[i]) # 变化边界: parent不在changed中的元素 cids = set(id(el) for el in changed) boundaries = [el for el in changed if el.parent is None or id(el.parent) not in cids] diff --git a/stapp.py b/stapp.py index 99c79e0..a59a8ce 100644 --- a/stapp.py +++ b/stapp.py @@ -82,6 +82,11 @@ if "messages" not in st.session_state: st.session_state.messages = [] for msg in st.session_state.messages: with st.chat_message(msg["role"]): st.markdown(msg["content"], unsafe_allow_html=True) +# IME composition fix (macOS only) - prevents Enter from submitting during CJK input +if os.name != 'nt': + import streamlit.components.v1 as components + components.html('', height=0) + if prompt := st.chat_input("请输入指令"): st.session_state.messages.append({"role": "user", "content": prompt}) with st.chat_message("user"): st.markdown(prompt, unsafe_allow_html=False) # 小心 XSS diff --git a/wecomapp.py b/wecomapp.py index b5ef37b..bd37ddd 100644 --- a/wecomapp.py +++ b/wecomapp.py @@ -1,8 +1,9 @@ -import os, sys, re, threading, asyncio, queue as Q, socket, time, glob +import asyncio, os, sys, threading from collections import deque sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from agentmain import GeneraticAgent +from chatapp_common import AgentChatMixin, ensure_single_instance, public_access, redirect_log, require_runtime, split_text from llmcore import mykeys try: @@ -11,211 +12,64 @@ except Exception: print("Please install wecom_aibot_sdk to use WeCom: pip install wecom_aibot_sdk") sys.exit(1) -agent = GeneraticAgent() -agent.verbose = False - +agent = GeneraticAgent(); agent.verbose = False BOT_ID = str(mykeys.get("wecom_bot_id", "") or "").strip() SECRET = str(mykeys.get("wecom_secret", "") or "").strip() WELCOME = str(mykeys.get("wecom_welcome_message", "") or "").strip() ALLOWED = {str(x).strip() for x in mykeys.get("wecom_allowed_users", []) if str(x).strip()} - -_TAG_PATS = [r"<" + t + r">.*?" for t in ("thinking", "summary", "tool_use", "file_content")] -_PROCESSED_IDS = deque(maxlen=1000) -_USER_TASKS = {} +PROCESSED_IDS, USER_TASKS = deque(maxlen=1000), {} -def _clean(text): - for pat in _TAG_PATS: - text = re.sub(pat, "", text, flags=re.DOTALL) - return re.sub(r"\n{3,}", "\n\n", text).strip() or "..." +class WeComApp(AgentChatMixin): + label, source, split_limit = "WeCom", "wecom", 1200 - -def _extract_files(text): - return re.findall(r"\[FILE:([^\]]+)\]", text or "") - - -def _strip_files(text): - return re.sub(r"\[FILE:[^\]]+\]", "", text or "").strip() - - -def _split_text(text, limit=1200): - text = (text or "").strip() or "..." - parts = [] - while len(text) > limit: - cut = text.rfind("\n", 0, limit) - if cut < limit * 0.6: - cut = limit - parts.append(text[:cut].rstrip()) - text = text[cut:].lstrip() - if text: - parts.append(text) - return parts or ["..."] - - -def _format_restore(): - files = glob.glob("./temp/model_responses_*.txt") - if not files: - return None, "❌ 没有找到历史记录" - latest = max(files, key=os.path.getmtime) - with open(latest, "r", encoding="utf-8") as f: - content = f.read() - users = re.findall(r"=== USER ===\n(.+?)(?==== |$)", content, re.DOTALL) - resps = re.findall(r"=== Response ===.*?\n(.+?)(?==== Prompt|$)", content, re.DOTALL) - count, restored = 0, [] - for u, r in zip(users, resps): - u, r = u.strip(), r.strip()[:500] - if u and r: - restored.extend([f"[USER]: {u}", f"[Agent] {r}"]) - count += 1 - if not restored: - return None, "❌ 历史记录里没有可恢复内容" - return (restored, os.path.basename(latest), count), None - - -class WeComApp: def __init__(self): - self.client = None - self.chat_frames = {} - - def _body(self, frame): - if hasattr(frame, "body"): - return frame.body or {} - if isinstance(frame, dict): - return frame.get("body", frame) - return {} + super().__init__(agent, USER_TASKS) + self.client, self.chat_frames = None, {} async def send_text(self, chat_id, content): - if not self.client: + if not self.client or chat_id not in self.chat_frames: + if chat_id not in self.chat_frames: + print(f"[WeCom] no frame found for chat: {chat_id}") return - frame = self.chat_frames.get(chat_id) - if not frame: - print(f"[WeCom] no frame found for chat: {chat_id}") - return - for part in _split_text(content): - stream_id = generate_req_id("stream") - await self.client.reply_stream(frame, stream_id, part, finish=True) - - async def send_done(self, chat_id, raw_text): - files = [p for p in _extract_files(raw_text) if os.path.exists(p)] - body = _strip_files(_clean(raw_text)) - if files: - body = (body + "\n\n" if body else "") + "\n".join([f"生成文件: {p}" for p in files]) - await self.send_text(chat_id, body or "...") - - async def handle_command(self, chat_id, cmd): - parts = (cmd or "").split() - op = (parts[0] if parts else "").lower() - if op == "/stop": - state = _USER_TASKS.get(chat_id) - if state: - state["running"] = False - agent.abort() - await self.send_text(chat_id, "⏹️ 正在停止...") - elif op == "/status": - llm = agent.get_llm_name() if agent.llmclient else "未配置" - await self.send_text(chat_id, f"状态: {'🔴 运行中' if agent.is_running else '🟢 空闲'}\nLLM: [{agent.llm_no}] {llm}") - elif op == "/llm": - if not agent.llmclient: - return await self.send_text(chat_id, "❌ 当前没有可用的 LLM 配置") - if len(parts) > 1: - try: - n = int(parts[1]) - agent.next_llm(n) - await self.send_text(chat_id, f"✅ 已切换到 [{agent.llm_no}] {agent.get_llm_name()}") - except Exception: - await self.send_text(chat_id, f"用法: /llm <0-{len(agent.list_llms()) - 1}>") - else: - lines = [f"{'→' if cur else ' '} [{i}] {name}" for i, name, cur in agent.list_llms()] - await self.send_text(chat_id, "LLMs:\n" + "\n".join(lines)) - elif op == "/restore": - try: - restored_info, err = _format_restore() - if err: - return await self.send_text(chat_id, err) - restored, fname, count = restored_info - agent.abort() - agent.history.extend(restored) - await self.send_text(chat_id, f"✅ 已恢复 {count} 轮对话\n来源: {fname}\n(仅恢复上下文,请输入新问题继续)") - except Exception as e: - await self.send_text(chat_id, f"❌ 恢复失败: {e}") - elif op == "/new": - agent.abort() - agent.history = [] - await self.send_text(chat_id, "🆕 已清空当前共享上下文") - else: - await self.send_text( - chat_id, - "📖 命令列表:\n/help - 显示帮助\n/status - 查看状态\n/stop - 停止当前任务\n/new - 清空当前上下文\n/restore - 恢复上次对话历史\n/llm [n] - 查看或切换模型", - ) - - async def run_agent(self, chat_id, text): - state = {"running": True} - _USER_TASKS[chat_id] = state - try: - await self.send_text(chat_id, "思考中...") - prompt = f"If you need to show files to user, use [FILE:filepath] in your response.\n\n{text}" - dq = agent.put_task(prompt, source="wecom") - last_ping = time.time() - while state["running"]: - try: - item = await asyncio.to_thread(dq.get, True, 3) - except Q.Empty: - if agent.is_running and time.time() - last_ping > 20: - await self.send_text(chat_id, "⏳ 还在处理中,请稍等...") - last_ping = time.time() - continue - if "done" in item: - await self.send_done(chat_id, item.get("done", "")) - break - if not state["running"]: - await self.send_text(chat_id, "⏹️ 已停止") - except Exception as e: - import traceback - - print(f"[WeCom] run_agent error: {e}") - traceback.print_exc() - await self.send_text(chat_id, f"❌ 错误: {e}") - finally: - _USER_TASKS.pop(chat_id, None) + frame = self.chat_frames[chat_id] + for part in split_text(content, self.split_limit): + await self.client.reply_stream(frame, generate_req_id("stream"), part, finish=True) async def on_text(self, frame): try: - body = self._body(frame) + body = frame.body if hasattr(frame, "body") else frame.get("body", frame) if isinstance(frame, dict) else {} if not isinstance(body, dict): return msg_id = body.get("msgid") or f"{body.get('chatid', '')}_{body.get('sendertime', '')}" - if msg_id in _PROCESSED_IDS: + if msg_id in PROCESSED_IDS: return - _PROCESSED_IDS.append(msg_id) + PROCESSED_IDS.append(msg_id) from_info = body.get("from", {}) if isinstance(body.get("from", {}), dict) else {} sender_id = str(from_info.get("userid", "") or "unknown") chat_id = str(body.get("chatid", "") or sender_id) content = str((body.get("text", {}) or {}).get("content", "") or "").strip() if not content: return - public_access = not ALLOWED or "*" in ALLOWED - if not public_access and sender_id not in ALLOWED: + if not public_access(ALLOWED) and sender_id not in ALLOWED: print(f"[WeCom] unauthorized user: {sender_id}") return self.chat_frames[chat_id] = frame print(f"[WeCom] message from {sender_id}: {content}") if content.startswith("/"): - await self.handle_command(chat_id, content) - return + return await self.handle_command(chat_id, content) asyncio.create_task(self.run_agent(chat_id, content)) except Exception: import traceback - print("[WeCom] handle_message error") traceback.print_exc() async def on_enter_chat(self, frame): - if not WELCOME or not self.client: - return - try: - await self.client.reply_welcome(frame, {"msgtype": "text", "text": {"content": WELCOME}}) - except Exception as e: - print(f"[WeCom] welcome error: {e}") + if WELCOME and self.client: + try: + await self.client.reply_welcome(frame, {"msgtype": "text", "text": {"content": WELCOME}}) + except Exception as e: + print(f"[WeCom] welcome error: {e}") async def on_connected(self, frame): print("[WeCom] connected") @@ -230,19 +84,16 @@ class WeComApp: print(f"[WeCom] error: {frame}") async def start(self): - self.client = WSClient({ - "bot_id": BOT_ID, - "secret": SECRET, - "reconnect_interval": 1000, - "max_reconnect_attempts": -1, - "heartbeat_interval": 30000, - }) - self.client.on("connected", self.on_connected) - self.client.on("authenticated", self.on_authenticated) - self.client.on("disconnected", self.on_disconnected) - self.client.on("error", self.on_error) - self.client.on("message.text", self.on_text) - self.client.on("event.enter_chat", self.on_enter_chat) + self.client = WSClient({"bot_id": BOT_ID, "secret": SECRET, "reconnect_interval": 1000, "max_reconnect_attempts": -1, "heartbeat_interval": 30000}) + for event, handler in { + "connected": self.on_connected, + "authenticated": self.on_authenticated, + "disconnected": self.on_disconnected, + "error": self.on_error, + "message.text": self.on_text, + "event.enter_chat": self.on_enter_chat, + }.items(): + self.client.on(event, handler) print("[WeCom] bot starting...") await self.client.connect_async() while True: @@ -250,25 +101,8 @@ class WeComApp: if __name__ == "__main__": - try: - _lock_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - _lock_sock.bind(("127.0.0.1", 19529)) - except OSError: - print("[WeCom] Another instance is already running, skipping...") - sys.exit(1) - - if not BOT_ID or not SECRET: - print("[WeCom] ERROR: please set wecom_bot_id and wecom_secret in mykey.py or mykey.json") - sys.exit(1) - if agent.llmclient is None: - print("[WeCom] ERROR: no usable LLM backend found in mykey.py or mykey.json") - sys.exit(1) - - log_dir = os.path.join(os.path.dirname(__file__), "temp") - os.makedirs(log_dir, exist_ok=True) - _logf = open(os.path.join(log_dir, "wecomapp.log"), "a", encoding="utf-8", buffering=1) - sys.stdout = sys.stderr = _logf - print("[NEW] WeCom process starting, the above are history infos ...") - print(f"[WeCom] allow list: {'public' if not ALLOWED or '*' in ALLOWED else sorted(ALLOWED)}") + _LOCK_SOCK = ensure_single_instance(19529, "WeCom") + require_runtime(agent, "WeCom", wecom_bot_id=BOT_ID, wecom_secret=SECRET) + redirect_log(__file__, "wecomapp.log", "WeCom", ALLOWED) threading.Thread(target=agent.run, daemon=True).start() asyncio.run(WeComApp().start())