diff --git a/.gitignore b/.gitignore
index 416fb38..28144b5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,6 +60,11 @@ memory/*
# Plan SOP
!memory/plan_sop.md
+# Skill Search SOP
+!memory/skill_search/
+!memory/skill_search/**
+memory/skill_search/**/__pycache__/
+
# ADB UI tool
!memory/adb_ui.py
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..641d051
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 lsdefine
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 4c8d9f5..bac59d6 100644
--- a/README.md
+++ b/README.md
@@ -226,6 +226,8 @@ The entire core loop is just **92 lines of code** (`agent_loop.py`).
| `web_execute_js` | Control browser behavior |
| `ask_user` | Human-in-the-loop confirmation |
+> Additionally, 2 **memory management tools** (`update_working_checkpoint`, `start_long_term_update`) allow the agent to persist context and accumulate experience across sessions.
+
4️⃣ **Capability Extension Mechanism**
> _Capable of dynamically creating new tools._
@@ -304,7 +306,7 @@ MIT License — see [LICENSE](LICENSE)
## 📅 最新动态
-- **2026-03-:** [发布百万级 Skill 库](https://mp.weixin.qq.com/s/q2gQ7YvWoiAcwxzaiwpuiQ?scene=1&click_id=7)
+- **2026-03-10:** [发布百万级 Skill 库](https://mp.weixin.qq.com/s/q2gQ7YvWoiAcwxzaiwpuiQ?scene=1&click_id=7)
- **2026-03-08:** [发布以 GenericAgent 为核心的"政务龙虾" Dintal Claw](https://mp.weixin.qq.com/s/eiEhwo-j6S-WpLxgBnNxBg)
- **2026-03-01:** [GenericAgent 被机器之心报道](https://mp.weixin.qq.com/s/uVWpTTF5I1yzAENV_qm7yg)
- **2026-01-11:** GenericAgent V1.0 公开版本发布
@@ -476,6 +478,8 @@ GenericAgent 通过**分层记忆 × 最小工具集 × 自主执行循环**完
| `web_execute_js` | 控制浏览器行为 |
| `ask_user` | 人机协作确认 |
+> 此外,还有 2 个**记忆管理工具**(`update_working_checkpoint`、`start_long_term_update`),使 Agent 能够跨会话积累经验、维持持久上下文。
+
4️⃣ **能力扩展机制**
> 具备动态创建新的工具能力
>
diff --git a/TMWebDriver.py b/TMWebDriver.py
index 86a2b0b..9669777 100644
--- a/TMWebDriver.py
+++ b/TMWebDriver.py
@@ -34,7 +34,7 @@ class Session:
class TMWebDriver:
- def __init__(self, host: str = 'localhost', port: int = 18765):
+ def __init__(self, host: str = '127.0.0.1', port: int = 18765):
self.host, self.port = host, port
self.sessions, self.results, self.acks = {}, {}, {}
self.default_session_id = None
@@ -202,7 +202,7 @@ class TMWebDriver:
hasjump = acked = False
while exec_id not in self.results:
- time.sleep(0.5)
+ time.sleep(0.2)
if not acked and exec_id in self.acks:
acked = True; start_time = time.time()
if tp == 'ws':
@@ -266,4 +266,4 @@ class TMWebDriver:
return self.execute_js(f'GM_openInTab("{url}");')
if __name__ == "__main__":
- driver = TMWebDriver(host='localhost', port=18765)
\ No newline at end of file
+ driver = TMWebDriver(host='127.0.0.1', port=18765)
\ No newline at end of file
diff --git a/agentmain.py b/agentmain.py
index 4151ad5..a1025c7 100644
--- a/agentmain.py
+++ b/agentmain.py
@@ -38,23 +38,16 @@ def get_system_prompt():
class GeneraticAgent:
def __init__(self):
script_dir = os.path.dirname(os.path.abspath(__file__))
- temp_dir = os.path.join(script_dir, 'temp')
- if not os.path.exists(temp_dir): os.makedirs(temp_dir)
+ os.makedirs(os.path.join(script_dir, 'temp'), exist_ok=True)
from llmcore import mykeys
llm_sessions = []
for k, cfg in mykeys.items():
if not any(x in k for x in ['api', 'config', 'cookie']): continue
try:
- if 'claude' in k: llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])]
- if 'oai' in k: llm_sessions += [LLMSession(
- api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'], proxy=cfg.get('proxy'),
- api_mode=cfg.get('api_mode', 'chat_completions'),
- max_retries=cfg.get('max_retries', 2),
- connect_timeout=cfg.get('connect_timeout', 10),
- read_timeout=cfg.get('read_timeout', 120),
- )]
- if 'xai' in k: llm_sessions += [XaiSession(cfg, mykeys.get('proxy', ''))]
- if 'sider' in k: llm_sessions += [SiderLLMSession(cfg, default_model=x) for x in \
+ if 'claude' in k: llm_sessions += [ClaudeSession(cfg=cfg)]
+ if 'oai' in k: llm_sessions += [LLMSession(cfg=cfg)]
+ if 'xai' in k: llm_sessions += [XaiSession(cfg=cfg)]
+ if 'sider' in k: llm_sessions += [SiderLLMSession(cfg={'apikey': cfg, 'model': x}) for x in \
["gemini-3.0-flash", "gpt-5.4"]]
except: pass
if len(llm_sessions) > 0: self.llmclient = ToolClient(llm_sessions, auto_save_tokens=True)
@@ -63,10 +56,8 @@ class GeneraticAgent:
self.history = []
self.task_queue = queue.Queue()
self.is_running, self.stop_sig = False, False
- self.llm_no = 0
- self.inc_out = False
- self.handler = None
- self.verbose = True
+ self.llm_no = 0; self.inc_out = False
+ self.handler = None; self.verbose = True
def next_llm(self, n=-1):
self.llm_no = ((self.llm_no + 1) if n < 0 else n) % len(self.llmclient.backends)
@@ -80,8 +71,7 @@ class GeneraticAgent:
print('Abort current task...')
if not self.is_running: return
self.stop_sig = True
- if self.handler is not None:
- self.handler.code_stop_signal.append(1)
+ if self.handler is not None: self.handler.code_stop_signal.append(1)
def put_task(self, query, source="user", images=None):
display_queue = queue.Queue()
@@ -99,10 +89,11 @@ class GeneraticAgent:
sys_prompt = get_system_prompt()
script_dir = os.path.dirname(os.path.abspath(__file__))
handler = GenericAgentHandler(None, self.history, os.path.join(script_dir, 'temp'))
- if self.handler and self.handler.key_info:
- handler.key_info = self.handler.key_info
- if '清除工作记忆' not in handler.key_info:
- handler.key_info += '\n[SYSTEM] 若开始新任务,先更新或清除工作记忆\n'
+ if self.handler and 'key_info' in self.handler.working:
+ ki = re.sub(r'\n\[SYSTEM\] 此为.*?工作记忆[。\n]*', '', self.handler.working['key_info']) # 去旧
+ handler.working['key_info'] = ki
+ handler.working['passed_sessions'] = ps = self.handler.working.get('passed_sessions', 0) + 1
+ if ps > 0: handler.working['key_info'] += f'\n[SYSTEM] 此为 {ps} 个对话前设置的key_info,若已在新任务,先更新或清除工作记忆。\n'
self.handler = handler
self.llmclient.backend = self.llmclient.backends[self.llm_no]
user_input = raw_query
@@ -154,7 +145,6 @@ if __name__ == '__main__':
threading.Thread(target=agent.run, daemon=True).start()
if args.task:
- script_dir = os.path.dirname(os.path.abspath(__file__))
d = os.path.join(script_dir, f'temp/{args.task}'); rp = os.path.join(d, 'reply.txt'); nround = ''
with open(os.path.join(d, 'input.txt'), encoding='utf-8') as f: raw = f.read()
while True:
@@ -189,11 +179,13 @@ if __name__ == '__main__':
try:
while 'done' not in (item := dq.get(timeout=120)): pass
result = item['done']
+ print(result)
except Exception as e:
if once: raise
print(f'[Reflect] drain error: {e}'); result = f'[ERROR] {e}'
- script_dir = os.path.dirname(os.path.abspath(__file__))
- open(os.path.join(script_dir, './temp/reflect.log'), 'a', encoding='utf-8').write(f'[{datetime.now():%m-%d %H:%M}]\n{result}\n\n')
+ log_dir = os.path.join(script_dir, 'temp/reflect_logs'); os.makedirs(log_dir, exist_ok=True)
+ script_name = os.path.splitext(os.path.basename(args.reflect))[0]
+ open(os.path.join(log_dir, f'{script_name}_{datetime.now():%Y-%m-%d}.log'), 'a', encoding='utf-8').write(f'[{datetime.now():%m-%d %H:%M}]\n{result}\n\n')
if on_done:
try: on_done(result)
except Exception as e: print(f'[Reflect] on_done error: {e}')
diff --git a/assets/global_mem_insight_template.txt b/assets/global_mem_insight_template.txt
index a497ccc..dc16393 100644
--- a/assets/global_mem_insight_template.txt
+++ b/assets/global_mem_insight_template.txt
@@ -1,20 +1,23 @@
# [Global Memory Insight]
-# 【引导注释 - 使用后请删除整个注释块】
-# 格式:两层「场景→记忆」映射 + RULES,总计≤30行
-#
-# == 第一层:高频场景 key→value ==
-# 每行格式: 触发场景关键词: sop名/py名/L2:section名(关键参数)
-# 示例: 境外网站/API: proxies={'https':'http://127.0.0.1:2082'}
-# 示例: 微信发消息: wechat_send_sop | 微信读聊天: wechat_db_sop+wechat_db_utils(quick_connect)
-# 可用 | 分隔同类场景,括号内放硬参数
-#
-# == 第二层:低频关键词,自包含可只写一个词 ==
-# 格式: 一行内用 | 分隔多个关键词,需要时 read L2 或 ls memory/ 查详情
-# 示例: 邮件ezgmail | 游戏game | OCR | 华为云Huawei_Cloud
-#
-# == [RULES] ==
-# 压缩的通用操作准则,每条一行
-# 示例: 1. 搜索先行: 文件用es, 信息用google, 禁猜路径
+浏览器自动化: web_scan/web_execute_js直接调用 | 特殊:tmwebdriver_sop(文件上传/图搜/PDF blob/元素物理坐标/Cookie提取含HttpOnly/跨域iframe操控/CDP/跨tab/后台tab操作)
+键鼠模拟: ljqCtrl_sop+.py(仅win,禁pyautogui/先activate窗口)
+定时任务: scheduled_task_sop(报告→sche_tasks/done/) | 与自主任务完全独立
+自主探索任务: autonomous_operation_sop(报告→temp/autonomous_reports/history.txt,不在memory下) | 与定时任务完全独立
+手机操控: adb_ui.py
+需要时read L2 或 ls ../memory/ 查L3
L0(META-SOP): memory_management_sop
-出厂L3: web_setup_sop | autonomous_operation_sop | scheduled_task_sop | ljqCtrl_sop+.py | tmwebdriver_sop | subagent_sop | plan_sop | mem_scanner.py | adb_ui.py
\ No newline at end of file
+L2: 现空
+L3: web_setup_sop | autonomous_operation_sop | scheduled_task_sop | ljqCtrl_sop+.py | tmwebdriver_sop | subagent_sop | plan_sop | mem_scanner.py | adb_ui.py
+
+[RULES]
+1. 搜索先行: 信息尽量用google(必须web), 项目内os.listdir, 禁猜路径
+2. 交叉验证: 禁信搜索摘要, 数值必进详情页核实
+3. 编码安全: 改前必读源码; import memory用sys.path.append
+4. 闭环: 物理模拟后必确认; 3次失败请求干预;
+5. 进程: 禁无条件杀python(会杀自己), 精确PID, 禁os.kill判活
+6. 窗口: GUI状态优先枚举窗口, 比OCR快
+7. 物理红线: cwd用./; cwd指定后代码内禁用../向上切换,改用绝对路径
+8. web JS: 一次写对,输入用原生setter+事件链,点击前检查disabled,注意引号转义; scan空再scan或innerText
+9. SOP: 执行前读取缓存硬参数,禁凭印象,有utils必用; 复杂长程先读plan_sop
+10. 用户提及或复杂长程需规划任务要读plan_sop进入规划模式
diff --git a/assets/ljq_web_driver.user.js b/assets/ljq_web_driver.user.js
index 2e90d36..1674be2 100644
--- a/assets/ljq_web_driver.user.js
+++ b/assets/ljq_web_driver.user.js
@@ -11,7 +11,7 @@
// @grant GM_xmlhttpRequest
// @grant GM_openInTab
// @grant unsafeWindow
-// @connect localhost
+// @connect 127.0.0.1
// @run-at document-start
// ==/UserScript==
@@ -26,13 +26,13 @@
return;
}
- const wsUrl = 'ws://localhost:18765';
- const httpUrl = 'http://localhost:18766/';
+ const wsUrl = 'ws://127.0.0.1:18765';
+ const httpUrl = 'http://127.0.0.1:18766/';
function isWebSocketServerAlive(callback) {
GM_xmlhttpRequest({
method: 'GET',
- url: 'http://localhost:18765/',
+ url: 'http://127.0.0.1:18765/',
onload: () => callback(true),
onerror: () => callback(false)
});
diff --git a/chatapp_common.py b/chatapp_common.py
new file mode 100644
index 0000000..88d345a
--- /dev/null
+++ b/chatapp_common.py
@@ -0,0 +1,186 @@
+import asyncio, glob, os, queue as Q, re, socket, sys, time
+
+HELP_TEXT = "📖 命令列表:\n/help - 显示帮助\n/status - 查看状态\n/stop - 停止当前任务\n/new - 清空当前上下文\n/restore - 恢复上次对话历史\n/llm [n] - 查看或切换模型"
+FILE_HINT = "If you need to show files to user, use [FILE:filepath] in your response."
+TAG_PATS = [r"<" + t + r">.*?" + t + r">" for t in ("thinking", "summary", "tool_use", "file_content")]
+
+
+def clean_reply(text):
+ for pat in TAG_PATS:
+ text = re.sub(pat, "", text or "", flags=re.DOTALL)
+ return re.sub(r"\n{3,}", "\n\n", text).strip() or "..."
+
+
+def extract_files(text):
+ return re.findall(r"\[FILE:([^\]]+)\]", text or "")
+
+
+def strip_files(text):
+ return re.sub(r"\[FILE:[^\]]+\]", "", text or "").strip()
+
+
+def split_text(text, limit):
+ text, parts = (text or "").strip() or "...", []
+ while len(text) > limit:
+ cut = text.rfind("\n", 0, limit)
+ if cut < limit * 0.6:
+ cut = limit
+ parts.append(text[:cut].rstrip())
+ text = text[cut:].lstrip()
+ return parts + ([text] if text else []) or ["..."]
+
+
+def format_restore():
+ files = glob.glob("./temp/model_responses_*.txt")
+ if not files:
+ return None, "❌ 没有找到历史记录"
+ latest = max(files, key=os.path.getmtime)
+ with open(latest, "r", encoding="utf-8") as f:
+ content = f.read()
+ users = re.findall(r"=== USER ===\n(.+?)(?==== |$)", content, re.DOTALL)
+ resps = re.findall(r"=== Response ===.*?\n(.+?)(?==== Prompt|$)", content, re.DOTALL)
+ restored = []
+ for u, r in zip(users, resps):
+ u, r = u.strip(), r.strip()[:500]
+ if u and r:
+ restored.extend([f"[USER]: {u}", f"[Agent] {r}"])
+ if not restored:
+ return None, "❌ 历史记录里没有可恢复内容"
+ return (restored, os.path.basename(latest), len(restored) // 2), None
+
+
+def build_done_text(raw_text):
+ files = [p for p in extract_files(raw_text) if os.path.exists(p)]
+ body = strip_files(clean_reply(raw_text))
+ if files:
+ body = (body + "\n\n" if body else "") + "\n".join(f"生成文件: {p}" for p in files)
+ return body or "..."
+
+
+def public_access(allowed):
+ return not allowed or "*" in allowed
+
+
+def to_allowed_set(value):
+ if value is None:
+ return set()
+ if isinstance(value, str):
+ value = [value]
+ return {str(x).strip() for x in value if str(x).strip()}
+
+
+def allowed_label(allowed):
+ return "public" if public_access(allowed) else sorted(allowed)
+
+
+def ensure_single_instance(port, label):
+ try:
+ lock_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ lock_sock.bind(("127.0.0.1", port))
+ return lock_sock
+ except OSError:
+ print(f"[{label}] Another instance is already running, skipping...")
+ sys.exit(1)
+
+
+def require_runtime(agent, label, **required):
+ missing = [k for k, v in required.items() if not v]
+ if missing:
+ print(f"[{label}] ERROR: please set {', '.join(missing)} in mykey.py or mykey.json")
+ sys.exit(1)
+ if agent.llmclient is None:
+ print(f"[{label}] ERROR: no usable LLM backend found in mykey.py or mykey.json")
+ sys.exit(1)
+
+
+def redirect_log(script_file, log_name, label, allowed):
+ log_dir = os.path.join(os.path.dirname(script_file), "temp")
+ os.makedirs(log_dir, exist_ok=True)
+ logf = open(os.path.join(log_dir, log_name), "a", encoding="utf-8", buffering=1)
+ sys.stdout = sys.stderr = logf
+ print(f"[NEW] {label} process starting, the above are history infos ...")
+ print(f"[{label}] allow list: {allowed_label(allowed)}")
+
+
+class AgentChatMixin:
+ label = "Chat"
+ source = "chat"
+ split_limit = 1500
+ ping_interval = 20
+
+ def __init__(self, agent, user_tasks):
+ self.agent, self.user_tasks = agent, user_tasks
+
+ async def send_text(self, chat_id, content, **ctx):
+ raise NotImplementedError
+
+ async def send_done(self, chat_id, raw_text, **ctx):
+ await self.send_text(chat_id, build_done_text(raw_text), **ctx)
+
+ async def handle_command(self, chat_id, cmd, **ctx):
+ parts = (cmd or "").split()
+ op = (parts[0] if parts else "").lower()
+ if op == "/stop":
+ state = self.user_tasks.get(chat_id)
+ if state:
+ state["running"] = False
+ self.agent.abort()
+ return await self.send_text(chat_id, "⏹️ 正在停止...", **ctx)
+ if op == "/status":
+ llm = self.agent.get_llm_name() if self.agent.llmclient else "未配置"
+ return await self.send_text(chat_id, f"状态: {'🔴 运行中' if self.agent.is_running else '🟢 空闲'}\nLLM: [{self.agent.llm_no}] {llm}", **ctx)
+ if op == "/llm":
+ if not self.agent.llmclient:
+ return await self.send_text(chat_id, "❌ 当前没有可用的 LLM 配置", **ctx)
+ if len(parts) > 1:
+ try:
+ self.agent.next_llm(int(parts[1]))
+ return await self.send_text(chat_id, f"✅ 已切换到 [{self.agent.llm_no}] {self.agent.get_llm_name()}", **ctx)
+ except Exception:
+ return await self.send_text(chat_id, f"用法: /llm <0-{len(self.agent.list_llms()) - 1}>", **ctx)
+ lines = [f"{'→' if cur else ' '} [{i}] {name}" for i, name, cur in self.agent.list_llms()]
+ return await self.send_text(chat_id, "LLMs:\n" + "\n".join(lines), **ctx)
+ if op == "/restore":
+ try:
+ restored_info, err = format_restore()
+ if err:
+ return await self.send_text(chat_id, err, **ctx)
+ restored, fname, count = restored_info
+ self.agent.abort()
+ self.agent.history.extend(restored)
+ return await self.send_text(chat_id, f"✅ 已恢复 {count} 轮对话\n来源: {fname}\n(仅恢复上下文,请输入新问题继续)", **ctx)
+ except Exception as e:
+ return await self.send_text(chat_id, f"❌ 恢复失败: {e}", **ctx)
+ if op == "/new":
+ self.agent.abort()
+ self.agent.history = []
+ return await self.send_text(chat_id, "🆕 已清空当前共享上下文", **ctx)
+ return await self.send_text(chat_id, HELP_TEXT, **ctx)
+
+ async def run_agent(self, chat_id, text, **ctx):
+ state = {"running": True}
+ self.user_tasks[chat_id] = state
+ try:
+ await self.send_text(chat_id, "思考中...", **ctx)
+ dq = self.agent.put_task(f"{FILE_HINT}\n\n{text}", source=self.source)
+ last_ping = time.time()
+ while state["running"]:
+ try:
+ item = await asyncio.to_thread(dq.get, True, 3)
+ except Q.Empty:
+ if self.agent.is_running and time.time() - last_ping > self.ping_interval:
+ await self.send_text(chat_id, "⏳ 还在处理中,请稍等...", **ctx)
+ last_ping = time.time()
+ continue
+ if "done" in item:
+ await self.send_done(chat_id, item.get("done", ""), **ctx)
+ break
+ if not state["running"]:
+ await self.send_text(chat_id, "⏹️ 已停止", **ctx)
+ except Exception as e:
+ import traceback
+ print(f"[{self.label}] run_agent error: {e}")
+ traceback.print_exc()
+ await self.send_text(chat_id, f"❌ 错误: {e}", **ctx)
+ finally:
+ self.user_tasks.pop(chat_id, None)
diff --git a/dingtalkapp.py b/dingtalkapp.py
index d4a07dc..bcb2faa 100644
--- a/dingtalkapp.py
+++ b/dingtalkapp.py
@@ -1,8 +1,9 @@
-import os, sys, re, threading, asyncio, queue as Q, socket, time, glob, json
+import asyncio, json, os, sys, threading, time
import requests
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from agentmain import GeneraticAgent
+from chatapp_common import AgentChatMixin, ensure_single_instance, public_access, redirect_log, require_runtime, split_text
from llmcore import mykeys
try:
@@ -12,82 +13,26 @@ except Exception:
print("Please install dingtalk-stream to use DingTalk: pip install dingtalk-stream")
sys.exit(1)
-agent = GeneraticAgent()
-agent.verbose = False
-
+agent = GeneraticAgent(); agent.verbose = False
CLIENT_ID = str(mykeys.get("dingtalk_client_id", "") or "").strip()
CLIENT_SECRET = str(mykeys.get("dingtalk_client_secret", "") or "").strip()
ALLOWED = {str(x).strip() for x in mykeys.get("dingtalk_allowed_users", []) if str(x).strip()}
-
-_TAG_PATS = [r"<" + t + r">.*?" + t + r">" for t in ("thinking", "summary", "tool_use", "file_content")]
-_USER_TASKS = {}
+USER_TASKS = {}
-def _clean(text):
- for pat in _TAG_PATS:
- text = re.sub(pat, "", text, flags=re.DOTALL)
- return re.sub(r"\n{3,}", "\n\n", text).strip() or "..."
+class DingTalkApp(AgentChatMixin):
+ label, source, split_limit = "DingTalk", "dingtalk", 1800
-
-def _extract_files(text):
- return re.findall(r"\[FILE:([^\]]+)\]", text or "")
-
-
-def _strip_files(text):
- return re.sub(r"\[FILE:[^\]]+\]", "", text or "").strip()
-
-
-def _split_text(text, limit=1800):
- text = (text or "").strip() or "..."
- parts = []
- while len(text) > limit:
- cut = text.rfind("\n", 0, limit)
- if cut < limit * 0.6:
- cut = limit
- parts.append(text[:cut].rstrip())
- text = text[cut:].lstrip()
- if text:
- parts.append(text)
- return parts or ["..."]
-
-
-def _format_restore():
- files = glob.glob("./temp/model_responses_*.txt")
- if not files:
- return None, "❌ 没有找到历史记录"
- latest = max(files, key=os.path.getmtime)
- with open(latest, "r", encoding="utf-8") as f:
- content = f.read()
- users = re.findall(r"=== USER ===\n(.+?)(?==== |$)", content, re.DOTALL)
- resps = re.findall(r"=== Response ===.*?\n(.+?)(?==== Prompt|$)", content, re.DOTALL)
- count, restored = 0, []
- for u, r in zip(users, resps):
- u, r = u.strip(), r.strip()[:500]
- if u and r:
- restored.extend([f"[USER]: {u}", f"[Agent] {r}"])
- count += 1
- if not restored:
- return None, "❌ 历史记录里没有可恢复内容"
- return (restored, os.path.basename(latest), count), None
-
-
-class DingTalkApp:
def __init__(self):
- self.client = None
- self.access_token = None
- self.token_expiry = 0
- self.background_tasks = set()
+ super().__init__(agent, USER_TASKS)
+ self.client, self.access_token, self.token_expiry, self.background_tasks = None, None, 0, set()
async def _get_access_token(self):
if self.access_token and time.time() < self.token_expiry:
return self.access_token
def _fetch():
- resp = requests.post(
- "https://api.dingtalk.com/v1.0/oauth2/accessToken",
- json={"appKey": CLIENT_ID, "appSecret": CLIENT_SECRET},
- timeout=20,
- )
+ resp = requests.post("https://api.dingtalk.com/v1.0/oauth2/accessToken", json={"appKey": CLIENT_ID, "appSecret": CLIENT_SECRET}, timeout=20)
resp.raise_for_status()
return resp.json()
@@ -107,30 +52,17 @@ class DingTalkApp:
headers = {"x-acs-dingtalk-access-token": token}
if chat_id.startswith("group:"):
url = "https://api.dingtalk.com/v1.0/robot/groupMessages/send"
- payload = {
- "robotCode": CLIENT_ID,
- "openConversationId": chat_id[6:],
- "msgKey": msg_key,
- "msgParam": json.dumps(msg_param, ensure_ascii=False),
- }
+ payload = {"robotCode": CLIENT_ID, "openConversationId": chat_id[6:], "msgKey": msg_key, "msgParam": json.dumps(msg_param, ensure_ascii=False)}
else:
url = "https://api.dingtalk.com/v1.0/robot/oToMessages/batchSend"
- payload = {
- "robotCode": CLIENT_ID,
- "userIds": [chat_id],
- "msgKey": msg_key,
- "msgParam": json.dumps(msg_param, ensure_ascii=False),
- }
+ payload = {"robotCode": CLIENT_ID, "userIds": [chat_id], "msgKey": msg_key, "msgParam": json.dumps(msg_param, ensure_ascii=False)}
def _post():
resp = requests.post(url, json=payload, headers=headers, timeout=20)
body = resp.text
if resp.status_code != 200:
raise RuntimeError(f"HTTP {resp.status_code}: {body[:300]}")
- try:
- result = resp.json()
- except Exception:
- result = {}
+ result = resp.json() if "json" in resp.headers.get("content-type", "") else {}
errcode = result.get("errcode")
if errcode not in (None, 0):
raise RuntimeError(f"API errcode={errcode}: {body[:300]}")
@@ -143,119 +75,32 @@ class DingTalkApp:
return False
async def send_text(self, chat_id, content):
- for part in _split_text(content):
+ for part in split_text(content, self.split_limit):
await self._send_batch_message(chat_id, "sampleMarkdown", {"text": part, "title": "Agent Reply"})
- async def send_done(self, chat_id, raw_text):
- files = [p for p in _extract_files(raw_text) if os.path.exists(p)]
- body = _strip_files(_clean(raw_text))
- if files:
- body = (body + "\n\n" if body else "") + "\n".join([f"生成文件: {p}" for p in files])
- await self.send_text(chat_id, body or "...")
-
- async def handle_command(self, chat_id, cmd):
- parts = (cmd or "").split()
- op = (parts[0] if parts else "").lower()
- if op == "/stop":
- state = _USER_TASKS.get(chat_id)
- if state:
- state["running"] = False
- agent.abort()
- await self.send_text(chat_id, "⏹️ 正在停止...")
- elif op == "/status":
- llm = agent.get_llm_name() if agent.llmclient else "未配置"
- await self.send_text(chat_id, f"状态: {'🔴 运行中' if agent.is_running else '🟢 空闲'}\nLLM: [{agent.llm_no}] {llm}")
- elif op == "/llm":
- if not agent.llmclient:
- return await self.send_text(chat_id, "❌ 当前没有可用的 LLM 配置")
- if len(parts) > 1:
- try:
- n = int(parts[1])
- agent.next_llm(n)
- await self.send_text(chat_id, f"✅ 已切换到 [{agent.llm_no}] {agent.get_llm_name()}")
- except Exception:
- await self.send_text(chat_id, f"用法: /llm <0-{len(agent.list_llms()) - 1}>")
- else:
- lines = [f"{'→' if cur else ' '} [{i}] {name}" for i, name, cur in agent.list_llms()]
- await self.send_text(chat_id, "LLMs:\n" + "\n".join(lines))
- elif op == "/restore":
- try:
- restored_info, err = _format_restore()
- if err:
- return await self.send_text(chat_id, err)
- restored, fname, count = restored_info
- agent.abort()
- agent.history.extend(restored)
- await self.send_text(chat_id, f"✅ 已恢复 {count} 轮对话\n来源: {fname}\n(仅恢复上下文,请输入新问题继续)")
- except Exception as e:
- await self.send_text(chat_id, f"❌ 恢复失败: {e}")
- elif op == "/new":
- agent.abort()
- agent.history = []
- await self.send_text(chat_id, "🆕 已清空当前共享上下文")
- else:
- await self.send_text(
- chat_id,
- "📖 命令列表:\n/help - 显示帮助\n/status - 查看状态\n/stop - 停止当前任务\n/new - 清空当前上下文\n/restore - 恢复上次对话历史\n/llm [n] - 查看或切换模型",
- )
-
- async def run_agent(self, chat_id, text):
- state = {"running": True}
- _USER_TASKS[chat_id] = state
- try:
- await self.send_text(chat_id, "思考中...")
- prompt = f"If you need to show files to user, use [FILE:filepath] in your response.\n\n{text}"
- dq = agent.put_task(prompt, source="dingtalk")
- last_ping = time.time()
- while state["running"]:
- try:
- item = await asyncio.to_thread(dq.get, True, 3)
- except Q.Empty:
- if agent.is_running and time.time() - last_ping > 20:
- await self.send_text(chat_id, "⏳ 还在处理中,请稍等...")
- last_ping = time.time()
- continue
- if "done" in item:
- await self.send_done(chat_id, item.get("done", ""))
- break
- if not state["running"]:
- await self.send_text(chat_id, "⏹️ 已停止")
- except Exception as e:
- import traceback
-
- print(f"[DingTalk] run_agent error: {e}")
- traceback.print_exc()
- await self.send_text(chat_id, f"❌ 错误: {e}")
- finally:
- _USER_TASKS.pop(chat_id, None)
-
async def on_message(self, content, sender_id, sender_name, conversation_type=None, conversation_id=None):
try:
if not content:
return
- public_access = not ALLOWED or "*" in ALLOWED
- if not public_access and sender_id not in ALLOWED:
+ if not public_access(ALLOWED) and sender_id not in ALLOWED:
print(f"[DingTalk] unauthorized user: {sender_id}")
return
is_group = conversation_type == "2" and conversation_id
chat_id = f"group:{conversation_id}" if is_group else sender_id
print(f"[DingTalk] message from {sender_name} ({sender_id}): {content}")
if content.startswith("/"):
- await self.handle_command(chat_id, content)
- return
+ return await self.handle_command(chat_id, content)
task = asyncio.create_task(self.run_agent(chat_id, content))
self.background_tasks.add(task)
task.add_done_callback(self.background_tasks.discard)
except Exception:
import traceback
-
print("[DingTalk] handle_message error")
traceback.print_exc()
async def start(self):
- handler = _DingTalkHandler(self)
self.client = DingTalkStreamClient(Credential(CLIENT_ID, CLIENT_SECRET))
- self.client.register_callback_handler(ChatbotMessage.TOPIC, handler)
+ self.client.register_callback_handler(ChatbotMessage.TOPIC, _DingTalkHandler(self))
print("[DingTalk] bot starting...")
while True:
try:
@@ -274,44 +119,22 @@ class _DingTalkHandler(CallbackHandler):
async def process(self, message):
try:
chatbot_msg = ChatbotMessage.from_dict(message.data)
- text = ""
- if getattr(getattr(chatbot_msg, "text", None), "content", None):
- text = chatbot_msg.text.content.strip()
+ text = getattr(getattr(chatbot_msg, "text", None), "content", "") or ""
extensions = getattr(chatbot_msg, "extensions", None) or {}
recognition = ((extensions.get("content") or {}).get("recognition") or "").strip() if isinstance(extensions, dict) else ""
- if not text:
+ if not (text := text.strip()):
text = recognition or str((message.data.get("text", {}) or {}).get("content", "") or "").strip()
- sender_id = getattr(chatbot_msg, "sender_staff_id", None) or getattr(chatbot_msg, "sender_id", None) or "unknown"
+ sender_id = str(getattr(chatbot_msg, "sender_staff_id", None) or getattr(chatbot_msg, "sender_id", None) or "unknown")
sender_name = getattr(chatbot_msg, "sender_nick", None) or "Unknown"
- conversation_type = message.data.get("conversationType")
- conversation_id = message.data.get("conversationId") or message.data.get("openConversationId")
- await self.app.on_message(text, str(sender_id), sender_name, conversation_type, conversation_id)
- return AckMessage.STATUS_OK, "OK"
+ await self.app.on_message(text, sender_id, sender_name, message.data.get("conversationType"), message.data.get("conversationId") or message.data.get("openConversationId"))
except Exception as e:
print(f"[DingTalk] callback error: {e}")
- return AckMessage.STATUS_OK, "Error"
+ return AckMessage.STATUS_OK, "OK"
if __name__ == "__main__":
- try:
- _lock_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- _lock_sock.bind(("127.0.0.1", 19530))
- except OSError:
- print("[DingTalk] Another instance is already running, skipping...")
- sys.exit(1)
-
- if not CLIENT_ID or not CLIENT_SECRET:
- print("[DingTalk] ERROR: please set dingtalk_client_id and dingtalk_client_secret in mykey.py or mykey.json")
- sys.exit(1)
- if agent.llmclient is None:
- print("[DingTalk] ERROR: no usable LLM backend found in mykey.py or mykey.json")
- sys.exit(1)
-
- log_dir = os.path.join(os.path.dirname(__file__), "temp")
- os.makedirs(log_dir, exist_ok=True)
- _logf = open(os.path.join(log_dir, "dingtalkapp.log"), "a", encoding="utf-8", buffering=1)
- sys.stdout = sys.stderr = _logf
- print("[NEW] DingTalk process starting, the above are history infos ...")
- print(f"[DingTalk] allow list: {'public' if not ALLOWED or '*' in ALLOWED else sorted(ALLOWED)}")
+ _LOCK_SOCK = ensure_single_instance(19530, "DingTalk")
+ require_runtime(agent, "DingTalk", dingtalk_client_id=CLIENT_ID, dingtalk_client_secret=CLIENT_SECRET)
+ redirect_log(__file__, "dingtalkapp.log", "DingTalk", ALLOWED)
threading.Thread(target=agent.run, daemon=True).start()
asyncio.run(DingTalkApp().start())
diff --git a/ga.py b/ga.py
index ee1d731..e5b64c1 100644
--- a/ga.py
+++ b/ga.py
@@ -246,8 +246,7 @@ class GenericAgentHandler(BaseHandler):
'''Generic Agent 工具库,包含多种工具的实现。工具函数自动加上了 do_ 前缀。实际工具名没有前缀。'''
def __init__(self, parent, last_history=None, cwd='./'):
self.parent = parent
- self.key_info = ""
- self.related_sop = ""
+ self.working = {}
self.cwd = cwd; self.current_turn = 0
self.history_info = last_history if last_history else []
self.code_stop_signal = []
@@ -408,11 +407,12 @@ class GenericAgentHandler(BaseHandler):
'''
key_info = args.get("key_info", "")
related_sop = args.get("related_sop", "")
- if "key_info" in args: self.key_info = key_info
- if "related_sop" in args: self.related_sop = related_sop
+ if "key_info" in args: self.working['key_info'] = key_info
+ if "related_sop" in args: self.working['related_sop'] = related_sop
+ self.working['passed_sessions'] = 0
yield f"[Info] Updated key_info and related_sop.\n"
- yield f"key_info:\n{self.key_info}\n\n"
- yield f"related_sop:\n{self.related_sop}\n\n"
+ yield f"key_info:\n{self.working.get('key_info', '')}\n\n"
+ yield f"related_sop:\n{self.working.get('related_sop', '')}\n\n"
next_prompt = self._get_anchor_prompt()
#next_prompt += '\n[SYSTEM TIPS] 此函数一般在任务开始或中间时调用,如果任务已成功完成应该是start_long_term_update用于结算长期记忆。\n'
return StepOutcome({"status": "success"}, next_prompt=next_prompt)
@@ -477,14 +477,14 @@ class GenericAgentHandler(BaseHandler):
h_str = "\n".join(self.history_info[-20:])
prompt = f"\n### [WORKING MEMORY]\n\n{h_str}\n"
prompt += f"\nCurrent turn: {self.current_turn}\n"
- if self.key_info: prompt += f"\n{self.key_info}"
- if self.related_sop: prompt += f"\n有不清晰的地方请再次读取{self.related_sop}"
+ if self.working.get('key_info'): prompt += f"\n{self.working.get('key_info')}"
+ if self.working.get('related_sop'): prompt += f"\n有不清晰的地方请再次读取{self.working.get('related_sop')}"
try: print(prompt)
except: pass
return prompt
def next_prompt_patcher(self, next_prompt, outcome, turn):
- if turn % 35 == 0 and 'plan' not in str(self.related_sop):
+ if turn % 35 == 0 and 'plan' not in str(self.working.get('related_sop')):
next_prompt += f"\n\n[DANGER] 已连续执行第 {turn} 轮。你必须总结情况进行ask_user,不允许继续重试。"
elif turn % 7 == 0:
next_prompt += f"\n\n[DANGER] 已连续执行第 {turn} 轮。禁止无效重试。若无有效进展,必须切换策略:1. 探测物理边界 2. 请求用户协助。如有需要,可调用 update_working_checkpoint 保存关键上下文。"
diff --git a/llmcore.py b/llmcore.py
index c7ac438..2c46a0f 100644
--- a/llmcore.py
+++ b/llmcore.py
@@ -54,10 +54,10 @@ def build_multimodal_content(prompt_text, image_paths):
return parts
class SiderLLMSession:
- def __init__(self, sider_cookie, default_model="gemini-3.0-flash"):
+ def __init__(self, cfg):
from sider_ai_api import Session # 不使用sider的话没必要安装这个包
- self._core = Session(cookie=sider_cookie, proxies=proxies)
- self.default_model = default_model
+ self._core = Session(cookie=cfg['apikey'], proxies=proxies)
+ self.default_model = cfg.get('model', 'gemini-3.0-flash')
def ask(self, prompt, model=None, stream=False):
if model is None: model = self.default_model
if len(prompt) > 28000:
@@ -68,8 +68,10 @@ class SiderLLMSession:
return full_text
class ClaudeSession:
- def __init__(self, api_key, api_base, model="claude-opus", context_win=12000):
- self.api_key, self.api_base, self.default_model, self.context_win = api_key, api_base.rstrip('/'), model, context_win
+ def __init__(self, cfg):
+ self.api_key = cfg['apikey']; self.api_base = cfg['apibase'].rstrip('/')
+ self.default_model = cfg.get('model', 'claude-opus')
+ self.context_win = cfg.get('context_win', 12000)
self.raw_msgs, self.lock = [], threading.Lock()
def _trim_messages(self, messages):
compress_history_tags(messages)
@@ -84,6 +86,7 @@ class ClaudeSession:
return result[::-1] or messages[-2:]
def raw_ask(self, messages, model=None, temperature=0.5, max_tokens=6144):
model = model or self.default_model
+ if 'kimi' in model.lower() or 'moonshot' in model.lower(): temperature = 1.0 # kimi/moonshot only accepts temp 1.0
headers = {"x-api-key": self.api_key, "Content-Type": "application/json", "anthropic-version": "2023-06-01"}
payload = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, "stream": True}
try:
@@ -117,28 +120,28 @@ class ClaudeSession:
return _ask_gen() if stream else ''.join(list(_ask_gen()))
class LLMSession:
- def __init__(self, api_key, api_base, model, context_win=16000, proxy=None, api_mode="chat_completions",
- max_retries=2, connect_timeout=10, read_timeout=120):
- self.api_key = api_key; self.api_base = api_base.rstrip('/'); self.default_model = model
- self.context_win = context_win; self.raw_msgs = []; self.messages = []
+ def __init__(self, cfg):
+ self.api_key = cfg['apikey']; self.api_base = cfg['apibase'].rstrip('/')
+ self.default_model = cfg['model']
+ self.context_win = cfg.get('context_win', 16000)
+ self.raw_msgs, self.messages = [], []
+ proxy = cfg.get('proxy')
self.proxies = {"http": proxy, "https": proxy} if proxy else None
+ self.prompt_cache = cfg.get('prompt_cache', False)
self.lock = threading.Lock()
- self.max_retries = max(0, int(max_retries))
- self.connect_timeout = max(1, int(connect_timeout))
- self.read_timeout = max(5, int(read_timeout))
- mode = str(api_mode or "chat_completions").strip().lower().replace('-', '_')
+ self.max_retries = max(0, int(cfg.get('max_retries', 2)))
+ self.connect_timeout = max(1, int(cfg.get('connect_timeout', 10)))
+ self.read_timeout = max(5, int(cfg.get('read_timeout', 120)))
+ mode = str(cfg.get('api_mode', 'chat_completions')).strip().lower().replace('-', '_')
if mode in ["responses", "response"]: self.api_mode = "responses"
else: self.api_mode = "chat_completions"
def _retry_delay(self, resp, attempt):
retry_after = None
try:
- if resp is not None:
- retry_after = (resp.headers or {}).get("retry-after")
- if retry_after is not None:
- retry_after = float(retry_after)
- except:
- retry_after = None
+ if resp is not None: retry_after = (resp.headers or {}).get("retry-after")
+ if retry_after is not None: retry_after = float(retry_after)
+ except: retry_after = None
if retry_after is None: retry_after = min(30.0, 1.5 * (2 ** attempt))
return max(0.5, float(retry_after))
@@ -168,6 +171,7 @@ class LLMSession:
def raw_ask(self, messages, model=None, temperature=0.5):
if model is None: model = self.default_model
+ if 'kimi' in model.lower() or 'moonshot' in model.lower(): temperature = 1.0 # kimi/moonshot only accepts temp 1.0
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
if self.api_mode == "responses":
url = auto_make_url(self.api_base, "responses")
@@ -310,7 +314,7 @@ class LLMSession:
content += chunk; yield chunk
if not content.startswith("Error:"):
self.raw_msgs.append({"role": "assistant", "prompt": content, "image": None})
- if total_len > 5000: print(f"[Debug] Whole context length {total_len} {str(msg_lens)}.")
+ if total_len > self.context_win // 2: print(f"[Debug] Whole context length {total_len} {str(msg_lens)}.")
if total_len > self.context_win:
yield '[NextWillSummary]'
threading.Thread(target=self.summary_history, daemon=True).start()
@@ -319,11 +323,12 @@ class LLMSession:
class GeminiSession:
- def __init__(self, api_key=None, default_model="gemini-2.0-flash-001", proxy=proxy):
- self.api_key = api_key or google_api_key
+ def __init__(self, cfg):
+ self.api_key = cfg.get('apikey') or google_api_key
if not self.api_key: raise ValueError("google_api_key 未配置或为空,请在 mykey.py 中设置")
- self.default_model = default_model
- self.proxies = {"http":proxy, "https":proxy} if proxy else None
+ self.default_model = cfg.get('model', 'gemini-2.0-flash-001')
+ p = cfg.get('proxy', proxy)
+ self.proxies = {"http":p, "https":p} if p else None
def ask(self, prompt, model=None, stream=False):
if model is None: model = self.default_model
url = f"https://generativelanguage.googleapis.com/v1/models/{model}:generateContent?key={self.api_key}"
@@ -347,13 +352,14 @@ class GeminiSession:
return iter([full_text]) if stream else full_text
class XaiSession:
- def __init__(self, api_key, proxy="http://127.0.0.1:2082", default_model="grok-4-1-fast-non-reasoning"):
+ def __init__(self, cfg):
import xai_sdk
from xai_sdk.chat import user, system
self._user, self._system = user, system
- self.default_model = default_model
+ self.default_model = cfg.get('model', 'grok-4-1-fast-non-reasoning')
self._last_response_id = None # 多轮对话链
- os.environ["XAI_API_KEY"] = api_key
+ os.environ["XAI_API_KEY"] = cfg['apikey']
+ proxy = cfg.get('proxy', 'http://127.0.0.1:2082')
if not proxy.startswith("http"): proxy = f"http://{proxy}"
os.environ.setdefault("grpc_proxy", proxy)
self._client = xai_sdk.Client()
@@ -411,23 +417,28 @@ class ToolClient:
self.total_cd_tokens = 0
def chat(self, messages, tools=None):
- if self._should_use_structured_messages(messages):
- return (yield from self._chat_structured(messages, tools))
- full_prompt = self._build_protocol_prompt(messages, tools)
- print("Full prompt length:", len(full_prompt), 'chars')
script_dir = os.path.dirname(os.path.abspath(__file__))
- with open(os.path.join(script_dir, f'./temp/model_responses_{os.getpid()}.txt'), 'a', encoding='utf-8', errors="replace") as f:
- f.write(f"=== Prompt === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n{full_prompt}\n")
- gen = self.backend.ask(full_prompt, stream=True)
+ log_path = os.path.join(script_dir, f'./temp/model_responses_{os.getpid()}.txt')
+ if self._should_use_structured_messages(messages):
+ backend_messages = self._build_backend_messages(messages, tools)
+ print("Structured prompt length:", sum(self._estimate_content_len(m.get("content")) for m in backend_messages), 'chars')
+ prompt_log = self._serialize_messages_for_log(backend_messages)
+ gen = self.backend.raw_ask(backend_messages)
+ else:
+ full_prompt = self._build_protocol_prompt(messages, tools)
+ print("Full prompt length:", len(full_prompt), 'chars')
+ prompt_log = full_prompt
+ gen = self.backend.ask(full_prompt, stream=True)
+ with open(log_path, 'a', encoding='utf-8', errors="replace") as f:
+ f.write(f"=== Prompt === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n{prompt_log}\n")
raw_text = ''; summarytag = '[NextWillSummary]'
for chunk in gen:
- raw_text += chunk;
+ raw_text += chunk
if chunk != summarytag: yield chunk
print('Complete response received.')
if raw_text.endswith(summarytag):
self.last_tools = ''; raw_text = raw_text[:-len(summarytag)]
- script_dir = os.path.dirname(os.path.abspath(__file__))
- with open(os.path.join(script_dir, f'./temp/model_responses_{os.getpid()}.txt'), 'a', encoding='utf-8', errors="replace") as f:
+ with open(log_path, 'a', encoding='utf-8', errors="replace") as f:
f.write(f"=== Response === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n{raw_text}\n\n")
return self._parse_mixed_response(raw_text)
@@ -504,24 +515,6 @@ class ToolClient:
logged.append(msg)
return json.dumps(logged, ensure_ascii=False, indent=2)
- def _chat_structured(self, messages, tools):
- backend_messages = self._build_backend_messages(messages, tools)
- print("Structured prompt length:", sum(self._estimate_content_len(m.get("content")) for m in backend_messages), 'chars')
- script_dir = os.path.dirname(os.path.abspath(__file__))
- with open(os.path.join(script_dir, f'./temp/model_responses_{os.getpid()}.txt'), 'a', encoding='utf-8', errors="replace") as f:
- f.write(f"=== Prompt === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n{self._serialize_messages_for_log(backend_messages)}\n")
- gen = self.backend.raw_ask(backend_messages)
- raw_text = ''; summarytag = '[NextWillSummary]'
- for chunk in gen:
- raw_text += chunk
- if chunk != summarytag: yield chunk
- print('Complete response received.')
- if raw_text.endswith(summarytag):
- self.last_tools = ''; raw_text = raw_text[:-len(summarytag)]
- with open(os.path.join(script_dir, f'./temp/model_responses_{os.getpid()}.txt'), 'a', encoding='utf-8', errors="replace") as f:
- f.write(f"=== Response === {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n{raw_text}\n\n")
- return self._parse_mixed_response(raw_text)
-
def _build_protocol_prompt(self, messages, tools):
system_content = next((m['content'] for m in messages if m['role'].lower() == 'system'), "")
history_msgs = [m for m in messages if m['role'].lower() != 'system']
@@ -609,10 +602,7 @@ if __name__ == "__main__":
}
google_api_key = mykeys.get("google_api_key")
cfg = oai_configs.get("oai_config")
-
- llmclient = ToolClient(GeminiSession(api_key=google_api_key, proxy='127.0.0.1:2082').ask)
- #llmclient = ToolClient(LLMSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model']).ask)
- #llmclient = ToolClient(SiderLLMSession().ask)
+ llmclient = ToolClient(LLMSession(cfg))
def get_final(gen):
try:
while True: print('mid:', next(gen))
diff --git a/memory/autonomous_operation_sop.md b/memory/autonomous_operation_sop.md
index 324610f..3cb8bf8 100644
--- a/memory/autonomous_operation_sop.md
+++ b/memory/autonomous_operation_sop.md
@@ -15,7 +15,7 @@
## 执行
**启动**:
-- update_working_checkpoint: `自主行动|报告→./autonomous_reports/R{XX}_简短描述.md|≤30回合|收尾:重读sop,写报告+更新history+标记TODO`
+- update_working_checkpoint: `自主行动|报告→./autonomous_reports/R{XX}_简短描述.md|≤30回合|收尾:重读sop,写报告+更新history+标记TODO | 报告严禁放错位置`
- 读 `./autonomous_reports/history.txt` 推断下一编号RXX + 了解历史避免重复
**执行**:
diff --git a/memory/skill_search/SKILL.md b/memory/skill_search/SKILL.md
new file mode 100644
index 0000000..e89b34b
--- /dev/null
+++ b/memory/skill_search/SKILL.md
@@ -0,0 +1,64 @@
+# Skill Search — 105K 技能卡检索
+
+> 从 105K+ 技能卡中语义搜索最匹配的 skill。零依赖,内置默认 API 地址,开箱即用。
+
+## 最简调用
+
+```python
+import sys; sys.path.append('../memory/skill_search')
+from skill_search import search
+
+results = search("python send email") # ⚠️ 必须用英文查询,中文匹配效果极差
+for r in results:
+ s = r.skill
+ print(f"[{r.final_score:.2f}] {s.name} — {s.one_line_summary}")
+ print(f" key: {s.key} category: {s.category} tags: {s.tags[:3]}")
+```
+
+## API 签名
+
+```python
+search(query, env=None, category=None, top_k=10) -> list[SearchResult]
+# env: 自动检测,一般不传
+# category: 可选过滤,如 "devops"
+# top_k: 返回数量,默认10
+```
+
+## 返回结构
+
+```
+SearchResult
+ .final_score float 综合评分 (0~1)
+ .relevance float 语义相关度
+ .quality float 质量分
+ .match_reasons list[str] 匹配原因
+ .warnings list[str] 警告
+ .skill SkillIndex ↓
+
+SkillIndex (常用字段)
+ .key str 唯一标识/路径
+ .name str 名称
+ .one_line_summary str 一句话摘要
+ .description str 详细描述
+ .category str 类别
+ .tags list[str] 标签
+ .form str 形式(sop/script/...)
+ .autonomous_safe bool 是否自主安全
+```
+
+## CLI
+
+```bash
+python -m skill_search "python testing"
+python -m skill_search "docker deployment" --category devops --top 5
+python -m skill_search "git" --json
+python -m skill_search --stats
+python -m skill_search --env
+```
+
+## 配置
+
+| 项 | 默认值 | 说明 |
+|---|---|---|
+| API地址 | `http://www.fudankw.cn:58787` | 环境变量 `SKILL_SEARCH_API` 可覆盖 |
+| API密钥 | 无(可选) | 环境变量 `SKILL_SEARCH_KEY` |
\ No newline at end of file
diff --git a/memory/skill_search/skill_search/__init__.py b/memory/skill_search/skill_search/__init__.py
new file mode 100644
index 0000000..70b246f
--- /dev/null
+++ b/memory/skill_search/skill_search/__init__.py
@@ -0,0 +1,8 @@
+"""skill_search — Skill 检索 API 客户端"""
+from .engine import (
+ SkillIndex, SearchResult, SkillSearchError,
+ search, get_stats, detect_environment,
+)
+
+__all__ = ["SkillIndex", "SearchResult", "SkillSearchError",
+ "search", "get_stats", "detect_environment"]
\ No newline at end of file
diff --git a/memory/skill_search/skill_search/__main__.py b/memory/skill_search/skill_search/__main__.py
new file mode 100644
index 0000000..6691a27
--- /dev/null
+++ b/memory/skill_search/skill_search/__main__.py
@@ -0,0 +1,116 @@
+"""CLI 入口: python -m skill_search"""
+from __future__ import annotations
+import argparse, json, sys
+from .engine import SearchResult, SkillSearchError, detect_environment, search, get_stats
+
+
+# ── 格式化 ───────────────────────────────────────────────
+
+def format_results(results: list[SearchResult], env: dict, query: str) -> str:
+ lines = [f'🔍 搜索: "{query}"',
+ f"🖥️ 环境: {env.get('os','?')} / {env.get('shell','?')} / {', '.join(env.get('runtimes',[]))}",
+ f"📊 找到 {len(results)} 个匹配结果\n"]
+ if not results:
+ lines.append("未找到匹配的 skill。试试其他关键词?")
+ return "\n".join(lines)
+ for i, r in enumerate(results, 1):
+ s = r.skill
+ safe_icon = "🟢" if s.autonomous_safe else "🔴"
+ score_bar = "█" * int(r.final_score * 10) + "░" * (10 - int(r.final_score * 10))
+ lines += [
+ f"{'─'*60}",
+ f"#{i} {safe_icon} {s.name}",
+ f" 路径: {s.key}",
+ f" 类别: {s.category} | 标签: {', '.join(s.tags[:5])}",
+ f" 摘要: {s.one_line_summary}",
+ f" 评分: [{score_bar}] {r.final_score:.2f} (相关={r.relevance:.2f} 质量={r.quality:.1f})",
+ f" 清晰={s.clarity} 完整={s.completeness} 可操作={s.actionability} | 形式={s.form}",
+ ]
+ if r.match_reasons:
+ lines.append(f" 匹配: {' | '.join(r.match_reasons[:3])}")
+ if r.warnings:
+ lines.extend(f" {w}" for w in r.warnings)
+ lines.append("")
+ lines.append(f"{'─'*60}")
+ return "\n".join(lines)
+
+
+def format_results_json(results: list[SearchResult]) -> list[dict]:
+ out = []
+ for r in results:
+ s = r.skill
+ out.append({
+ "rank": len(out) + 1, "key": s.key, "name": s.name,
+ "category": s.category, "tags": s.tags,
+ "description": s.description, "one_line_summary": s.one_line_summary,
+ "scores": {"final": round(r.final_score, 3), "relevance": round(r.relevance, 3),
+ "quality": round(r.quality, 1), "clarity": s.clarity,
+ "completeness": s.completeness, "actionability": s.actionability},
+ "safety": {"autonomous_safe": s.autonomous_safe, "blast_radius": s.blast_radius,
+ "requires_credentials": s.requires_credentials,
+ "data_exposure": s.data_exposure, "effect_scope": s.effect_scope},
+ "platform": {"os": s.os, "runtimes": s.runtimes, "tools": s.tools, "services": s.services},
+ "warnings": r.warnings, "match_reasons": r.match_reasons,
+ })
+ return out
+
+
+# ── CLI ──────────────────────────────────────────────────
+
+def main():
+ parser = argparse.ArgumentParser(prog="skill_search",
+ description="Skill 检索系统 — 根据环境和需求智能推荐 skill(API 客户端)")
+ parser.add_argument("query", nargs="?", help="搜索关键词(如: 'python testing')")
+ parser.add_argument("--category", "-cat", help="限定类别")
+ parser.add_argument("--top", "-k", type=int, default=10, help="返回结果数(默认 10)")
+ parser.add_argument("--json", action="store_true", help="JSON 格式输出")
+ parser.add_argument("--env", action="store_true", help="仅显示检测到的环境信息")
+ parser.add_argument("--stats", action="store_true", help="显示索引统计信息")
+ parser.add_argument("--api-url", help="指定 API 地址(也可用 SKILL_SEARCH_API 环境变量)")
+ args = parser.parse_args()
+
+ if args.api_url:
+ import os; os.environ["SKILL_SEARCH_API"] = args.api_url
+
+ env = detect_environment()
+
+ if args.env:
+ print("🖥️ 当前环境:")
+ print(f" OS: {env['os']}")
+ print(f" Shell: {env['shell']}")
+ print(f" 运行时: {', '.join(env['runtimes'])}")
+ print(f" 工具: {', '.join(env['tools'])}")
+ print(f" 模型能力: tool_calling={env['model']['tool_calling']}, "
+ f"reasoning={env['model']['reasoning']}, context={env['model']['context_window']}")
+ return
+
+ if args.stats:
+ try:
+ stats = get_stats(env)
+ print(f"📊 索引统计:")
+ print(f" 总计: {stats.get('total', '?')} 个 skills")
+ print(f" 自动安全: {stats.get('safe_count', '?')} 个")
+ if 'categories' in stats:
+ print(f" 类别分布:")
+ for cat, cnt in sorted(stats['categories'].items(), key=lambda x: -x[1]):
+ print(f" {cat:15s} {cnt:4d}")
+ except SkillSearchError as e:
+ print(f"❌ {e}", file=sys.stderr); sys.exit(1)
+ return
+
+ if not args.query:
+ parser.print_help(); return
+
+ try:
+ results = search(query=args.query, env=env, category=args.category, top_k=args.top)
+ except SkillSearchError as e:
+ print(f"❌ {e}", file=sys.stderr); sys.exit(1)
+
+ if args.json:
+ print(json.dumps(format_results_json(results), indent=2, ensure_ascii=False))
+ else:
+ print(format_results(results, env, args.query))
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/memory/skill_search/skill_search/engine.py b/memory/skill_search/skill_search/engine.py
new file mode 100644
index 0000000..5625a4f
--- /dev/null
+++ b/memory/skill_search/skill_search/engine.py
@@ -0,0 +1,156 @@
+"""Skill 检索引擎 — API 客户端(含数据模型与环境检测)"""
+from __future__ import annotations
+import json, os, platform, shutil, subprocess, urllib.request, urllib.error
+from dataclasses import dataclass, field
+
+# ── 数据模型 ─────────────────────────────────────────────
+
+@dataclass
+class SkillIndex:
+ """Skill 索引条目(与服务端结构对齐)"""
+ key: str
+ name: str = ""
+ description: str = ""
+ one_line_summary: str = ""
+ category: str = ""
+ tags: list[str] = field(default_factory=list)
+ language: str = "en"
+ os: list[str] = field(default_factory=list)
+ shell: list[str] = field(default_factory=list)
+ runtimes: list[str] = field(default_factory=list)
+ tools: list[str] = field(default_factory=list)
+ services: list[str] = field(default_factory=list)
+ needs_tool_calling: bool = False
+ needs_reasoning: bool = False
+ min_context_window: str = "standard"
+ decay_risk: str = "low"
+ clarity: int = 0
+ completeness: int = 0
+ actionability: int = 0
+ autonomous_safe: bool = True
+ blast_radius: str = "low"
+ requires_credentials: bool = False
+ data_exposure: str = "none"
+ effect_scope: str = "local"
+ form: str = ""
+ estimated_tokens: str = "medium"
+ capabilities: list[str] = field(default_factory=list)
+ github_stars: int = 0
+ github_url: str = ""
+
+ @property
+ def quality_score(self):
+ return self.clarity * 0.3 + self.completeness * 0.3 + self.actionability * 0.4
+
+ @classmethod
+ def from_dict(cls, d):
+ known = {f.name for f in cls.__dataclass_fields__.values()}
+ return cls(**{k: v for k, v in d.items() if k in known})
+
+
+@dataclass
+class SearchResult:
+ """单条检索结果"""
+ skill: SkillIndex
+ relevance: float = 0.0
+ quality: float = 0.0
+ final_score: float = 0.0
+ match_reasons: list[str] = field(default_factory=list)
+ warnings: list[str] = field(default_factory=list)
+
+ @classmethod
+ def from_dict(cls, d):
+ skill = SkillIndex.from_dict(d.get("skill", d))
+ return cls(skill=skill, relevance=d.get("relevance", 0.0),
+ quality=d.get("quality", 0.0), final_score=d.get("final_score", 0.0),
+ match_reasons=d.get("match_reasons", []), warnings=d.get("warnings", []))
+
+
+# ── 环境检测 ─────────────────────────────────────────────
+
+def _run(cmd):
+ try:
+ r = subprocess.run(cmd.split(), capture_output=True, text=True, timeout=5)
+ return r.stdout.strip() if r.returncode == 0 else ""
+ except Exception:
+ return ""
+
+def _detect_os():
+ s = platform.system().lower()
+ return {"darwin": "macos", "linux": "linux", "windows": "windows"}.get(s, s)
+
+def _detect_shell():
+ shell = os.environ.get("SHELL", "")
+ if "zsh" in shell: return "zsh"
+ if "bash" in shell: return "bash"
+ if platform.system() == "Windows": return "powershell"
+ return os.path.basename(shell) if shell else "unknown"
+
+def _detect_runtimes():
+ checks = {"python": ["python3", "python"], "node": ["node"], "go": ["go"],
+ "rust": ["rustc"], "java": ["java"], "ruby": ["ruby"],
+ "php": ["php"], "dotnet": ["dotnet"]}
+ found = []
+ for name, cmds in checks.items():
+ for cmd in cmds:
+ if shutil.which(cmd):
+ found.append(name); break
+ return found
+
+def _detect_tools():
+ tools = ["git", "docker", "npm", "pip", "curl", "wget", "kubectl",
+ "terraform", "aws", "gcloud", "az", "brew", "cargo", "make", "cmake"]
+ return [t for t in tools if shutil.which(t)]
+
+def detect_environment():
+ """采集完整环境信息"""
+ return {"os": _detect_os(), "shell": _detect_shell(),
+ "runtimes": _detect_runtimes(), "tools": _detect_tools(),
+ "model": {"tool_calling": True, "reasoning": True, "context_window": "large"}}
+
+
+# ── API 配置与调用 ────────────────────────────────────────
+
+DEFAULT_API_URL = "http://www.fudankw.cn:58787"
+
+def _get_api_url():
+ return os.environ.get("SKILL_SEARCH_API", DEFAULT_API_URL)
+
+def _get_api_key():
+ return os.environ.get("SKILL_SEARCH_KEY")
+
+class SkillSearchError(Exception):
+ pass
+
+def _api_request(endpoint, payload):
+ url = f"{_get_api_url()}/{endpoint}"
+ data = json.dumps(payload).encode("utf-8")
+ headers = {"Content-Type": "application/json"}
+ api_key = _get_api_key()
+ if api_key:
+ headers["Authorization"] = f"Bearer {api_key}"
+ req = urllib.request.Request(url, data=data, headers=headers, method="POST")
+ try:
+ with urllib.request.urlopen(req, timeout=30) as resp:
+ return json.loads(resp.read().decode("utf-8"))
+ except urllib.error.HTTPError as e:
+ body = e.read().decode("utf-8", errors="replace")
+ raise SkillSearchError(f"API 错误 {e.code}: {body}") from e
+ except urllib.error.URLError as e:
+ raise SkillSearchError(f"无法连接服务: {e.reason}") from e
+ except Exception as e:
+ raise SkillSearchError(f"请求失败: {e}") from e
+
+
+# ── 公开接口 ─────────────────────────────────────────────
+
+def search(query, env=None, category=None, top_k=10):
+ if env is None: env = detect_environment()
+ payload = {"query": query, "env": env, "top_k": top_k}
+ if category: payload["category"] = category
+ resp = _api_request("search", payload)
+ return [SearchResult.from_dict(r) for r in resp.get("results", [])]
+
+def get_stats(env=None):
+ if env is None: env = detect_environment()
+ return _api_request("stats", {"env": env})
\ No newline at end of file
diff --git a/memory/tmwebdriver_sop.md b/memory/tmwebdriver_sop.md
index ad26ebd..a004fc2 100644
--- a/memory/tmwebdriver_sop.md
+++ b/memory/tmwebdriver_sop.md
@@ -104,4 +104,7 @@ web_scan失败时按序排查:
①TM没装?→遍历本机所有Chromium浏览器(Chrome/Edge/Brave…)用户数据目录下Extensions/,各子目录manifest.json搜"tampermonkey"
没找到→走web_setup_sop;找到→记住装在哪个浏览器
②浏览器没开?→检查①对应的浏览器进程是否在跑(tasklist/ps),没有则启动并打开正常URL(⚠about:blank等内部页不加载扩展)
-③WS后台挂了?→socket.connect_ex(('localhost',18766))非0即dead→手动`from TMWebDriver import TMWebDriver; TMWebDriver()`起master
\ No newline at end of file
+③WS后台挂了?→socket.connect_ex(('127.0.0.1',18766))非0即dead→手动`from TMWebDriver import TMWebDriver; TMWebDriver()`起master
+
+## 性能
+- ⚠ URL必须用`127.0.0.1`不用`localhost`。Windows下localhost先尝试IPv6(::1)超时2s再回退IPv4,每次HTTP请求多2s
\ No newline at end of file
diff --git a/qqapp.py b/qqapp.py
index 5e65002..cd4223f 100644
--- a/qqapp.py
+++ b/qqapp.py
@@ -1,8 +1,9 @@
-import os, sys, re, threading, asyncio, queue as Q, socket, time, glob
+import asyncio, os, sys, threading, time
from collections import deque
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from agentmain import GeneraticAgent
+from chatapp_common import AgentChatMixin, ensure_single_instance, public_access, redirect_log, require_runtime, split_text
from llmcore import mykeys
try:
@@ -12,73 +13,19 @@ except Exception:
print("Please install qq-botpy to use QQ module: pip install qq-botpy")
sys.exit(1)
-agent = GeneraticAgent()
-agent.verbose = False
-
+agent = GeneraticAgent(); agent.verbose = False
APP_ID = str(mykeys.get("qq_app_id", "") or "").strip()
APP_SECRET = str(mykeys.get("qq_app_secret", "") or "").strip()
ALLOWED = {str(x).strip() for x in mykeys.get("qq_allowed_users", []) if str(x).strip()}
-
-_TAG_PATS = [r"<" + t + r">.*?" + t + r">" for t in ("thinking", "summary", "tool_use", "file_content")]
-_PROCESSED_IDS = deque(maxlen=1000)
-_USER_TASKS = {}
-_SEQ_LOCK = threading.Lock()
-_MSG_SEQ = 1
+PROCESSED_IDS, USER_TASKS = deque(maxlen=1000), {}
+SEQ_LOCK, MSG_SEQ = threading.Lock(), 1
def _next_msg_seq():
- global _MSG_SEQ
- with _SEQ_LOCK:
- _MSG_SEQ += 1
- return _MSG_SEQ
-
-
-def _clean(text):
- for pat in _TAG_PATS:
- text = re.sub(pat, "", text, flags=re.DOTALL)
- return re.sub(r"\n{3,}", "\n\n", text).strip() or "..."
-
-
-def _extract_files(text):
- return re.findall(r"\[FILE:([^\]]+)\]", text or "")
-
-
-def _strip_files(text):
- return re.sub(r"\[FILE:[^\]]+\]", "", text or "").strip()
-
-
-def _split_text(text, limit=1500):
- text = (text or "").strip() or "..."
- parts = []
- while len(text) > limit:
- cut = text.rfind("\n", 0, limit)
- if cut < limit * 0.6:
- cut = limit
- parts.append(text[:cut].rstrip())
- text = text[cut:].lstrip()
- if text:
- parts.append(text)
- return parts or ["..."]
-
-
-def _format_restore():
- files = glob.glob("./temp/model_responses_*.txt")
- if not files:
- return None, "❌ 没有找到历史记录"
- latest = max(files, key=os.path.getmtime)
- with open(latest, "r", encoding="utf-8") as f:
- content = f.read()
- users = re.findall(r"=== USER ===\n(.+?)(?==== |$)", content, re.DOTALL)
- resps = re.findall(r"=== Response ===.*?\n(.+?)(?==== Prompt|$)", content, re.DOTALL)
- count, restored = 0, []
- for u, r in zip(users, resps):
- u, r = u.strip(), r.strip()[:500]
- if u and r:
- restored.extend([f"[USER]: {u}", f"[Agent] {r}"])
- count += 1
- if not restored:
- return None, "❌ 历史记录里没有可恢复内容"
- return (restored, os.path.basename(latest), count), None
+ global MSG_SEQ
+ with SEQ_LOCK:
+ MSG_SEQ += 1
+ return MSG_SEQ
def _build_intents():
@@ -86,16 +33,7 @@ def _build_intents():
return botpy.Intents(public_messages=True, direct_message=True)
except Exception:
intents = botpy.Intents.none() if hasattr(botpy.Intents, "none") else botpy.Intents()
- for attr in (
- "public_messages",
- "public_guild_messages",
- "direct_message",
- "direct_messages",
- "c2c_message",
- "c2c_messages",
- "group_at_message",
- "group_at_messages",
- ):
+ for attr in ("public_messages", "public_guild_messages", "direct_message", "direct_messages", "c2c_message", "c2c_messages", "group_at_message", "group_at_messages"):
if hasattr(intents, attr):
try:
setattr(intents, attr, True)
@@ -105,15 +43,12 @@ def _build_intents():
def _make_bot_class(app):
- intents = _build_intents()
-
- class _QQBot(botpy.Client):
+ class QQBot(botpy.Client):
def __init__(self):
- super().__init__(intents=intents, ext_handlers=False)
+ super().__init__(intents=_build_intents(), ext_handlers=False)
async def on_ready(self):
- name = getattr(getattr(self, "robot", None), "name", "QQBot")
- print(f"[QQ] bot ready: {name}")
+ print(f"[QQ] bot ready: {getattr(getattr(self, 'robot', None), 'name', 'QQBot')}")
async def on_c2c_message_create(self, message: C2CMessage):
await app.on_message(message, is_group=False)
@@ -124,154 +59,50 @@ def _make_bot_class(app):
async def on_direct_message_create(self, message):
await app.on_message(message, is_group=False)
- return _QQBot
+ return QQBot
-class QQApp:
+class QQApp(AgentChatMixin):
+ label, source, split_limit = "QQ", "qq", 1500
+
def __init__(self):
+ super().__init__(agent, USER_TASKS)
self.client = None
async def send_text(self, chat_id, content, *, msg_id=None, is_group=False):
if not self.client:
return
- for part in _split_text(content):
- seq = _next_msg_seq()
- if is_group:
- await self.client.api.post_group_message(
- group_openid=chat_id,
- msg_type=0,
- content=part,
- msg_id=msg_id,
- msg_seq=seq,
- )
- else:
- await self.client.api.post_c2c_message(
- openid=chat_id,
- msg_type=0,
- content=part,
- msg_id=msg_id,
- msg_seq=seq,
- )
-
- async def send_done(self, chat_id, raw_text, *, msg_id=None, is_group=False):
- files = [p for p in _extract_files(raw_text) if os.path.exists(p)]
- body = _strip_files(_clean(raw_text))
- if files:
- body = (body + "\n\n" if body else "") + "\n".join([f"生成文件: {p}" for p in files])
- await self.send_text(chat_id, body or "...", msg_id=msg_id, is_group=is_group)
-
- async def handle_command(self, chat_id, cmd, *, msg_id=None, is_group=False):
- parts = (cmd or "").split()
- op = (parts[0] if parts else "").lower()
- if op == "/stop":
- state = _USER_TASKS.get(chat_id)
- if state:
- state["running"] = False
- agent.abort()
- await self.send_text(chat_id, "⏹️ 正在停止...", msg_id=msg_id, is_group=is_group)
- elif op == "/status":
- llm = agent.get_llm_name() if agent.llmclient else "未配置"
- await self.send_text(chat_id, f"状态: {'🔴 运行中' if agent.is_running else '🟢 空闲'}\nLLM: [{agent.llm_no}] {llm}", msg_id=msg_id, is_group=is_group)
- elif op == "/llm":
- if not agent.llmclient:
- return await self.send_text(chat_id, "❌ 当前没有可用的 LLM 配置", msg_id=msg_id, is_group=is_group)
- if len(parts) > 1:
- try:
- n = int(parts[1])
- agent.next_llm(n)
- await self.send_text(chat_id, f"✅ 已切换到 [{agent.llm_no}] {agent.get_llm_name()}", msg_id=msg_id, is_group=is_group)
- except Exception:
- await self.send_text(chat_id, f"用法: /llm <0-{len(agent.list_llms()) - 1}>", msg_id=msg_id, is_group=is_group)
- else:
- lines = [f"{'→' if cur else ' '} [{i}] {name}" for i, name, cur in agent.list_llms()]
- await self.send_text(chat_id, "LLMs:\n" + "\n".join(lines), msg_id=msg_id, is_group=is_group)
- elif op == "/restore":
- try:
- restored_info, err = _format_restore()
- if err:
- return await self.send_text(chat_id, err, msg_id=msg_id, is_group=is_group)
- restored, fname, count = restored_info
- agent.abort()
- agent.history.extend(restored)
- await self.send_text(chat_id, f"✅ 已恢复 {count} 轮对话\n来源: {fname}\n(仅恢复上下文,请输入新问题继续)", msg_id=msg_id, is_group=is_group)
- except Exception as e:
- await self.send_text(chat_id, f"❌ 恢复失败: {e}", msg_id=msg_id, is_group=is_group)
- elif op == "/new":
- agent.abort()
- agent.history = []
- await self.send_text(chat_id, "🆕 已清空当前共享上下文", msg_id=msg_id, is_group=is_group)
- else:
- await self.send_text(
- chat_id,
- "📖 命令列表:\n/help - 显示帮助\n/status - 查看状态\n/stop - 停止当前任务\n/new - 清空当前上下文\n/restore - 恢复上次对话历史\n/llm [n] - 查看或切换模型",
- msg_id=msg_id,
- is_group=is_group,
- )
-
- async def run_agent(self, chat_id, text, *, msg_id=None, is_group=False):
- state = {"running": True}
- _USER_TASKS[chat_id] = state
- try:
- await self.send_text(chat_id, "思考中...", msg_id=msg_id, is_group=is_group)
- prompt = f"If you need to show files to user, use [FILE:filepath] in your response.\n\n{text}"
- dq = agent.put_task(prompt, source="qq")
- last_ping = time.time()
- while state["running"]:
- try:
- item = await asyncio.to_thread(dq.get, True, 3)
- except Q.Empty:
- if agent.is_running and time.time() - last_ping > 20:
- await self.send_text(chat_id, "⏳ 还在处理中,请稍等...", msg_id=msg_id, is_group=is_group)
- last_ping = time.time()
- continue
- if "done" in item:
- await self.send_done(chat_id, item.get("done", ""), msg_id=msg_id, is_group=is_group)
- break
- if not state["running"]:
- await self.send_text(chat_id, "⏹️ 已停止", msg_id=msg_id, is_group=is_group)
- except Exception as e:
- import traceback
-
- print(f"[QQ] run_agent error: {e}")
- traceback.print_exc()
- await self.send_text(chat_id, f"❌ 错误: {e}", msg_id=msg_id, is_group=is_group)
- finally:
- _USER_TASKS.pop(chat_id, None)
+ api = self.client.api.post_group_message if is_group else self.client.api.post_c2c_message
+ key = "group_openid" if is_group else "openid"
+ for part in split_text(content, self.split_limit):
+ await api(**{key: chat_id, "msg_type": 0, "content": part, "msg_id": msg_id, "msg_seq": _next_msg_seq()})
async def on_message(self, data, is_group=False):
try:
msg_id = getattr(data, "id", None)
- if msg_id in _PROCESSED_IDS:
+ if msg_id in PROCESSED_IDS:
return
- _PROCESSED_IDS.append(msg_id)
+ PROCESSED_IDS.append(msg_id)
content = (getattr(data, "content", "") or "").strip()
if not content:
return
author = getattr(data, "author", None)
- if is_group:
- chat_id = str(getattr(data, "group_openid", "") or "")
- user_id = str(getattr(author, "member_openid", "") or getattr(author, "id", "") or "unknown")
- else:
- user_id = str(getattr(author, "user_openid", "") or getattr(author, "id", "") or "unknown")
- chat_id = user_id
- public_access = not ALLOWED or "*" in ALLOWED
- if not public_access and user_id not in ALLOWED:
+ user_id = str(getattr(author, "member_openid" if is_group else "user_openid", "") or getattr(author, "id", "") or "unknown")
+ chat_id = str(getattr(data, "group_openid", "") or user_id) if is_group else user_id
+ if not public_access(ALLOWED) and user_id not in ALLOWED:
print(f"[QQ] unauthorized user: {user_id}")
return
print(f"[QQ] message from {user_id} ({'group' if is_group else 'c2c'}): {content}")
if content.startswith("/"):
- await self.handle_command(chat_id, content, msg_id=msg_id, is_group=is_group)
- return
+ return await self.handle_command(chat_id, content, msg_id=msg_id, is_group=is_group)
asyncio.create_task(self.run_agent(chat_id, content, msg_id=msg_id, is_group=is_group))
except Exception:
import traceback
-
print("[QQ] handle_message error")
traceback.print_exc()
async def start(self):
- BotClass = _make_bot_class(self)
- self.client = BotClass()
+ self.client = _make_bot_class(self)()
while True:
try:
print(f"[QQ] bot starting... {time.strftime('%m-%d %H:%M')}")
@@ -283,25 +114,8 @@ class QQApp:
if __name__ == "__main__":
- try:
- _lock_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- _lock_sock.bind(("127.0.0.1", 19528))
- except OSError:
- print("[QQ] Another instance is already running, skipping...")
- sys.exit(1)
-
- if not APP_ID or not APP_SECRET:
- print("[QQ] ERROR: please set qq_app_id and qq_app_secret in mykey.py or mykey.json")
- sys.exit(1)
- if agent.llmclient is None:
- print("[QQ] ERROR: no usable LLM backend found in mykey.py or mykey.json")
- sys.exit(1)
-
- log_dir = os.path.join(os.path.dirname(__file__), "temp")
- os.makedirs(log_dir, exist_ok=True)
- _logf = open(os.path.join(log_dir, "qqapp.log"), "a", encoding="utf-8", buffering=1)
- sys.stdout = sys.stderr = _logf
- print("[NEW] QQ process starting, the above are history infos ...")
- print(f"[QQ] allow list: {'public' if not ALLOWED or '*' in ALLOWED else sorted(ALLOWED)}")
+ _LOCK_SOCK = ensure_single_instance(19528, "QQ")
+ require_runtime(agent, "QQ", qq_app_id=APP_ID, qq_app_secret=APP_SECRET)
+ redirect_log(__file__, "qqapp.log", "QQ", ALLOWED)
threading.Thread(target=agent.run, daemon=True).start()
asyncio.run(QQApp().start())
diff --git a/simphtml.py b/simphtml.py
index c439dd4..b5294da 100644
--- a/simphtml.py
+++ b/simphtml.py
@@ -17,12 +17,14 @@ function createEnhancedDOMCopy() {
if (sourceNode.nodeType === 3) return sourceNode.cloneNode(false);
const clone = sourceNode.cloneNode(false);
if ((sourceNode.tagName === 'INPUT' || sourceNode.tagName === 'TEXTAREA') && sourceNode.value) clone.setAttribute('value', sourceNode.value);
+ if (sourceNode.tagName === 'INPUT' && (sourceNode.type === 'radio' || sourceNode.type === 'checkbox') && sourceNode.checked) clone.setAttribute('checked', '');
else if (sourceNode.tagName === 'SELECT' && sourceNode.value) clone.setAttribute('data-selected', sourceNode.value);
try { if (sourceNode.matches && sourceNode.matches(':-webkit-autofill')) { clone.setAttribute('data-autofilled', 'true'); if (!sourceNode.value) clone.setAttribute('value', '⚠️受保护-读tmwebdriver_sop的autofill章节提取'); } } catch(e) {}
const isDropdown = sourceNode.classList?.contains('dropdown-menu') ||
/dropdown|menu/i.test(sourceNode.className) || sourceNode.getAttribute('role') === 'menu';
- const isSmallDropdown = isDropdown && (sourceNode.querySelectorAll('a, button, [role="menuitem"], li').length <= 7 && sourceNode.textContent.length < 500);
+ const _ddItems = isDropdown ? sourceNode.querySelectorAll('a, button, [role="menuitem"], li').length : 0;
+ const isSmallDropdown = _ddItems > 0 && _ddItems <= 7 && sourceNode.textContent.length < 500;
const childNodes = [];
for (const child of sourceNode.childNodes) {
@@ -704,6 +706,7 @@ js_findMainContent = '''
def optimize_html_for_tokens(html):
if type(html) is str: soup = BeautifulSoup(html, 'html.parser')
else: soup = html
+ for svg in soup.find_all('svg'): svg.clear()
[tag.attrs.pop('style', None) for tag in soup.find_all(True)]
for tag in soup.find_all(True):
if tag.has_attr('src'):
@@ -788,6 +791,10 @@ def find_changed_elements(before_html, after_html):
for sig, els in after_sigs.items():
if sig not in before_sigs: changed.extend(els)
elif len(els) > len(before_sigs[sig]): changed.extend(els[:len(els) - len(before_sigs[sig])])
+ if len(changed) == 0 and str(before_soup) != str(after_soup):
+ before_els, after_els = before_soup.find_all(True), after_soup.find_all(True)
+ for i in range(min(len(before_els), len(after_els))):
+ if get_sig(before_els[i]) != get_sig(after_els[i]): changed.append(after_els[i])
# 变化边界: parent不在changed中的元素
cids = set(id(el) for el in changed)
boundaries = [el for el in changed if el.parent is None or id(el.parent) not in cids]
diff --git a/stapp.py b/stapp.py
index 99c79e0..a59a8ce 100644
--- a/stapp.py
+++ b/stapp.py
@@ -82,6 +82,11 @@ if "messages" not in st.session_state: st.session_state.messages = []
for msg in st.session_state.messages:
with st.chat_message(msg["role"]): st.markdown(msg["content"], unsafe_allow_html=True)
+# IME composition fix (macOS only) - prevents Enter from submitting during CJK input
+if os.name != 'nt':
+ import streamlit.components.v1 as components
+ components.html('', height=0)
+
if prompt := st.chat_input("请输入指令"):
st.session_state.messages.append({"role": "user", "content": prompt})
with st.chat_message("user"): st.markdown(prompt, unsafe_allow_html=False) # 小心 XSS
diff --git a/wecomapp.py b/wecomapp.py
index b5ef37b..bd37ddd 100644
--- a/wecomapp.py
+++ b/wecomapp.py
@@ -1,8 +1,9 @@
-import os, sys, re, threading, asyncio, queue as Q, socket, time, glob
+import asyncio, os, sys, threading
from collections import deque
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from agentmain import GeneraticAgent
+from chatapp_common import AgentChatMixin, ensure_single_instance, public_access, redirect_log, require_runtime, split_text
from llmcore import mykeys
try:
@@ -11,211 +12,64 @@ except Exception:
print("Please install wecom_aibot_sdk to use WeCom: pip install wecom_aibot_sdk")
sys.exit(1)
-agent = GeneraticAgent()
-agent.verbose = False
-
+agent = GeneraticAgent(); agent.verbose = False
BOT_ID = str(mykeys.get("wecom_bot_id", "") or "").strip()
SECRET = str(mykeys.get("wecom_secret", "") or "").strip()
WELCOME = str(mykeys.get("wecom_welcome_message", "") or "").strip()
ALLOWED = {str(x).strip() for x in mykeys.get("wecom_allowed_users", []) if str(x).strip()}
-
-_TAG_PATS = [r"<" + t + r">.*?" + t + r">" for t in ("thinking", "summary", "tool_use", "file_content")]
-_PROCESSED_IDS = deque(maxlen=1000)
-_USER_TASKS = {}
+PROCESSED_IDS, USER_TASKS = deque(maxlen=1000), {}
-def _clean(text):
- for pat in _TAG_PATS:
- text = re.sub(pat, "", text, flags=re.DOTALL)
- return re.sub(r"\n{3,}", "\n\n", text).strip() or "..."
+class WeComApp(AgentChatMixin):
+ label, source, split_limit = "WeCom", "wecom", 1200
-
-def _extract_files(text):
- return re.findall(r"\[FILE:([^\]]+)\]", text or "")
-
-
-def _strip_files(text):
- return re.sub(r"\[FILE:[^\]]+\]", "", text or "").strip()
-
-
-def _split_text(text, limit=1200):
- text = (text or "").strip() or "..."
- parts = []
- while len(text) > limit:
- cut = text.rfind("\n", 0, limit)
- if cut < limit * 0.6:
- cut = limit
- parts.append(text[:cut].rstrip())
- text = text[cut:].lstrip()
- if text:
- parts.append(text)
- return parts or ["..."]
-
-
-def _format_restore():
- files = glob.glob("./temp/model_responses_*.txt")
- if not files:
- return None, "❌ 没有找到历史记录"
- latest = max(files, key=os.path.getmtime)
- with open(latest, "r", encoding="utf-8") as f:
- content = f.read()
- users = re.findall(r"=== USER ===\n(.+?)(?==== |$)", content, re.DOTALL)
- resps = re.findall(r"=== Response ===.*?\n(.+?)(?==== Prompt|$)", content, re.DOTALL)
- count, restored = 0, []
- for u, r in zip(users, resps):
- u, r = u.strip(), r.strip()[:500]
- if u and r:
- restored.extend([f"[USER]: {u}", f"[Agent] {r}"])
- count += 1
- if not restored:
- return None, "❌ 历史记录里没有可恢复内容"
- return (restored, os.path.basename(latest), count), None
-
-
-class WeComApp:
def __init__(self):
- self.client = None
- self.chat_frames = {}
-
- def _body(self, frame):
- if hasattr(frame, "body"):
- return frame.body or {}
- if isinstance(frame, dict):
- return frame.get("body", frame)
- return {}
+ super().__init__(agent, USER_TASKS)
+ self.client, self.chat_frames = None, {}
async def send_text(self, chat_id, content):
- if not self.client:
+ if not self.client or chat_id not in self.chat_frames:
+ if chat_id not in self.chat_frames:
+ print(f"[WeCom] no frame found for chat: {chat_id}")
return
- frame = self.chat_frames.get(chat_id)
- if not frame:
- print(f"[WeCom] no frame found for chat: {chat_id}")
- return
- for part in _split_text(content):
- stream_id = generate_req_id("stream")
- await self.client.reply_stream(frame, stream_id, part, finish=True)
-
- async def send_done(self, chat_id, raw_text):
- files = [p for p in _extract_files(raw_text) if os.path.exists(p)]
- body = _strip_files(_clean(raw_text))
- if files:
- body = (body + "\n\n" if body else "") + "\n".join([f"生成文件: {p}" for p in files])
- await self.send_text(chat_id, body or "...")
-
- async def handle_command(self, chat_id, cmd):
- parts = (cmd or "").split()
- op = (parts[0] if parts else "").lower()
- if op == "/stop":
- state = _USER_TASKS.get(chat_id)
- if state:
- state["running"] = False
- agent.abort()
- await self.send_text(chat_id, "⏹️ 正在停止...")
- elif op == "/status":
- llm = agent.get_llm_name() if agent.llmclient else "未配置"
- await self.send_text(chat_id, f"状态: {'🔴 运行中' if agent.is_running else '🟢 空闲'}\nLLM: [{agent.llm_no}] {llm}")
- elif op == "/llm":
- if not agent.llmclient:
- return await self.send_text(chat_id, "❌ 当前没有可用的 LLM 配置")
- if len(parts) > 1:
- try:
- n = int(parts[1])
- agent.next_llm(n)
- await self.send_text(chat_id, f"✅ 已切换到 [{agent.llm_no}] {agent.get_llm_name()}")
- except Exception:
- await self.send_text(chat_id, f"用法: /llm <0-{len(agent.list_llms()) - 1}>")
- else:
- lines = [f"{'→' if cur else ' '} [{i}] {name}" for i, name, cur in agent.list_llms()]
- await self.send_text(chat_id, "LLMs:\n" + "\n".join(lines))
- elif op == "/restore":
- try:
- restored_info, err = _format_restore()
- if err:
- return await self.send_text(chat_id, err)
- restored, fname, count = restored_info
- agent.abort()
- agent.history.extend(restored)
- await self.send_text(chat_id, f"✅ 已恢复 {count} 轮对话\n来源: {fname}\n(仅恢复上下文,请输入新问题继续)")
- except Exception as e:
- await self.send_text(chat_id, f"❌ 恢复失败: {e}")
- elif op == "/new":
- agent.abort()
- agent.history = []
- await self.send_text(chat_id, "🆕 已清空当前共享上下文")
- else:
- await self.send_text(
- chat_id,
- "📖 命令列表:\n/help - 显示帮助\n/status - 查看状态\n/stop - 停止当前任务\n/new - 清空当前上下文\n/restore - 恢复上次对话历史\n/llm [n] - 查看或切换模型",
- )
-
- async def run_agent(self, chat_id, text):
- state = {"running": True}
- _USER_TASKS[chat_id] = state
- try:
- await self.send_text(chat_id, "思考中...")
- prompt = f"If you need to show files to user, use [FILE:filepath] in your response.\n\n{text}"
- dq = agent.put_task(prompt, source="wecom")
- last_ping = time.time()
- while state["running"]:
- try:
- item = await asyncio.to_thread(dq.get, True, 3)
- except Q.Empty:
- if agent.is_running and time.time() - last_ping > 20:
- await self.send_text(chat_id, "⏳ 还在处理中,请稍等...")
- last_ping = time.time()
- continue
- if "done" in item:
- await self.send_done(chat_id, item.get("done", ""))
- break
- if not state["running"]:
- await self.send_text(chat_id, "⏹️ 已停止")
- except Exception as e:
- import traceback
-
- print(f"[WeCom] run_agent error: {e}")
- traceback.print_exc()
- await self.send_text(chat_id, f"❌ 错误: {e}")
- finally:
- _USER_TASKS.pop(chat_id, None)
+ frame = self.chat_frames[chat_id]
+ for part in split_text(content, self.split_limit):
+ await self.client.reply_stream(frame, generate_req_id("stream"), part, finish=True)
async def on_text(self, frame):
try:
- body = self._body(frame)
+ body = frame.body if hasattr(frame, "body") else frame.get("body", frame) if isinstance(frame, dict) else {}
if not isinstance(body, dict):
return
msg_id = body.get("msgid") or f"{body.get('chatid', '')}_{body.get('sendertime', '')}"
- if msg_id in _PROCESSED_IDS:
+ if msg_id in PROCESSED_IDS:
return
- _PROCESSED_IDS.append(msg_id)
+ PROCESSED_IDS.append(msg_id)
from_info = body.get("from", {}) if isinstance(body.get("from", {}), dict) else {}
sender_id = str(from_info.get("userid", "") or "unknown")
chat_id = str(body.get("chatid", "") or sender_id)
content = str((body.get("text", {}) or {}).get("content", "") or "").strip()
if not content:
return
- public_access = not ALLOWED or "*" in ALLOWED
- if not public_access and sender_id not in ALLOWED:
+ if not public_access(ALLOWED) and sender_id not in ALLOWED:
print(f"[WeCom] unauthorized user: {sender_id}")
return
self.chat_frames[chat_id] = frame
print(f"[WeCom] message from {sender_id}: {content}")
if content.startswith("/"):
- await self.handle_command(chat_id, content)
- return
+ return await self.handle_command(chat_id, content)
asyncio.create_task(self.run_agent(chat_id, content))
except Exception:
import traceback
-
print("[WeCom] handle_message error")
traceback.print_exc()
async def on_enter_chat(self, frame):
- if not WELCOME or not self.client:
- return
- try:
- await self.client.reply_welcome(frame, {"msgtype": "text", "text": {"content": WELCOME}})
- except Exception as e:
- print(f"[WeCom] welcome error: {e}")
+ if WELCOME and self.client:
+ try:
+ await self.client.reply_welcome(frame, {"msgtype": "text", "text": {"content": WELCOME}})
+ except Exception as e:
+ print(f"[WeCom] welcome error: {e}")
async def on_connected(self, frame):
print("[WeCom] connected")
@@ -230,19 +84,16 @@ class WeComApp:
print(f"[WeCom] error: {frame}")
async def start(self):
- self.client = WSClient({
- "bot_id": BOT_ID,
- "secret": SECRET,
- "reconnect_interval": 1000,
- "max_reconnect_attempts": -1,
- "heartbeat_interval": 30000,
- })
- self.client.on("connected", self.on_connected)
- self.client.on("authenticated", self.on_authenticated)
- self.client.on("disconnected", self.on_disconnected)
- self.client.on("error", self.on_error)
- self.client.on("message.text", self.on_text)
- self.client.on("event.enter_chat", self.on_enter_chat)
+ self.client = WSClient({"bot_id": BOT_ID, "secret": SECRET, "reconnect_interval": 1000, "max_reconnect_attempts": -1, "heartbeat_interval": 30000})
+ for event, handler in {
+ "connected": self.on_connected,
+ "authenticated": self.on_authenticated,
+ "disconnected": self.on_disconnected,
+ "error": self.on_error,
+ "message.text": self.on_text,
+ "event.enter_chat": self.on_enter_chat,
+ }.items():
+ self.client.on(event, handler)
print("[WeCom] bot starting...")
await self.client.connect_async()
while True:
@@ -250,25 +101,8 @@ class WeComApp:
if __name__ == "__main__":
- try:
- _lock_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- _lock_sock.bind(("127.0.0.1", 19529))
- except OSError:
- print("[WeCom] Another instance is already running, skipping...")
- sys.exit(1)
-
- if not BOT_ID or not SECRET:
- print("[WeCom] ERROR: please set wecom_bot_id and wecom_secret in mykey.py or mykey.json")
- sys.exit(1)
- if agent.llmclient is None:
- print("[WeCom] ERROR: no usable LLM backend found in mykey.py or mykey.json")
- sys.exit(1)
-
- log_dir = os.path.join(os.path.dirname(__file__), "temp")
- os.makedirs(log_dir, exist_ok=True)
- _logf = open(os.path.join(log_dir, "wecomapp.log"), "a", encoding="utf-8", buffering=1)
- sys.stdout = sys.stderr = _logf
- print("[NEW] WeCom process starting, the above are history infos ...")
- print(f"[WeCom] allow list: {'public' if not ALLOWED or '*' in ALLOWED else sorted(ALLOWED)}")
+ _LOCK_SOCK = ensure_single_instance(19529, "WeCom")
+ require_runtime(agent, "WeCom", wecom_bot_id=BOT_ID, wecom_secret=SECRET)
+ redirect_log(__file__, "wecomapp.log", "WeCom", ALLOWED)
threading.Thread(target=agent.run, daemon=True).start()
asyncio.run(WeComApp().start())