diff --git a/agent_loop.py b/agent_loop.py index 6a77a0f..4f1a394 100644 --- a/agent_loop.py +++ b/agent_loop.py @@ -28,10 +28,7 @@ class BaseHandler: yield f"未知工具: {tool_name}\n" return StepOutcome(None, next_prompt=f"未知工具 {tool_name}", should_exit=False) -def json_default(o): - if isinstance(o, set): return list(o) - return str(o) - +def json_default(o): return list(o) if isinstance(o, set) else str(o) def exhaust(g): try: while True: next(g) @@ -47,7 +44,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, {"role": "system", "content": system_prompt}, {"role": "user", "content": initial_user_content if initial_user_content is not None else user_input} ] - turn = 0; handler._done_hooks = []; handler.max_turns = max_turns + turn = 0; handler.max_turns = max_turns while turn < handler.max_turns: turn += 1; md = '**' if verbose else '' yield f"{md}LLM Running (Turn {turn}) ...{md}\n\n" @@ -65,7 +62,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, else: tool_calls = [{'tool_name': tc.function.name, 'args': json.loads(tc.function.arguments), 'id': tc.id} for tc in response.tool_calls] - tool_results = []; next_prompts = set(); exit_reason = None + tool_results = []; next_prompts = set(); exit_reason = {} for ii, tc in enumerate(tool_calls): tool_name, args, tid = tc['tool_name'], tc['args'], tc.get('id', '') if tool_name == 'no_tool': pass @@ -92,7 +89,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, tool_results.append({'tool_use_id': tid, 'content': datastr}) next_prompts.add(outcome.next_prompt) if len(next_prompts) == 0 or exit_reason: - if len(handler._done_hooks) == 0: break + if len(handler._done_hooks) == 0 or exit_reason.get('result', '') == 'EXITED': break next_prompts.add(handler._done_hooks.pop(0)) next_prompt = handler.turn_end_callback(response, tool_calls, tool_results, turn, '\n'.join(next_prompts), exit_reason) messages = [{"role": "user", "content": next_prompt, "tool_results": tool_results}] # just new message, history is kept in *Session diff --git a/agentmain.py b/agentmain.py index 2faf70d..a5a7e83 100644 --- a/agentmain.py +++ b/agentmain.py @@ -77,13 +77,15 @@ class GeneraticAgent: self.llmclient = self.llmclients[self.llm_no] self.llmclient.backend.history = lastc.backend.history self.llmclient.last_tools = '' - name = self.get_llm_name().lower() + name = self.get_llm_name(model=True) if 'glm' in name or 'minimax' in name or 'kimi' in name: load_tool_schema('_cn') else: load_tool_schema() def list_llms(self): return [(i, self.get_llm_name(b), i == self.llm_no) for i, b in enumerate(self.llmclients)] - def get_llm_name(self, b=None): + def get_llm_name(self, b=None, model=False): b = self.llmclient if b is None else b - return f"{type(b.backend).__name__}/{b.backend.name}" if not isinstance(b, dict) else "BADCONFIG_MIXIN" + if isinstance(b, dict): return 'BADCONFIG_MIXIN' + if model: return b.backend.model.lower() + return f"{type(b.backend).__name__}/{b.backend.name}" def abort(self): if not self.is_running: return @@ -135,11 +137,10 @@ class GeneraticAgent: user_input = raw_query if source == 'feishu' and len(self.history) > 1: # 如果有历史记录且来自飞书,注入到首轮 user_input 中(支持/restore恢复上下文) user_input = handler._get_anchor_prompt() + f"\n\n### 用户当前消息\n{raw_query}" - initial_user_content = None + if 'gpt' in self.get_llm_name(model=True): handler._done_hooks.append('请确定用户任务是否完成,如未完成需要继续工具调用直到完成任务,确实需要问用户应使用ask_user工具') # although new handler, the **full** history is in llmclient, so it is full history! gen = agent_runner_loop(self.llmclient, sys_prompt, user_input, - handler, TOOLS_SCHEMA, max_turns=40, verbose=self.verbose, - initial_user_content=initial_user_content) + handler, TOOLS_SCHEMA, max_turns=40, verbose=self.verbose) try: full_resp = ""; last_pos = 0 for chunk in gen: diff --git a/frontends/wechatapp.py b/frontends/wechatapp.py index 52aee39..701e114 100644 --- a/frontends/wechatapp.py +++ b/frontends/wechatapp.py @@ -11,8 +11,8 @@ from agentmain import GeneraticAgent API = 'https://ilinkai.weixin.qq.com' TOKEN_FILE = Path.home() / '.wxbot' / 'token.json' TOKEN_FILE.parent.mkdir(exist_ok=True) -VER, MSG_USER, MSG_BOT, ITEM_TEXT, STATE_FINISH = '0.2.5', 1, 2, 1, 2 -ITEM_FILE = 4 +VER, MSG_USER, MSG_BOT, ITEM_TEXT, STATE_FINISH = '2.1.8', 1, 2, 1, 2 +ITEM_IMAGE, ITEM_FILE, ITEM_VIDEO = 2, 4, 5 CDN_BASE = 'https://novac2c.cdn.weixin.qq.com/c2c' def _uin(): @@ -94,48 +94,72 @@ class WxBotClient: 'to_user_id': to_user_id, 'typing_ticket': typing_ticket, 'typing_status': 2 if cancel else 1, 'base_info': {'channel_version': VER}}) - def send_file(self, to_user_id, file_path, context_token=''): - """Send a file to user via CDN upload.""" + def _enc(self, raw, aes_key): + pad = 16 - (len(raw) % 16) + return AES.new(aes_key, AES.MODE_ECB).encrypt(raw + bytes([pad] * pad)) + + def _upload(self, filekey, upload_param, raw, aes_key, timeout=120, upload_url=''): + url = upload_url.strip() if upload_url else f'{CDN_BASE}/upload?encrypted_query_param={quote(upload_param)}&filekey={filekey}' + data = self._enc(raw, aes_key) + last_err = None + for attempt in range(1, 4): + try: + r = requests.post(url, data=data, headers={'Content-Type': 'application/octet-stream'}, timeout=timeout) + if 400 <= r.status_code < 500: + msg = r.headers.get('x-error-message') or r.text[:300] + raise RuntimeError(f'CDN upload client error {r.status_code}: {msg}') + if r.status_code != 200: + msg = r.headers.get('x-error-message') or f'status {r.status_code}' + raise RuntimeError(f'CDN upload server error: {msg}') + eq = r.headers.get('x-encrypted-param', '') + if not eq: raise RuntimeError('CDN upload response missing x-encrypted-param header') + return {'encrypt_query_param': eq, + 'aes_key': base64.b64encode(aes_key.hex().encode()).decode(), 'encrypt_type': 1} + except Exception as e: + last_err = e + if 'client error' in str(e) or attempt >= 3: break + print(f'[WX] CDN upload retry {attempt}: {e}', file=sys.__stdout__) + raise last_err + + def _send_media(self, to_user_id, file_path, media_type, item_type, item_key, context_token=''): fp = Path(file_path) raw = fp.read_bytes() - rawsize = len(raw) - rawfilemd5 = hashlib.md5(raw).hexdigest() - aes_key = os.urandom(16) filekey = uuid.uuid4().hex - ciphertext_size = ((rawsize // 16) + 1) * 16 - # 1. get upload url - resp = self._post('ilink/bot/getuploadurl', { - 'filekey': filekey, 'media_type': 3, 'to_user_id': to_user_id, - 'rawsize': rawsize, 'rawfilemd5': rawfilemd5, + aes_key = os.urandom(16) + ciphertext_size = ((len(raw) // 16) + 1) * 16 + body = { + 'filekey': filekey, 'media_type': media_type, 'to_user_id': to_user_id, + 'rawsize': len(raw), 'rawfilemd5': hashlib.md5(raw).hexdigest(), 'filesize': ciphertext_size, 'no_need_thumb': True, - 'aeskey': aes_key.hex(), - 'base_info': {'channel_version': VER}}) + 'aeskey': aes_key.hex(), 'base_info': {'channel_version': VER}} + resp = self._post('ilink/bot/getuploadurl', body) upload_param = resp.get('upload_param', '') - if not upload_param: - raise RuntimeError(f'getuploadurl failed: {resp}') - # 2. AES-128-ECB encrypt (PKCS7) - cipher = AES.new(aes_key, AES.MODE_ECB) - pad_len = 16 - (rawsize % 16) - ciphertext = cipher.encrypt(raw + bytes([pad_len] * pad_len)) - # 3. upload to CDN - upload_url = (f'{CDN_BASE}/upload?encrypted_query_param=' - f'{quote(upload_param)}&filekey={filekey}') - r = requests.post(upload_url, data=ciphertext, headers={'Content-Type': 'application/octet-stream'}, timeout=120) - r.raise_for_status() - download_param = r.headers.get('x-encrypted-param', '') - if not download_param: - raise RuntimeError(f'CDN upload: no x-encrypted-param. status={r.status_code}') - # 4. send message with file attachment + upload_url = resp.get('upload_full_url', '') + if not (upload_param or upload_url): raise RuntimeError(f'getuploadurl failed: {resp}') + media = self._upload(filekey, upload_param, raw, aes_key=aes_key, upload_url=upload_url) + item = {'media': media} + if item_key == 'file_item': + item.update({'file_name': fp.name, 'len': str(len(raw))}) + elif item_key == 'image_item': + item.update({'mid_size': ciphertext_size}) + elif item_key == 'video_item': + item.update({'video_size': ciphertext_size}) msg = {'from_user_id': '', 'to_user_id': to_user_id, 'client_id': f'pyclient-{uuid.uuid4().hex[:16]}', 'message_type': MSG_BOT, 'message_state': STATE_FINISH, - 'item_list': [{'type': ITEM_FILE, 'file_item': { - 'media': {'encrypt_query_param': download_param, - 'aes_key': base64.b64encode(aes_key.hex().encode()).decode(), 'encrypt_type': 1}, - 'file_name': fp.name, 'len': str(rawsize)}}]} + 'item_list': [{'type': item_type, item_key: item}]} if context_token: msg['context_token'] = context_token return self._post('ilink/bot/sendmessage', {'msg': msg, 'base_info': {'channel_version': VER}}) + def send_file(self, to_user_id, file_path, context_token=''): + return self._send_media(to_user_id, file_path, 3, ITEM_FILE, 'file_item', context_token) + + def send_image(self, to_user_id, file_path, context_token=''): + return self._send_media(to_user_id, file_path, 1, ITEM_IMAGE, 'image_item', context_token) + + def send_video(self, to_user_id, file_path, context_token=''): + return self._send_media(to_user_id, file_path, 2, ITEM_VIDEO, 'video_item', context_token) + @staticmethod def extract_text(msg): return '\n'.join(it['text_item'].get('text', '') @@ -190,7 +214,7 @@ def _dl_media(items): agent = GeneraticAgent() agent.verbose = False -_TAG_PATS = [r'<' + t + r'>.*?' for t in ('thinking', 'summary', 'tool_use')] +_TAG_PATS = [r'<' + t + r'>.*?' for t in ('thinking', 'tool_use')] _TAG_PATS.append(r'.*?') def _strip_md(t): @@ -213,8 +237,11 @@ def _strip_md(t): return re.sub(r'\n{3,}', '\n\n', t).strip() def _clean(t): + t = re.sub(r'^\s*LLM Running \(Turn \d+\) \.{3}\s*$', '', t, flags=re.M) + t = re.sub(r'^\s*🛠️\s*[A-Za-z_][A-Za-z0-9_]*\(.*$', '', t, flags=re.M) for p in _TAG_PATS: t = re.sub(p, '', t, flags=re.DOTALL) + t = re.sub(r'', '', t) return re.sub(r'\n{3,}', '\n\n', _strip_md(t)).strip() or '...' def _split(text, limit=1800): @@ -270,7 +297,8 @@ def on_message(bot, msg): if 'done' in item: result = item['done']; break except queue.Empty: result = '[超时]' files = re.findall(r'\[FILE:([^\]]+)\]', result) - files = [f for f in files if (f if os.path.isabs(f) else os.path.join(_TEMP_DIR, f)) not in media_paths] + bad = {'filepath', '', 'path', '', 'file_path', '', '...'} + files = [f for f in files if f.strip().lower() not in bad and (f if os.path.isabs(f) else os.path.join(_TEMP_DIR, f)) not in media_paths] show = _clean(result) chunks = _split(show) _MAX_MSGS = 6 @@ -285,9 +313,12 @@ def on_message(bot, msg): if not os.path.isabs(fpath): fpath = os.path.join(_TEMP_DIR, fpath) try: if not os.path.exists(fpath): raise FileNotFoundError(f"文件不存在: {fpath}") - bot.send_file(uid, fpath, context_token=ctx) - print(f'[WX] sent file: {fpath}', file=sys.__stdout__) - except Exception as e: print(f'[WX] send_file err: {e}', file=sys.__stdout__) + ext = os.path.splitext(fpath)[1].lower() + sender = bot.send_video if ext in {'.mp4', '.mov', '.m4v', '.webm'} else \ + bot.send_image if ext in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'} else bot.send_file + sender(uid, fpath, context_token=ctx) + print(f'[WX] sent media: {fpath}', file=sys.__stdout__) + except Exception as e: print(f'[WX] send media err: {e}', file=sys.__stdout__) threading.Thread(target=_handle, daemon=True).start() diff --git a/ga.py b/ga.py index 1ad9836..1325bc4 100644 --- a/ga.py +++ b/ga.py @@ -17,7 +17,7 @@ def code_run(code, code_type="python", timeout=60, cwd=None, code_cwd=None, stop yield f"[Action] Running {code_type} in {os.path.basename(cwd)}: {preview}\n" script_dir = os.path.dirname(os.path.abspath(__file__)) cwd = cwd or os.path.join(script_dir, 'temp'); tmp_path = None - if code_type == "python": + if code_type in ["python", "py"]: tmp_file = tempfile.NamedTemporaryFile(suffix=".ai.py", delete=False, mode='w', encoding='utf-8', dir=code_cwd) cr_header = os.path.join(script_dir, 'assets', 'code_run_header.py') if os.path.exists(cr_header): tmp_file.write(open(cr_header, encoding='utf-8').read()) @@ -25,7 +25,7 @@ def code_run(code, code_type="python", timeout=60, cwd=None, code_cwd=None, stop tmp_path = tmp_file.name tmp_file.close() cmd = [sys.executable, "-X", "utf8", "-u", tmp_path] - elif code_type in ["powershell", "bash"]: + elif code_type in ["powershell", "bash", "sh", "shell", "ps1", "pwsh"]: if os.name == 'nt': cmd = ["powershell", "-NoProfile", "-NonInteractive", "-Command", code] else: cmd = ["bash", "-c", code] else: @@ -110,12 +110,10 @@ def first_init_driver(): time.sleep(3) def web_scan(tabs_only=False, switch_tab_id=None, text_only=False): - """ - 获取当前页面的简化HTML内容和标签页列表。注意:简化过程会过滤边栏、浮动元素等非主体内容。 + """获取当前页面的简化HTML内容和标签页列表。注意:简化过程会过滤边栏、浮动元素等非主体内容。 tabs_only: 仅返回标签页列表,不获取HTML内容(节省token)。 switch_tab_id: 可选参数,如果提供,则在扫描前切换到该标签页。 - 应当多用execute_js,少全量观察html。 - """ + 应当多用execute_js,少全量观察html""" global driver try: if driver is None: first_init_driver() @@ -265,13 +263,15 @@ class GenericAgentHandler(BaseHandler): self.cwd = cwd; self.current_turn = 0 self.history_info = last_history if last_history else [] self.code_stop_signal = [] + self._done_hooks = [] def _get_abs_path(self, path): if not path: return "" return os.path.abspath(os.path.join(self.cwd, path)) def _extract_code_block(self, response, code_type): - matches = re.findall(rf"```{code_type}\n(.*?)\n```", response.content, re.DOTALL) + code_type = {'python':'python|py', 'powershell':'powershell|ps1|pwsh', 'bash':'bash|sh|shell'}.get(code_type, re.escape(code_type)) + matches = re.findall(rf"```(?:{code_type})\n(.*?)\n```", response.content, re.DOTALL) return matches[-1].strip() if matches else None def do_code_run(self, args, response): @@ -280,7 +280,7 @@ class GenericAgentHandler(BaseHandler): code = args.get("code") or args.get("script") if not code: code = self._extract_code_block(response, code_type) - if not code: return StepOutcome("[Error] Code missing. Use ```{code_type} block or 'script' arg.", next_prompt="\n") + if not code: return StepOutcome("[Error] Code missing. Must use reply code block or 'script' arg.", next_prompt="\n") timeout = args.get("timeout", 60) raw_path = os.path.join(self.cwd, args.get("cwd", './')) cwd = os.path.normpath(os.path.abspath(raw_path)) @@ -309,8 +309,7 @@ class GenericAgentHandler(BaseHandler): def do_web_scan(self, args, response): '''获取当前页面内容和标签页列表。也可用于切换标签页。 注意:HTML经过简化,边栏/浮动元素等可能被过滤。如需查看被过滤的内容请用execute_js。 - tabs_only=true时仅返回标签页列表,不获取HTML(省token)。 - ''' + tabs_only=true时仅返回标签页列表,不获取HTML(省token)''' tabs_only = args.get("tabs_only", False) switch_tab_id = args.get("switch_tab_id", None) text_only = args.get("text_only", False) @@ -523,7 +522,7 @@ class GenericAgentHandler(BaseHandler): clean_args = {k: v for k, v in args.items() if not k.startswith('_')} summary = f"调用工具{tool_name}, args: {clean_args}" if tool_name == 'no_tool': summary = "直接回答了用户问题" - next_prompt += "\n[DANGER] 上一轮遗漏了,已根据物理动作自动补全。在下次回复中记得协议。" + next_prompt += "\n[DANGER] 上一轮遗漏了,需要按协议在中输出极简单行摘要!" summary = smart_format(summary, max_str_len=100) self.history_info.append(f'[Agent] {summary}') if turn % 35 == 0 and 'plan' not in str(self.working.get('related_sop')):