diff --git a/agent_loop.py b/agent_loop.py
index 6a77a0f..4f1a394 100644
--- a/agent_loop.py
+++ b/agent_loop.py
@@ -28,10 +28,7 @@ class BaseHandler:
yield f"未知工具: {tool_name}\n"
return StepOutcome(None, next_prompt=f"未知工具 {tool_name}", should_exit=False)
-def json_default(o):
- if isinstance(o, set): return list(o)
- return str(o)
-
+def json_default(o): return list(o) if isinstance(o, set) else str(o)
def exhaust(g):
try:
while True: next(g)
@@ -47,7 +44,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
{"role": "system", "content": system_prompt},
{"role": "user", "content": initial_user_content if initial_user_content is not None else user_input}
]
- turn = 0; handler._done_hooks = []; handler.max_turns = max_turns
+ turn = 0; handler.max_turns = max_turns
while turn < handler.max_turns:
turn += 1; md = '**' if verbose else ''
yield f"{md}LLM Running (Turn {turn}) ...{md}\n\n"
@@ -65,7 +62,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
else: tool_calls = [{'tool_name': tc.function.name, 'args': json.loads(tc.function.arguments), 'id': tc.id}
for tc in response.tool_calls]
- tool_results = []; next_prompts = set(); exit_reason = None
+ tool_results = []; next_prompts = set(); exit_reason = {}
for ii, tc in enumerate(tool_calls):
tool_name, args, tid = tc['tool_name'], tc['args'], tc.get('id', '')
if tool_name == 'no_tool': pass
@@ -92,7 +89,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
tool_results.append({'tool_use_id': tid, 'content': datastr})
next_prompts.add(outcome.next_prompt)
if len(next_prompts) == 0 or exit_reason:
- if len(handler._done_hooks) == 0: break
+ if len(handler._done_hooks) == 0 or exit_reason.get('result', '') == 'EXITED': break
next_prompts.add(handler._done_hooks.pop(0))
next_prompt = handler.turn_end_callback(response, tool_calls, tool_results, turn, '\n'.join(next_prompts), exit_reason)
messages = [{"role": "user", "content": next_prompt, "tool_results": tool_results}] # just new message, history is kept in *Session
diff --git a/agentmain.py b/agentmain.py
index 2faf70d..a5a7e83 100644
--- a/agentmain.py
+++ b/agentmain.py
@@ -77,13 +77,15 @@ class GeneraticAgent:
self.llmclient = self.llmclients[self.llm_no]
self.llmclient.backend.history = lastc.backend.history
self.llmclient.last_tools = ''
- name = self.get_llm_name().lower()
+ name = self.get_llm_name(model=True)
if 'glm' in name or 'minimax' in name or 'kimi' in name: load_tool_schema('_cn')
else: load_tool_schema()
def list_llms(self): return [(i, self.get_llm_name(b), i == self.llm_no) for i, b in enumerate(self.llmclients)]
- def get_llm_name(self, b=None):
+ def get_llm_name(self, b=None, model=False):
b = self.llmclient if b is None else b
- return f"{type(b.backend).__name__}/{b.backend.name}" if not isinstance(b, dict) else "BADCONFIG_MIXIN"
+ if isinstance(b, dict): return 'BADCONFIG_MIXIN'
+ if model: return b.backend.model.lower()
+ return f"{type(b.backend).__name__}/{b.backend.name}"
def abort(self):
if not self.is_running: return
@@ -135,11 +137,10 @@ class GeneraticAgent:
user_input = raw_query
if source == 'feishu' and len(self.history) > 1: # 如果有历史记录且来自飞书,注入到首轮 user_input 中(支持/restore恢复上下文)
user_input = handler._get_anchor_prompt() + f"\n\n### 用户当前消息\n{raw_query}"
- initial_user_content = None
+ if 'gpt' in self.get_llm_name(model=True): handler._done_hooks.append('请确定用户任务是否完成,如未完成需要继续工具调用直到完成任务,确实需要问用户应使用ask_user工具')
# although new handler, the **full** history is in llmclient, so it is full history!
gen = agent_runner_loop(self.llmclient, sys_prompt, user_input,
- handler, TOOLS_SCHEMA, max_turns=40, verbose=self.verbose,
- initial_user_content=initial_user_content)
+ handler, TOOLS_SCHEMA, max_turns=40, verbose=self.verbose)
try:
full_resp = ""; last_pos = 0
for chunk in gen:
diff --git a/frontends/wechatapp.py b/frontends/wechatapp.py
index 52aee39..701e114 100644
--- a/frontends/wechatapp.py
+++ b/frontends/wechatapp.py
@@ -11,8 +11,8 @@ from agentmain import GeneraticAgent
API = 'https://ilinkai.weixin.qq.com'
TOKEN_FILE = Path.home() / '.wxbot' / 'token.json'
TOKEN_FILE.parent.mkdir(exist_ok=True)
-VER, MSG_USER, MSG_BOT, ITEM_TEXT, STATE_FINISH = '0.2.5', 1, 2, 1, 2
-ITEM_FILE = 4
+VER, MSG_USER, MSG_BOT, ITEM_TEXT, STATE_FINISH = '2.1.8', 1, 2, 1, 2
+ITEM_IMAGE, ITEM_FILE, ITEM_VIDEO = 2, 4, 5
CDN_BASE = 'https://novac2c.cdn.weixin.qq.com/c2c'
def _uin():
@@ -94,48 +94,72 @@ class WxBotClient:
'to_user_id': to_user_id, 'typing_ticket': typing_ticket,
'typing_status': 2 if cancel else 1, 'base_info': {'channel_version': VER}})
- def send_file(self, to_user_id, file_path, context_token=''):
- """Send a file to user via CDN upload."""
+ def _enc(self, raw, aes_key):
+ pad = 16 - (len(raw) % 16)
+ return AES.new(aes_key, AES.MODE_ECB).encrypt(raw + bytes([pad] * pad))
+
+ def _upload(self, filekey, upload_param, raw, aes_key, timeout=120, upload_url=''):
+ url = upload_url.strip() if upload_url else f'{CDN_BASE}/upload?encrypted_query_param={quote(upload_param)}&filekey={filekey}'
+ data = self._enc(raw, aes_key)
+ last_err = None
+ for attempt in range(1, 4):
+ try:
+ r = requests.post(url, data=data, headers={'Content-Type': 'application/octet-stream'}, timeout=timeout)
+ if 400 <= r.status_code < 500:
+ msg = r.headers.get('x-error-message') or r.text[:300]
+ raise RuntimeError(f'CDN upload client error {r.status_code}: {msg}')
+ if r.status_code != 200:
+ msg = r.headers.get('x-error-message') or f'status {r.status_code}'
+ raise RuntimeError(f'CDN upload server error: {msg}')
+ eq = r.headers.get('x-encrypted-param', '')
+ if not eq: raise RuntimeError('CDN upload response missing x-encrypted-param header')
+ return {'encrypt_query_param': eq,
+ 'aes_key': base64.b64encode(aes_key.hex().encode()).decode(), 'encrypt_type': 1}
+ except Exception as e:
+ last_err = e
+ if 'client error' in str(e) or attempt >= 3: break
+ print(f'[WX] CDN upload retry {attempt}: {e}', file=sys.__stdout__)
+ raise last_err
+
+ def _send_media(self, to_user_id, file_path, media_type, item_type, item_key, context_token=''):
fp = Path(file_path)
raw = fp.read_bytes()
- rawsize = len(raw)
- rawfilemd5 = hashlib.md5(raw).hexdigest()
- aes_key = os.urandom(16)
filekey = uuid.uuid4().hex
- ciphertext_size = ((rawsize // 16) + 1) * 16
- # 1. get upload url
- resp = self._post('ilink/bot/getuploadurl', {
- 'filekey': filekey, 'media_type': 3, 'to_user_id': to_user_id,
- 'rawsize': rawsize, 'rawfilemd5': rawfilemd5,
+ aes_key = os.urandom(16)
+ ciphertext_size = ((len(raw) // 16) + 1) * 16
+ body = {
+ 'filekey': filekey, 'media_type': media_type, 'to_user_id': to_user_id,
+ 'rawsize': len(raw), 'rawfilemd5': hashlib.md5(raw).hexdigest(),
'filesize': ciphertext_size, 'no_need_thumb': True,
- 'aeskey': aes_key.hex(),
- 'base_info': {'channel_version': VER}})
+ 'aeskey': aes_key.hex(), 'base_info': {'channel_version': VER}}
+ resp = self._post('ilink/bot/getuploadurl', body)
upload_param = resp.get('upload_param', '')
- if not upload_param:
- raise RuntimeError(f'getuploadurl failed: {resp}')
- # 2. AES-128-ECB encrypt (PKCS7)
- cipher = AES.new(aes_key, AES.MODE_ECB)
- pad_len = 16 - (rawsize % 16)
- ciphertext = cipher.encrypt(raw + bytes([pad_len] * pad_len))
- # 3. upload to CDN
- upload_url = (f'{CDN_BASE}/upload?encrypted_query_param='
- f'{quote(upload_param)}&filekey={filekey}')
- r = requests.post(upload_url, data=ciphertext, headers={'Content-Type': 'application/octet-stream'}, timeout=120)
- r.raise_for_status()
- download_param = r.headers.get('x-encrypted-param', '')
- if not download_param:
- raise RuntimeError(f'CDN upload: no x-encrypted-param. status={r.status_code}')
- # 4. send message with file attachment
+ upload_url = resp.get('upload_full_url', '')
+ if not (upload_param or upload_url): raise RuntimeError(f'getuploadurl failed: {resp}')
+ media = self._upload(filekey, upload_param, raw, aes_key=aes_key, upload_url=upload_url)
+ item = {'media': media}
+ if item_key == 'file_item':
+ item.update({'file_name': fp.name, 'len': str(len(raw))})
+ elif item_key == 'image_item':
+ item.update({'mid_size': ciphertext_size})
+ elif item_key == 'video_item':
+ item.update({'video_size': ciphertext_size})
msg = {'from_user_id': '', 'to_user_id': to_user_id,
'client_id': f'pyclient-{uuid.uuid4().hex[:16]}',
'message_type': MSG_BOT, 'message_state': STATE_FINISH,
- 'item_list': [{'type': ITEM_FILE, 'file_item': {
- 'media': {'encrypt_query_param': download_param,
- 'aes_key': base64.b64encode(aes_key.hex().encode()).decode(), 'encrypt_type': 1},
- 'file_name': fp.name, 'len': str(rawsize)}}]}
+ 'item_list': [{'type': item_type, item_key: item}]}
if context_token: msg['context_token'] = context_token
return self._post('ilink/bot/sendmessage', {'msg': msg, 'base_info': {'channel_version': VER}})
+ def send_file(self, to_user_id, file_path, context_token=''):
+ return self._send_media(to_user_id, file_path, 3, ITEM_FILE, 'file_item', context_token)
+
+ def send_image(self, to_user_id, file_path, context_token=''):
+ return self._send_media(to_user_id, file_path, 1, ITEM_IMAGE, 'image_item', context_token)
+
+ def send_video(self, to_user_id, file_path, context_token=''):
+ return self._send_media(to_user_id, file_path, 2, ITEM_VIDEO, 'video_item', context_token)
+
@staticmethod
def extract_text(msg):
return '\n'.join(it['text_item'].get('text', '')
@@ -190,7 +214,7 @@ def _dl_media(items):
agent = GeneraticAgent()
agent.verbose = False
-_TAG_PATS = [r'<' + t + r'>.*?' + t + r'>' for t in ('thinking', 'summary', 'tool_use')]
+_TAG_PATS = [r'<' + t + r'>.*?' + t + r'>' for t in ('thinking', 'tool_use')]
_TAG_PATS.append(r'.*?')
def _strip_md(t):
@@ -213,8 +237,11 @@ def _strip_md(t):
return re.sub(r'\n{3,}', '\n\n', t).strip()
def _clean(t):
+ t = re.sub(r'^\s*LLM Running \(Turn \d+\) \.{3}\s*$', '', t, flags=re.M)
+ t = re.sub(r'^\s*🛠️\s*[A-Za-z_][A-Za-z0-9_]*\(.*$', '', t, flags=re.M)
for p in _TAG_PATS:
t = re.sub(p, '', t, flags=re.DOTALL)
+ t = re.sub(r'?summary>', '', t)
return re.sub(r'\n{3,}', '\n\n', _strip_md(t)).strip() or '...'
def _split(text, limit=1800):
@@ -270,7 +297,8 @@ def on_message(bot, msg):
if 'done' in item: result = item['done']; break
except queue.Empty: result = '[超时]'
files = re.findall(r'\[FILE:([^\]]+)\]', result)
- files = [f for f in files if (f if os.path.isabs(f) else os.path.join(_TEMP_DIR, f)) not in media_paths]
+ bad = {'filepath', '', 'path', '', 'file_path', '', '...'}
+ files = [f for f in files if f.strip().lower() not in bad and (f if os.path.isabs(f) else os.path.join(_TEMP_DIR, f)) not in media_paths]
show = _clean(result)
chunks = _split(show)
_MAX_MSGS = 6
@@ -285,9 +313,12 @@ def on_message(bot, msg):
if not os.path.isabs(fpath): fpath = os.path.join(_TEMP_DIR, fpath)
try:
if not os.path.exists(fpath): raise FileNotFoundError(f"文件不存在: {fpath}")
- bot.send_file(uid, fpath, context_token=ctx)
- print(f'[WX] sent file: {fpath}', file=sys.__stdout__)
- except Exception as e: print(f'[WX] send_file err: {e}', file=sys.__stdout__)
+ ext = os.path.splitext(fpath)[1].lower()
+ sender = bot.send_video if ext in {'.mp4', '.mov', '.m4v', '.webm'} else \
+ bot.send_image if ext in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'} else bot.send_file
+ sender(uid, fpath, context_token=ctx)
+ print(f'[WX] sent media: {fpath}', file=sys.__stdout__)
+ except Exception as e: print(f'[WX] send media err: {e}', file=sys.__stdout__)
threading.Thread(target=_handle, daemon=True).start()
diff --git a/ga.py b/ga.py
index 1ad9836..1325bc4 100644
--- a/ga.py
+++ b/ga.py
@@ -17,7 +17,7 @@ def code_run(code, code_type="python", timeout=60, cwd=None, code_cwd=None, stop
yield f"[Action] Running {code_type} in {os.path.basename(cwd)}: {preview}\n"
script_dir = os.path.dirname(os.path.abspath(__file__))
cwd = cwd or os.path.join(script_dir, 'temp'); tmp_path = None
- if code_type == "python":
+ if code_type in ["python", "py"]:
tmp_file = tempfile.NamedTemporaryFile(suffix=".ai.py", delete=False, mode='w', encoding='utf-8', dir=code_cwd)
cr_header = os.path.join(script_dir, 'assets', 'code_run_header.py')
if os.path.exists(cr_header): tmp_file.write(open(cr_header, encoding='utf-8').read())
@@ -25,7 +25,7 @@ def code_run(code, code_type="python", timeout=60, cwd=None, code_cwd=None, stop
tmp_path = tmp_file.name
tmp_file.close()
cmd = [sys.executable, "-X", "utf8", "-u", tmp_path]
- elif code_type in ["powershell", "bash"]:
+ elif code_type in ["powershell", "bash", "sh", "shell", "ps1", "pwsh"]:
if os.name == 'nt': cmd = ["powershell", "-NoProfile", "-NonInteractive", "-Command", code]
else: cmd = ["bash", "-c", code]
else:
@@ -110,12 +110,10 @@ def first_init_driver():
time.sleep(3)
def web_scan(tabs_only=False, switch_tab_id=None, text_only=False):
- """
- 获取当前页面的简化HTML内容和标签页列表。注意:简化过程会过滤边栏、浮动元素等非主体内容。
+ """获取当前页面的简化HTML内容和标签页列表。注意:简化过程会过滤边栏、浮动元素等非主体内容。
tabs_only: 仅返回标签页列表,不获取HTML内容(节省token)。
switch_tab_id: 可选参数,如果提供,则在扫描前切换到该标签页。
- 应当多用execute_js,少全量观察html。
- """
+ 应当多用execute_js,少全量观察html"""
global driver
try:
if driver is None: first_init_driver()
@@ -265,13 +263,15 @@ class GenericAgentHandler(BaseHandler):
self.cwd = cwd; self.current_turn = 0
self.history_info = last_history if last_history else []
self.code_stop_signal = []
+ self._done_hooks = []
def _get_abs_path(self, path):
if not path: return ""
return os.path.abspath(os.path.join(self.cwd, path))
def _extract_code_block(self, response, code_type):
- matches = re.findall(rf"```{code_type}\n(.*?)\n```", response.content, re.DOTALL)
+ code_type = {'python':'python|py', 'powershell':'powershell|ps1|pwsh', 'bash':'bash|sh|shell'}.get(code_type, re.escape(code_type))
+ matches = re.findall(rf"```(?:{code_type})\n(.*?)\n```", response.content, re.DOTALL)
return matches[-1].strip() if matches else None
def do_code_run(self, args, response):
@@ -280,7 +280,7 @@ class GenericAgentHandler(BaseHandler):
code = args.get("code") or args.get("script")
if not code:
code = self._extract_code_block(response, code_type)
- if not code: return StepOutcome("[Error] Code missing. Use ```{code_type} block or 'script' arg.", next_prompt="\n")
+ if not code: return StepOutcome("[Error] Code missing. Must use reply code block or 'script' arg.", next_prompt="\n")
timeout = args.get("timeout", 60)
raw_path = os.path.join(self.cwd, args.get("cwd", './'))
cwd = os.path.normpath(os.path.abspath(raw_path))
@@ -309,8 +309,7 @@ class GenericAgentHandler(BaseHandler):
def do_web_scan(self, args, response):
'''获取当前页面内容和标签页列表。也可用于切换标签页。
注意:HTML经过简化,边栏/浮动元素等可能被过滤。如需查看被过滤的内容请用execute_js。
- tabs_only=true时仅返回标签页列表,不获取HTML(省token)。
- '''
+ tabs_only=true时仅返回标签页列表,不获取HTML(省token)'''
tabs_only = args.get("tabs_only", False)
switch_tab_id = args.get("switch_tab_id", None)
text_only = args.get("text_only", False)
@@ -523,7 +522,7 @@ class GenericAgentHandler(BaseHandler):
clean_args = {k: v for k, v in args.items() if not k.startswith('_')}
summary = f"调用工具{tool_name}, args: {clean_args}"
if tool_name == 'no_tool': summary = "直接回答了用户问题"
- next_prompt += "\n[DANGER] 上一轮遗漏了,已根据物理动作自动补全。在下次回复中记得协议。"
+ next_prompt += "\n[DANGER] 上一轮遗漏了,需要按协议在中输出极简单行摘要!"
summary = smart_format(summary, max_str_len=100)
self.history_info.append(f'[Agent] {summary}')
if turn % 35 == 0 and 'plan' not in str(self.working.get('related_sop')):