refactor: update agent loop and tool extraction

This commit is contained in:
Liang Jiaqing
2026-04-19 23:21:37 +08:00
parent 86ca4625ad
commit 63c7c83c51
4 changed files with 90 additions and 62 deletions

View File

@@ -28,10 +28,7 @@ class BaseHandler:
yield f"未知工具: {tool_name}\n" yield f"未知工具: {tool_name}\n"
return StepOutcome(None, next_prompt=f"未知工具 {tool_name}", should_exit=False) return StepOutcome(None, next_prompt=f"未知工具 {tool_name}", should_exit=False)
def json_default(o):
    """Convert an object into a JSON-friendly fallback value.

    Presumably used as the ``default=`` hook of ``json.dumps`` (the call site
    is not visible here).

    Args:
        o: The object to convert.

    Returns:
        A list of the members when *o* is a ``set`` or ``frozenset``;
        otherwise ``str(o)``.
    """
    # frozenset is handled alongside set so it becomes a list rather than the
    # text "frozenset({...})".
    if isinstance(o, (set, frozenset)):
        return list(o)
    return str(o)
def exhaust(g): def exhaust(g):
try: try:
while True: next(g) while True: next(g)
@@ -47,7 +44,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
{"role": "system", "content": system_prompt}, {"role": "system", "content": system_prompt},
{"role": "user", "content": initial_user_content if initial_user_content is not None else user_input} {"role": "user", "content": initial_user_content if initial_user_content is not None else user_input}
] ]
turn = 0; handler._done_hooks = []; handler.max_turns = max_turns turn = 0; handler.max_turns = max_turns
while turn < handler.max_turns: while turn < handler.max_turns:
turn += 1; md = '**' if verbose else '' turn += 1; md = '**' if verbose else ''
yield f"{md}LLM Running (Turn {turn}) ...{md}\n\n" yield f"{md}LLM Running (Turn {turn}) ...{md}\n\n"
@@ -65,7 +62,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
else: tool_calls = [{'tool_name': tc.function.name, 'args': json.loads(tc.function.arguments), 'id': tc.id} else: tool_calls = [{'tool_name': tc.function.name, 'args': json.loads(tc.function.arguments), 'id': tc.id}
for tc in response.tool_calls] for tc in response.tool_calls]
tool_results = []; next_prompts = set(); exit_reason = None tool_results = []; next_prompts = set(); exit_reason = {}
for ii, tc in enumerate(tool_calls): for ii, tc in enumerate(tool_calls):
tool_name, args, tid = tc['tool_name'], tc['args'], tc.get('id', '') tool_name, args, tid = tc['tool_name'], tc['args'], tc.get('id', '')
if tool_name == 'no_tool': pass if tool_name == 'no_tool': pass
@@ -92,7 +89,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
tool_results.append({'tool_use_id': tid, 'content': datastr}) tool_results.append({'tool_use_id': tid, 'content': datastr})
next_prompts.add(outcome.next_prompt) next_prompts.add(outcome.next_prompt)
if len(next_prompts) == 0 or exit_reason: if len(next_prompts) == 0 or exit_reason:
if len(handler._done_hooks) == 0: break if len(handler._done_hooks) == 0 or exit_reason.get('result', '') == 'EXITED': break
next_prompts.add(handler._done_hooks.pop(0)) next_prompts.add(handler._done_hooks.pop(0))
next_prompt = handler.turn_end_callback(response, tool_calls, tool_results, turn, '\n'.join(next_prompts), exit_reason) next_prompt = handler.turn_end_callback(response, tool_calls, tool_results, turn, '\n'.join(next_prompts), exit_reason)
messages = [{"role": "user", "content": next_prompt, "tool_results": tool_results}] # just new message, history is kept in *Session messages = [{"role": "user", "content": next_prompt, "tool_results": tool_results}] # just new message, history is kept in *Session

View File

@@ -77,13 +77,15 @@ class GeneraticAgent:
self.llmclient = self.llmclients[self.llm_no] self.llmclient = self.llmclients[self.llm_no]
self.llmclient.backend.history = lastc.backend.history self.llmclient.backend.history = lastc.backend.history
self.llmclient.last_tools = '' self.llmclient.last_tools = ''
name = self.get_llm_name().lower() name = self.get_llm_name(model=True)
if 'glm' in name or 'minimax' in name or 'kimi' in name: load_tool_schema('_cn') if 'glm' in name or 'minimax' in name or 'kimi' in name: load_tool_schema('_cn')
else: load_tool_schema() else: load_tool_schema()
def list_llms(self):
    """Return one ``(index, display_name, is_current)`` tuple per configured LLM client.

    ``is_current`` is True for the entry whose index equals ``self.llm_no``.
    """
    entries = []
    for idx, client in enumerate(self.llmclients):
        label = self.get_llm_name(client)
        entries.append((idx, label, idx == self.llm_no))
    return entries
def get_llm_name(self, b=None, model=False):
    """Return a name for an LLM client.

    Args:
        b: The client to describe; defaults to ``self.llmclient``.
        model: When True, return the lower-cased backend model identifier
            instead of the display name.

    Returns:
        ``'BADCONFIG_MIXIN'`` when the client entry is a plain dict;
        otherwise ``"<BackendClass>/<backend.name>"``, or the lower-cased
        ``backend.model`` when *model* is True.
    """
    b = self.llmclient if b is None else b
    if isinstance(b, dict):
        return 'BADCONFIG_MIXIN'
    display = f"{type(b.backend).__name__}/{b.backend.name}"
    if not model:
        return display
    model_id = getattr(b.backend, 'model', None)
    if model_id:
        return str(model_id).lower()
    # No usable model identifier on this backend: fall back to the lower-cased
    # display name so substring checks by callers still work instead of raising.
    return display.lower()
def abort(self): def abort(self):
if not self.is_running: return if not self.is_running: return
@@ -135,11 +137,10 @@ class GeneraticAgent:
user_input = raw_query user_input = raw_query
if source == 'feishu' and len(self.history) > 1: # 如果有历史记录且来自飞书,注入到首轮 user_input 中(支持/restore恢复上下文 if source == 'feishu' and len(self.history) > 1: # 如果有历史记录且来自飞书,注入到首轮 user_input 中(支持/restore恢复上下文
user_input = handler._get_anchor_prompt() + f"\n\n### 用户当前消息\n{raw_query}" user_input = handler._get_anchor_prompt() + f"\n\n### 用户当前消息\n{raw_query}"
initial_user_content = None if 'gpt' in self.get_llm_name(model=True): handler._done_hooks.append('请确定用户任务是否完成如未完成需要继续工具调用直到完成任务确实需要问用户应使用ask_user工具')
# although new handler, the **full** history is in llmclient, so it is full history! # although new handler, the **full** history is in llmclient, so it is full history!
gen = agent_runner_loop(self.llmclient, sys_prompt, user_input, gen = agent_runner_loop(self.llmclient, sys_prompt, user_input,
handler, TOOLS_SCHEMA, max_turns=40, verbose=self.verbose, handler, TOOLS_SCHEMA, max_turns=40, verbose=self.verbose)
initial_user_content=initial_user_content)
try: try:
full_resp = ""; last_pos = 0 full_resp = ""; last_pos = 0
for chunk in gen: for chunk in gen:

View File

@@ -11,8 +11,8 @@ from agentmain import GeneraticAgent
API = 'https://ilinkai.weixin.qq.com' API = 'https://ilinkai.weixin.qq.com'
TOKEN_FILE = Path.home() / '.wxbot' / 'token.json' TOKEN_FILE = Path.home() / '.wxbot' / 'token.json'
TOKEN_FILE.parent.mkdir(exist_ok=True) TOKEN_FILE.parent.mkdir(exist_ok=True)
VER, MSG_USER, MSG_BOT, ITEM_TEXT, STATE_FINISH = '0.2.5', 1, 2, 1, 2 VER, MSG_USER, MSG_BOT, ITEM_TEXT, STATE_FINISH = '2.1.8', 1, 2, 1, 2
ITEM_FILE = 4 ITEM_IMAGE, ITEM_FILE, ITEM_VIDEO = 2, 4, 5
CDN_BASE = 'https://novac2c.cdn.weixin.qq.com/c2c' CDN_BASE = 'https://novac2c.cdn.weixin.qq.com/c2c'
def _uin(): def _uin():
@@ -94,48 +94,72 @@ class WxBotClient:
'to_user_id': to_user_id, 'typing_ticket': typing_ticket, 'to_user_id': to_user_id, 'typing_ticket': typing_ticket,
'typing_status': 2 if cancel else 1, 'base_info': {'channel_version': VER}}) 'typing_status': 2 if cancel else 1, 'base_info': {'channel_version': VER}})
def _enc(self, raw, aes_key):
    """Encrypt *raw* with AES in ECB mode after padding it to a 16-byte multiple.

    The padding is PKCS#7 style: between 1 and 16 bytes, each holding the pad
    length, so input that is already block-aligned gains one full block.
    """
    pad_len = 16 - len(raw) % 16
    padded = raw + bytes([pad_len]) * pad_len
    cipher = AES.new(aes_key, AES.MODE_ECB)
    return cipher.encrypt(padded)
def _upload(self, filekey, upload_param, raw, aes_key, timeout=120, upload_url=''):
    """Encrypt *raw* and POST it to the CDN, retrying transient failures.

    Args:
        filekey: Value for the ``filekey`` query parameter of the default URL.
        upload_param: Value URL-quoted into ``encrypted_query_param`` of the
            default URL; unused when *upload_url* is given.
        raw: Plaintext bytes; encrypted with ``self._enc`` before sending.
        aes_key: Key passed to ``self._enc`` and echoed back in the result.
        timeout: Per-request timeout handed to ``requests.post``.
        upload_url: When non-empty, used (stripped) instead of the URL built
            from ``CDN_BASE``.

    Returns:
        A dict with ``encrypt_query_param`` (the ``x-encrypted-param``
        response header), ``aes_key`` (base64 of the key's hex string) and
        ``encrypt_type`` set to 1.

    Raises:
        RuntimeError: On a 4xx response (never retried), or on a non-200
            response / missing ``x-encrypted-param`` header once all attempts
            are used up.
        Exception: Whatever ``requests.post`` raised on the final attempt.
    """
    if upload_url:
        url = upload_url.strip()
    else:
        url = f'{CDN_BASE}/upload?encrypted_query_param={quote(upload_param)}&filekey={filekey}'
    data = self._enc(raw, aes_key)
    max_attempts = 3
    last_err = None
    for attempt in range(1, max_attempts + 1):
        # Tracked explicitly instead of sniffing the exception text, so a
        # server-supplied message can never change the retry decision.
        retryable = True
        try:
            r = requests.post(url, data=data, headers={'Content-Type': 'application/octet-stream'}, timeout=timeout)
            if 400 <= r.status_code < 500:
                retryable = False  # the request itself is wrong; retrying cannot help
                msg = r.headers.get('x-error-message') or r.text[:300]
                raise RuntimeError(f'CDN upload client error {r.status_code}: {msg}')
            if r.status_code != 200:
                msg = r.headers.get('x-error-message') or f'status {r.status_code}'
                raise RuntimeError(f'CDN upload server error: {msg}')
            eq = r.headers.get('x-encrypted-param', '')
            if not eq:
                raise RuntimeError('CDN upload response missing x-encrypted-param header')
            return {'encrypt_query_param': eq,
                    'aes_key': base64.b64encode(aes_key.hex().encode()).decode(), 'encrypt_type': 1}
        except Exception as e:
            last_err = e
            if not retryable or attempt >= max_attempts:
                break
            print(f'[WX] CDN upload retry {attempt}: {e}', file=sys.__stdout__)
    raise last_err
def _send_media(self, to_user_id, file_path, media_type, item_type, item_key, context_token=''):
    """Upload a local file through the CDN and send it to a user as a media message.

    Steps: request an upload slot via ``ilink/bot/getuploadurl``, push the
    encrypted bytes with ``self._upload``, then post the message through
    ``ilink/bot/sendmessage``.

    Args:
        to_user_id: Recipient user id.
        file_path: Path of the local file to send.
        media_type: ``media_type`` value for the upload-slot request.
        item_type: ``type`` value of the message item.
        item_key: Key under which the item payload is stored
            (``'file_item'``, ``'image_item'`` or ``'video_item'``); it also
            selects the extra size/name fields added to the payload.
        context_token: Optional token attached to the message when non-empty.

    Returns:
        The result of the ``ilink/bot/sendmessage`` call.

    Raises:
        RuntimeError: If the upload-slot response has neither
            ``upload_param`` nor ``upload_full_url``.
    """
    src = Path(file_path)
    raw = src.read_bytes()
    raw_len = len(raw)
    filekey = uuid.uuid4().hex
    aes_key = os.urandom(16)
    # Size after padding: always rounds up to the next 16-byte block.
    ciphertext_size = (raw_len // 16 + 1) * 16

    slot_request = {
        'filekey': filekey,
        'media_type': media_type,
        'to_user_id': to_user_id,
        'rawsize': raw_len,
        'rawfilemd5': hashlib.md5(raw).hexdigest(),
        'filesize': ciphertext_size,
        'no_need_thumb': True,
        'aeskey': aes_key.hex(),
        'base_info': {'channel_version': VER},
    }
    resp = self._post('ilink/bot/getuploadurl', slot_request)
    upload_param = resp.get('upload_param', '')
    upload_url = resp.get('upload_full_url', '')
    if not upload_param and not upload_url:
        raise RuntimeError(f'getuploadurl failed: {resp}')

    media = self._upload(filekey, upload_param, raw, aes_key=aes_key, upload_url=upload_url)

    # Extra payload fields by item kind; an unknown kind carries only 'media'.
    extras_by_key = {
        'file_item': {'file_name': src.name, 'len': str(raw_len)},
        'image_item': {'mid_size': ciphertext_size},
        'video_item': {'video_size': ciphertext_size},
    }
    item = {'media': media}
    item.update(extras_by_key.get(item_key, {}))

    msg = {
        'from_user_id': '',
        'to_user_id': to_user_id,
        'client_id': f'pyclient-{uuid.uuid4().hex[:16]}',
        'message_type': MSG_BOT,
        'message_state': STATE_FINISH,
        'item_list': [{'type': item_type, item_key: item}],
    }
    if context_token:
        msg['context_token'] = context_token
    return self._post('ilink/bot/sendmessage', {'msg': msg, 'base_info': {'channel_version': VER}})
def send_file(self, to_user_id, file_path, context_token=''):
    """Send *file_path* to a user as a file attachment (upload media_type 3, ``ITEM_FILE``)."""
    return self._send_media(
        to_user_id, file_path,
        media_type=3, item_type=ITEM_FILE, item_key='file_item',
        context_token=context_token)
def send_image(self, to_user_id, file_path, context_token=''):
    """Send *file_path* to a user as an image (upload media_type 1, ``ITEM_IMAGE``)."""
    return self._send_media(
        to_user_id, file_path,
        media_type=1, item_type=ITEM_IMAGE, item_key='image_item',
        context_token=context_token)
def send_video(self, to_user_id, file_path, context_token=''):
    """Send *file_path* to a user as a video (upload media_type 2, ``ITEM_VIDEO``)."""
    return self._send_media(
        to_user_id, file_path,
        media_type=2, item_type=ITEM_VIDEO, item_key='video_item',
        context_token=context_token)
@staticmethod @staticmethod
def extract_text(msg): def extract_text(msg):
return '\n'.join(it['text_item'].get('text', '') return '\n'.join(it['text_item'].get('text', '')
@@ -190,7 +214,7 @@ def _dl_media(items):
agent = GeneraticAgent() agent = GeneraticAgent()
agent.verbose = False agent.verbose = False
_TAG_PATS = [r'<' + t + r'>.*?</' + t + r'>' for t in ('thinking', 'summary', 'tool_use')] _TAG_PATS = [r'<' + t + r'>.*?</' + t + r'>' for t in ('thinking', 'tool_use')]
_TAG_PATS.append(r'<file_content>.*?</file_content>') _TAG_PATS.append(r'<file_content>.*?</file_content>')
def _strip_md(t): def _strip_md(t):
@@ -213,8 +237,11 @@ def _strip_md(t):
return re.sub(r'\n{3,}', '\n\n', t).strip() return re.sub(r'\n{3,}', '\n\n', t).strip()
def _clean(t):
    """Strip agent-internal noise from a reply before it is shown to the user.

    Removes "LLM Running (Turn N) ..." banner lines, tool-call echo lines,
    every span matched by ``_TAG_PATS``, and bare ``<summary>`` /
    ``</summary>`` tags (their inner text is kept). The result goes through
    ``_strip_md`` and has runs of blank lines collapsed. Returns ``'...'``
    when nothing is left.
    """
    # Whole-line patterns, applied per line via re.M.
    for line_pat in (r'^\s*LLM Running \(Turn \d+\) \.{3}\s*$',
                     r'^\s*🛠️\s*[A-Za-z_][A-Za-z0-9_]*\(.*$'):
        t = re.sub(line_pat, '', t, flags=re.M)
    # Tagged blocks may span several lines, hence DOTALL.
    for block_pat in _TAG_PATS:
        t = re.sub(block_pat, '', t, flags=re.DOTALL)
    t = re.sub(r'</?summary>', '', t)
    cleaned = re.sub(r'\n{3,}', '\n\n', _strip_md(t)).strip()
    return cleaned if cleaned else '...'
def _split(text, limit=1800): def _split(text, limit=1800):
@@ -270,7 +297,8 @@ def on_message(bot, msg):
if 'done' in item: result = item['done']; break if 'done' in item: result = item['done']; break
except queue.Empty: result = '[超时]' except queue.Empty: result = '[超时]'
files = re.findall(r'\[FILE:([^\]]+)\]', result) files = re.findall(r'\[FILE:([^\]]+)\]', result)
files = [f for f in files if (f if os.path.isabs(f) else os.path.join(_TEMP_DIR, f)) not in media_paths] bad = {'filepath', '<filepath>', 'path', '<path>', 'file_path', '<file_path>', '...'}
files = [f for f in files if f.strip().lower() not in bad and (f if os.path.isabs(f) else os.path.join(_TEMP_DIR, f)) not in media_paths]
show = _clean(result) show = _clean(result)
chunks = _split(show) chunks = _split(show)
_MAX_MSGS = 6 _MAX_MSGS = 6
@@ -285,9 +313,12 @@ def on_message(bot, msg):
if not os.path.isabs(fpath): fpath = os.path.join(_TEMP_DIR, fpath) if not os.path.isabs(fpath): fpath = os.path.join(_TEMP_DIR, fpath)
try: try:
if not os.path.exists(fpath): raise FileNotFoundError(f"文件不存在: {fpath}") if not os.path.exists(fpath): raise FileNotFoundError(f"文件不存在: {fpath}")
bot.send_file(uid, fpath, context_token=ctx) ext = os.path.splitext(fpath)[1].lower()
print(f'[WX] sent file: {fpath}', file=sys.__stdout__) sender = bot.send_video if ext in {'.mp4', '.mov', '.m4v', '.webm'} else \
except Exception as e: print(f'[WX] send_file err: {e}', file=sys.__stdout__) bot.send_image if ext in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'} else bot.send_file
sender(uid, fpath, context_token=ctx)
print(f'[WX] sent media: {fpath}', file=sys.__stdout__)
except Exception as e: print(f'[WX] send media err: {e}', file=sys.__stdout__)
threading.Thread(target=_handle, daemon=True).start() threading.Thread(target=_handle, daemon=True).start()

21
ga.py
View File

@@ -17,7 +17,7 @@ def code_run(code, code_type="python", timeout=60, cwd=None, code_cwd=None, stop
yield f"[Action] Running {code_type} in {os.path.basename(cwd)}: {preview}\n" yield f"[Action] Running {code_type} in {os.path.basename(cwd)}: {preview}\n"
script_dir = os.path.dirname(os.path.abspath(__file__)) script_dir = os.path.dirname(os.path.abspath(__file__))
cwd = cwd or os.path.join(script_dir, 'temp'); tmp_path = None cwd = cwd or os.path.join(script_dir, 'temp'); tmp_path = None
if code_type == "python": if code_type in ["python", "py"]:
tmp_file = tempfile.NamedTemporaryFile(suffix=".ai.py", delete=False, mode='w', encoding='utf-8', dir=code_cwd) tmp_file = tempfile.NamedTemporaryFile(suffix=".ai.py", delete=False, mode='w', encoding='utf-8', dir=code_cwd)
cr_header = os.path.join(script_dir, 'assets', 'code_run_header.py') cr_header = os.path.join(script_dir, 'assets', 'code_run_header.py')
if os.path.exists(cr_header): tmp_file.write(open(cr_header, encoding='utf-8').read()) if os.path.exists(cr_header): tmp_file.write(open(cr_header, encoding='utf-8').read())
@@ -25,7 +25,7 @@ def code_run(code, code_type="python", timeout=60, cwd=None, code_cwd=None, stop
tmp_path = tmp_file.name tmp_path = tmp_file.name
tmp_file.close() tmp_file.close()
cmd = [sys.executable, "-X", "utf8", "-u", tmp_path] cmd = [sys.executable, "-X", "utf8", "-u", tmp_path]
elif code_type in ["powershell", "bash"]: elif code_type in ["powershell", "bash", "sh", "shell", "ps1", "pwsh"]:
if os.name == 'nt': cmd = ["powershell", "-NoProfile", "-NonInteractive", "-Command", code] if os.name == 'nt': cmd = ["powershell", "-NoProfile", "-NonInteractive", "-Command", code]
else: cmd = ["bash", "-c", code] else: cmd = ["bash", "-c", code]
else: else:
@@ -110,12 +110,10 @@ def first_init_driver():
time.sleep(3) time.sleep(3)
def web_scan(tabs_only=False, switch_tab_id=None, text_only=False): def web_scan(tabs_only=False, switch_tab_id=None, text_only=False):
""" """获取当前页面的简化HTML内容和标签页列表。注意简化过程会过滤边栏、浮动元素等非主体内容。
获取当前页面的简化HTML内容和标签页列表。注意简化过程会过滤边栏、浮动元素等非主体内容。
tabs_only: 仅返回标签页列表不获取HTML内容节省token tabs_only: 仅返回标签页列表不获取HTML内容节省token
switch_tab_id: 可选参数,如果提供,则在扫描前切换到该标签页。 switch_tab_id: 可选参数,如果提供,则在扫描前切换到该标签页。
应当多用execute_js少全量观察html 应当多用execute_js少全量观察html"""
"""
global driver global driver
try: try:
if driver is None: first_init_driver() if driver is None: first_init_driver()
@@ -265,13 +263,15 @@ class GenericAgentHandler(BaseHandler):
self.cwd = cwd; self.current_turn = 0 self.cwd = cwd; self.current_turn = 0
self.history_info = last_history if last_history else [] self.history_info = last_history if last_history else []
self.code_stop_signal = [] self.code_stop_signal = []
self._done_hooks = []
def _get_abs_path(self, path): def _get_abs_path(self, path):
if not path: return "" if not path: return ""
return os.path.abspath(os.path.join(self.cwd, path)) return os.path.abspath(os.path.join(self.cwd, path))
def _extract_code_block(self, response, code_type): def _extract_code_block(self, response, code_type):
matches = re.findall(rf"```{code_type}\n(.*?)\n```", response.content, re.DOTALL) code_type = {'python':'python|py', 'powershell':'powershell|ps1|pwsh', 'bash':'bash|sh|shell'}.get(code_type, re.escape(code_type))
matches = re.findall(rf"```(?:{code_type})\n(.*?)\n```", response.content, re.DOTALL)
return matches[-1].strip() if matches else None return matches[-1].strip() if matches else None
def do_code_run(self, args, response): def do_code_run(self, args, response):
@@ -280,7 +280,7 @@ class GenericAgentHandler(BaseHandler):
code = args.get("code") or args.get("script") code = args.get("code") or args.get("script")
if not code: if not code:
code = self._extract_code_block(response, code_type) code = self._extract_code_block(response, code_type)
if not code: return StepOutcome("[Error] Code missing. Use ```{code_type} block or 'script' arg.", next_prompt="\n") if not code: return StepOutcome("[Error] Code missing. Must use reply code block or 'script' arg.", next_prompt="\n")
timeout = args.get("timeout", 60) timeout = args.get("timeout", 60)
raw_path = os.path.join(self.cwd, args.get("cwd", './')) raw_path = os.path.join(self.cwd, args.get("cwd", './'))
cwd = os.path.normpath(os.path.abspath(raw_path)) cwd = os.path.normpath(os.path.abspath(raw_path))
@@ -309,8 +309,7 @@ class GenericAgentHandler(BaseHandler):
def do_web_scan(self, args, response): def do_web_scan(self, args, response):
'''获取当前页面内容和标签页列表。也可用于切换标签页。 '''获取当前页面内容和标签页列表。也可用于切换标签页。
注意HTML经过简化边栏/浮动元素等可能被过滤。如需查看被过滤的内容请用execute_js。 注意HTML经过简化边栏/浮动元素等可能被过滤。如需查看被过滤的内容请用execute_js。
tabs_only=true时仅返回标签页列表不获取HTML省token tabs_only=true时仅返回标签页列表不获取HTML省token'''
'''
tabs_only = args.get("tabs_only", False) tabs_only = args.get("tabs_only", False)
switch_tab_id = args.get("switch_tab_id", None) switch_tab_id = args.get("switch_tab_id", None)
text_only = args.get("text_only", False) text_only = args.get("text_only", False)
@@ -523,7 +522,7 @@ class GenericAgentHandler(BaseHandler):
clean_args = {k: v for k, v in args.items() if not k.startswith('_')} clean_args = {k: v for k, v in args.items() if not k.startswith('_')}
summary = f"调用工具{tool_name}, args: {clean_args}" summary = f"调用工具{tool_name}, args: {clean_args}"
if tool_name == 'no_tool': summary = "直接回答了用户问题" if tool_name == 'no_tool': summary = "直接回答了用户问题"
next_prompt += "\n[DANGER] 上一轮遗漏了<summary>已根据物理动作自动补全。在下次回复中记得<summary>协议。" next_prompt += "\n[DANGER] 上一轮遗漏了<summary>需要按协议在<summary>中输出极简单行摘要!"
summary = smart_format(summary, max_str_len=100) summary = smart_format(summary, max_str_len=100)
self.history_info.append(f'[Agent] {summary}') self.history_info.append(f'[Agent] {summary}')
if turn % 35 == 0 and 'plan' not in str(self.working.get('related_sop')): if turn % 35 == 0 and 'plan' not in str(self.working.get('related_sop')):