diff --git a/agent_loop.py b/agent_loop.py index 1c3e6bc..673dd3e 100644 --- a/agent_loop.py +++ b/agent_loop.py @@ -75,7 +75,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, next_prompt += outcome.next_prompt if (turn+1) % 5 == 0: next_prompt += f"\n\n[DANGER] 已连续执行第 {turn+1} 轮。禁止无效重试。若无有效进展,必须切换策略:1. 探测物理边界 2. 请求用户协助。" - if turn == 23: - next_prompt += f"\n\n[DANGER] 已连续执行第 {turn+1} 轮。你必须总结情况进行ask_user,不允许继续重试。" + if (turn+1) % 25 == 0: + next_prompt += f"\n\n### [DANGER] 已连续执行第 {turn+1} 轮。你必须总结情况进行ask_user,不允许继续重试。" messages = [{"role": "user", "content": next_prompt}] return {'result': 'MAX_TURNS_EXCEEDED'} \ No newline at end of file diff --git a/agentmain.py b/agentmain.py index 9a45e09..532443f 100644 --- a/agentmain.py +++ b/agentmain.py @@ -81,7 +81,7 @@ class GeneraticAgent: self.handler = handler self.llmclient.backend = self.llmclient.backends[self.llm_no] gen = agent_runner_loop(self.llmclient, sys_prompt, - raw_query, handler, TOOLS_SCHEMA, max_turns=25) + raw_query, handler, TOOLS_SCHEMA, max_turns=40) try: full_response = ""; last_pos = 0 diff --git a/assets/tools_schema.json b/assets/tools_schema.json index 0611135..f360e63 100644 --- a/assets/tools_schema.json +++ b/assets/tools_schema.json @@ -34,23 +34,23 @@ }}, {"type": "function", "function": { "name": "web_scan", - "description": "获取当前网页的清洗后内容,并列出所有已打开的标签页。支持切换标签页。在长页面中,可以使用 focus_item 进行语义过滤以提取关键信息。", + "description": "获取当前网页的清洗后内容,并列出所有已打开的标签页。支持切换标签页。切换页面后一般应先调用查看。", "parameters": {"type": "object", "properties": { "focus_item": {"type": "string", "description": "语义过滤指令,用于在长列表中优先保留与该关键词相关的项。"}, "switch_tab_id": {"type": "string", "description": "可选的标签页 ID。如果提供,系统将在扫描前切换到该标签页。"}}} }}, {"type": "function", "function": { "name": "web_execute_js", - "description": "万能网页操控工具。通过执行 JavaScript 脚本实现对浏览器的完全控制(如点击、滚动、提取特定数据)。这是 Web 场景下的首选工具。执行结果可选择保存到本地文件进行后续分析。", + "description": "万能网页操控工具。通过执行 JavaScript 脚本实现对浏览器的完全控制(如点击、滚动、提取特定数据)。鼓励在有把握情况下(记忆中有selector/做法等)精准使用以减少web_scan调用。执行结果可选择保存到本地文件进行后续分析。", "parameters": {"type": "object", "properties": { "script": {"type": "string", "description": "要执行的 JavaScript 代码。"}, - "save_to_file": {"type": "string", "description": "可选。将 JS 执行结果(js_return)保存到的文件路径。注意:该功能不支持 await 等异步结果。"}}, "required": ["script"]} + "save_to_file": {"type": "string", "description": "可选。将 JS 执行结果(js_return)保存到的文件路径。该功能不支持 await 等异步结果。"}}, "required": ["script"]} }}, {"type": "function", "function": { "name": "update_working_mem", - "description": "更新当前任务的工作记忆。当任务目标细化、阶段切换或发现关键物理约束(坑)时调用,内容将作为物理锚点在后续每轮自动注入,强行锁定推理方向,防止长程任务逻辑漂移。", + "description": "更新当前任务的工作记忆。当任务目标细化、阶段切换或发现关键物理约束(坑)时调用,内容(不得超过200 tokens)将作为物理锚点在后续每轮自动注入,强行锁定推理方向,防止长程任务逻辑漂移。", "parameters": {"type": "object", "properties": { - "key_info": {"type": "string", "description": "替换掉当前的极简关键信息(200tokens以内),包含当前进度和目标、已获取的关键信息及下一步必须绕过的坑,已经解决或完成的步骤可以去掉"}, + "key_info": {"type": "string", "description": "替换掉当前的极简关键信息(<200 tokens),包含当前进度和目标、已获取的关键信息及下一步必须绕过的坑,已经解决或完成的步骤可以去掉"}, "related_sop": {"type": "string", "description": "相关sop名称,可以多个,必要时需要再读"}}} }}, {"type": "function", "function": { diff --git a/ga.py b/ga.py index 9d2123b..0c35b89 100644 --- a/ga.py +++ b/ga.py @@ -458,7 +458,7 @@ class GenericAgentHandler(BaseHandler): def _get_anchor_prompt(self): h_str = "\n".join(self.history_info[-20:]) prompt = f"\n### [WORKING MEMORY]\n\n{h_str}\n" - if self.key_info: prompt += f"\n{self.key_info}" + if self.key_info: prompt += f"\n{self.key_info}" if self.related_sop: prompt += f"\n有不清晰的地方请再次读取{self.related_sop}" print(prompt) return prompt diff --git a/memory/ljqCtrl.py b/memory/ljqCtrl.py index d12480f..debfbd4 100644 --- a/memory/ljqCtrl.py +++ b/memory/ljqCtrl.py @@ -1,11 +1,12 @@ # coding=utf-8 """ +CRITICAL: 严禁在此工具链中 import pyautogui (会污染 win32api 导致逻辑冲突)。 ljqCtrl Quick Reference: -- dpi_scale: float (Physical = Logical / dpi_scale) -- Click(x, y=None): Click logical/physical coordinates -- SetCursorPos(z): Move mouse to logical coordinate z=(x, y) -- Press(cmd, staytime=0): Keyboard shortcuts (e.g. 'ctrl+c') -- FindBlock(fn, wrect=None, threshold=0.8) -> (obj_center, is_found) +- dpi_scale: float (Logical = Physical * dpi_scale) +- Click(x, y): Use Physical Coordinates (from screenshots) +- SetCursorPos(z): Use Physical Coordinates z=(x, y) +- Press(cmd, staytime=0): Keyboard shortcuts (e.g. 'ctrl+v') +- FindBlock(fn, wrect=None, threshold=0.8) -> (obj_center_phys, is_found) - MouseDClick(staytime=0.05), MouseClick(staytime=0.05) """ diff --git a/memory/memory_management_sop.md b/memory/memory_management_sop.md index 5a444b8..9a0546a 100644 --- a/memory/memory_management_sop.md +++ b/memory/memory_management_sop.md @@ -12,7 +12,7 @@ --- ## 记忆层级架构 ``` -L1: global_mem_insight.txt (极简索引层 - 严格控制 ≤50 行) +L1: global_mem_insight.txt (极简索引层 - 严格控制 ≤30 行) ↓ 导航指向 (Pointer) L2: global_mem.txt (事实库层 - 现短但会膨胀) ↓ 详细引用 (Reference) diff --git a/sidercall.py b/sidercall.py index 3512a1c..0365a82 100644 --- a/sidercall.py +++ b/sidercall.py @@ -56,7 +56,7 @@ class GeminiSession: return iter([full_text]) if stream else full_text class LLMSession: - def __init__(self, api_key=oai_apikey, api_base=oai_apibase, model=oai_model, context_win=12000): + def __init__(self, api_key=oai_apikey, api_base=oai_apibase, model=oai_model, context_win=16000): self.api_key = api_key self.api_base = api_base self.raw_msgs = [] @@ -202,18 +202,18 @@ class ToolClient: if tools: tools_json = json.dumps(tools, ensure_ascii=False, separators=(',', ':')) tool_instruction = f""" -### 交互协议 (必须严格遵守) +### 交互协议 (必须严格遵守,持续有效) 请按照以下步骤思考并行动,标签之间需要回车换行: 1. **思考**: 在 `` 标签中先进行思考,分析现状和策略。 2. **总结**: 在 `` 中输出*极为简短*的高度概括的单行(<30字)物理快照,包括上次工具调用结果获取的新信息+本次工具调用意图和预期。此内容将进入长期工作记忆,记录关键信息,严禁输出无实际信息增量的描述。 3. **行动**: 如果需要调用工具,请在回复正文之后输出一个 **块**,然后结束,我会稍后给你返回块。 格式: ```\n{{"name": "工具名", "arguments": {{参数}}}}\n\n``` -### 可用工具库 +### 可用工具库(已挂载,持续有效) {tools_json} """ if self.auto_save_tokens and self.last_tools == tools_json: - tool_instruction = "\n### 交互协议保持不变,沿用之前的协议和工具库。\n" + tool_instruction = "\n### 工具库状态:持续有效(code_run/file_read等),**可正常调用**。调用协议沿用。\n" else: self.total_cd_tokens = 0 self.last_tools = tools_json @@ -255,7 +255,11 @@ class ToolClient: if json_str == '' and '```' in weaktoolstr and weaktoolstr.split('```')[0].strip().endswith('}'): json_str = weaktoolstr.split('```')[0].strip() remaining_text = remaining_text.replace(''+weaktoolstr, "") - + elif '"name":' in remaining_text and '"arguments":' in remaining_text: + json_match = re.search(r"(\{.*\"name\":.*?\})", remaining_text, re.DOTALL | re.MULTILINE) + if json_match: + json_str = json_match.group(1).strip() + remaining_text = remaining_text.replace(json_str, "").strip() if json_str: try: data = tryparse(json_str) @@ -275,8 +279,14 @@ class ToolClient: def tryparse(json_str): try: return json.loads(json_str) - except: - return json.loads(json_str[:-1]) + except: pass + json_str = json_str.strip().strip('`').replace('json\n', '', 1).strip() + try: return json.loads(json_str) + except: pass + try: return json.loads(json_str[:-1]) + except: pass + if '}' in json_str: json_str = json_str[:json_str.rfind('}') + 1] + return json.loads(json_str) if __name__ == "__main__": import sys, os