diff --git a/agent_loop.py b/agent_loop.py
index 1c3e6bc..673dd3e 100644
--- a/agent_loop.py
+++ b/agent_loop.py
@@ -75,7 +75,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
next_prompt += outcome.next_prompt
if (turn+1) % 5 == 0:
next_prompt += f"\n\n[DANGER] 已连续执行第 {turn+1} 轮。禁止无效重试。若无有效进展,必须切换策略:1. 探测物理边界 2. 请求用户协助。"
- if turn == 23:
- next_prompt += f"\n\n[DANGER] 已连续执行第 {turn+1} 轮。你必须总结情况进行ask_user,不允许继续重试。"
+ if (turn+1) % 25 == 0:
+ next_prompt += f"\n\n### [DANGER] 已连续执行第 {turn+1} 轮。你必须总结情况进行ask_user,不允许继续重试。"
messages = [{"role": "user", "content": next_prompt}]
return {'result': 'MAX_TURNS_EXCEEDED'}
\ No newline at end of file
diff --git a/agentmain.py b/agentmain.py
index 9a45e09..532443f 100644
--- a/agentmain.py
+++ b/agentmain.py
@@ -81,7 +81,7 @@ class GeneraticAgent:
self.handler = handler
self.llmclient.backend = self.llmclient.backends[self.llm_no]
gen = agent_runner_loop(self.llmclient, sys_prompt,
- raw_query, handler, TOOLS_SCHEMA, max_turns=25)
+ raw_query, handler, TOOLS_SCHEMA, max_turns=40)
try:
full_response = ""; last_pos = 0
diff --git a/assets/tools_schema.json b/assets/tools_schema.json
index 0611135..f360e63 100644
--- a/assets/tools_schema.json
+++ b/assets/tools_schema.json
@@ -34,23 +34,23 @@
}},
{"type": "function", "function": {
"name": "web_scan",
- "description": "获取当前网页的清洗后内容,并列出所有已打开的标签页。支持切换标签页。在长页面中,可以使用 focus_item 进行语义过滤以提取关键信息。",
+ "description": "获取当前网页的清洗后内容,并列出所有已打开的标签页。支持切换标签页。切换页面后一般应先调用查看。",
"parameters": {"type": "object", "properties": {
"focus_item": {"type": "string", "description": "语义过滤指令,用于在长列表中优先保留与该关键词相关的项。"},
"switch_tab_id": {"type": "string", "description": "可选的标签页 ID。如果提供,系统将在扫描前切换到该标签页。"}}}
}},
{"type": "function", "function": {
"name": "web_execute_js",
- "description": "万能网页操控工具。通过执行 JavaScript 脚本实现对浏览器的完全控制(如点击、滚动、提取特定数据)。这是 Web 场景下的首选工具。执行结果可选择保存到本地文件进行后续分析。",
+ "description": "万能网页操控工具。通过执行 JavaScript 脚本实现对浏览器的完全控制(如点击、滚动、提取特定数据)。鼓励在有把握情况下(记忆中有selector/做法等)精准使用以减少web_scan调用。执行结果可选择保存到本地文件进行后续分析。",
"parameters": {"type": "object", "properties": {
"script": {"type": "string", "description": "要执行的 JavaScript 代码。"},
- "save_to_file": {"type": "string", "description": "可选。将 JS 执行结果(js_return)保存到的文件路径。注意:该功能不支持 await 等异步结果。"}}, "required": ["script"]}
+ "save_to_file": {"type": "string", "description": "可选。将 JS 执行结果(js_return)保存到的文件路径。该功能不支持 await 等异步结果。"}}, "required": ["script"]}
}},
{"type": "function", "function": {
"name": "update_working_mem",
- "description": "更新当前任务的工作记忆。当任务目标细化、阶段切换或发现关键物理约束(坑)时调用,内容将作为物理锚点在后续每轮自动注入,强行锁定推理方向,防止长程任务逻辑漂移。",
+ "description": "更新当前任务的工作记忆。当任务目标细化、阶段切换或发现关键物理约束(坑)时调用,内容(不得超过200 tokens)将作为物理锚点在后续每轮自动注入,强行锁定推理方向,防止长程任务逻辑漂移。",
"parameters": {"type": "object", "properties": {
- "key_info": {"type": "string", "description": "替换掉当前的极简关键信息(200tokens以内),包含当前进度和目标、已获取的关键信息及下一步必须绕过的坑,已经解决或完成的步骤可以去掉"},
+ "key_info": {"type": "string", "description": "替换掉当前的极简关键信息(<200 tokens),包含当前进度和目标、已获取的关键信息及下一步必须绕过的坑,已经解决或完成的步骤可以去掉"},
"related_sop": {"type": "string", "description": "相关sop名称,可以多个,必要时需要再读"}}}
}},
{"type": "function", "function": {
diff --git a/ga.py b/ga.py
index 9d2123b..0c35b89 100644
--- a/ga.py
+++ b/ga.py
@@ -458,7 +458,7 @@ class GenericAgentHandler(BaseHandler):
def _get_anchor_prompt(self):
    """Build the working-memory anchor text, print it and return it.

    The text is made of a fixed heading, the last 20 entries of
    ``self.history_info`` joined by newlines, then ``self.key_info`` when
    it is truthy, then a reminder naming ``self.related_sop`` when that
    is truthy.
    """
    recent_entries = self.history_info[-20:]
    pieces = ["\n### [WORKING MEMORY]\n\n", "\n".join(recent_entries), "\n"]
    if self.key_info:
        pieces.append(f"\n{self.key_info}")
    if self.related_sop:
        pieces.append(f"\n有不清晰的地方请再次读取{self.related_sop}")
    anchor_text = "".join(pieces)
    # The original echoes the assembled prompt to stdout; keep that side effect.
    print(anchor_text)
    return anchor_text
diff --git a/memory/ljqCtrl.py b/memory/ljqCtrl.py
index d12480f..debfbd4 100644
--- a/memory/ljqCtrl.py
+++ b/memory/ljqCtrl.py
@@ -1,11 +1,12 @@
# coding=utf-8
"""
+CRITICAL: 严禁在此工具链中 import pyautogui (会污染 win32api 导致逻辑冲突)。
ljqCtrl Quick Reference:
-- dpi_scale: float (Physical = Logical / dpi_scale)
-- Click(x, y=None): Click logical/physical coordinates
-- SetCursorPos(z): Move mouse to logical coordinate z=(x, y)
-- Press(cmd, staytime=0): Keyboard shortcuts (e.g. 'ctrl+c')
-- FindBlock(fn, wrect=None, threshold=0.8) -> (obj_center, is_found)
+- dpi_scale: float (Logical = Physical * dpi_scale)
+- Click(x, y): Use Physical Coordinates (from screenshots)
+- SetCursorPos(z): Use Physical Coordinates z=(x, y)
+- Press(cmd, staytime=0): Keyboard shortcuts (e.g. 'ctrl+v')
+- FindBlock(fn, wrect=None, threshold=0.8) -> (obj_center_phys, is_found)
- MouseDClick(staytime=0.05), MouseClick(staytime=0.05)
"""
diff --git a/memory/memory_management_sop.md b/memory/memory_management_sop.md
index 5a444b8..9a0546a 100644
--- a/memory/memory_management_sop.md
+++ b/memory/memory_management_sop.md
@@ -12,7 +12,7 @@
---
## 记忆层级架构
```
-L1: global_mem_insight.txt (极简索引层 - 严格控制 ≤50 行)
+L1: global_mem_insight.txt (极简索引层 - 严格控制 ≤30 行)
↓ 导航指向 (Pointer)
L2: global_mem.txt (事实库层 - 现短但会膨胀)
↓ 详细引用 (Reference)
diff --git a/sidercall.py b/sidercall.py
index 3512a1c..0365a82 100644
--- a/sidercall.py
+++ b/sidercall.py
@@ -56,7 +56,7 @@ class GeminiSession:
return iter([full_text]) if stream else full_text
class LLMSession:
- def __init__(self, api_key=oai_apikey, api_base=oai_apibase, model=oai_model, context_win=12000):
+ def __init__(self, api_key=oai_apikey, api_base=oai_apibase, model=oai_model, context_win=16000):
self.api_key = api_key
self.api_base = api_base
self.raw_msgs = []
@@ -202,18 +202,18 @@ class ToolClient:
if tools:
tools_json = json.dumps(tools, ensure_ascii=False, separators=(',', ':'))
tool_instruction = f"""
-### 交互协议 (必须严格遵守)
+### 交互协议 (必须严格遵守,持续有效)
请按照以下步骤思考并行动,标签之间需要回车换行:
1. **思考**: 在 `` 标签中先进行思考,分析现状和策略。
2. **总结**: 在 `` 中输出*极为简短*的高度概括的单行(<30字)物理快照,包括上次工具调用结果获取的新信息+本次工具调用意图和预期。此内容将进入长期工作记忆,记录关键信息,严禁输出无实际信息增量的描述。
3. **行动**: 如果需要调用工具,请在回复正文之后输出一个 **块**,然后结束,我会稍后给你返回块。
格式: ```\n{{"name": "工具名", "arguments": {{参数}}}}\n\n```
-### 可用工具库
+### 可用工具库(已挂载,持续有效)
{tools_json}
"""
if self.auto_save_tokens and self.last_tools == tools_json:
- tool_instruction = "\n### 交互协议保持不变,沿用之前的协议和工具库。\n"
+ tool_instruction = "\n### 工具库状态:持续有效(code_run/file_read等),**可正常调用**。调用协议沿用。\n"
else:
self.total_cd_tokens = 0
self.last_tools = tools_json
@@ -255,7 +255,11 @@ class ToolClient:
if json_str == '' and '```' in weaktoolstr and weaktoolstr.split('```')[0].strip().endswith('}'):
json_str = weaktoolstr.split('```')[0].strip()
remaining_text = remaining_text.replace(''+weaktoolstr, "")
-
+ elif '"name":' in remaining_text and '"arguments":' in remaining_text:
+ json_match = re.search(r"(\{.*\"name\":.*?\})", remaining_text, re.DOTALL | re.MULTILINE)
+ if json_match:
+ json_str = json_match.group(1).strip()
+ remaining_text = remaining_text.replace(json_str, "").strip()
if json_str:
try:
data = tryparse(json_str)
@@ -275,8 +279,14 @@ class ToolClient:
def tryparse(json_str):
    """Parse a JSON payload that may be wrapped in model-generated noise.

    Attempts, in order, and returns the first one that succeeds:

    1. The text exactly as given.
    2. The text with surrounding whitespace, surrounding backticks and a
       leading ``json`` fence tag removed (any letter case, LF or CRLF).
    3. The cleaned text with its last character dropped.
    4. The cleaned text truncated just after its last ``}``.

    Args:
        json_str: Candidate JSON text.

    Returns:
        The decoded Python object.

    Raises:
        json.JSONDecodeError: If every attempt fails; the error of the
            final attempt propagates (it is a ``ValueError`` subclass).
    """
    # Only decoding failures trigger the next fallback. A bare ``except``
    # would also swallow KeyboardInterrupt and SystemExit.
    recoverable = (ValueError, TypeError)

    try:
        return json.loads(json_str)
    except recoverable:
        pass

    cleaned = json_str.strip().strip('`').strip()
    # Drop the Markdown fence language tag only when it actually leads the
    # text, so CRLF line endings and "JSON" in upper case are handled too.
    if cleaned[:4].lower() == 'json':
        cleaned = cleaned[4:].lstrip()

    # Second candidate: tolerate a single stray trailing character.
    for candidate in (cleaned, cleaned[:-1]):
        try:
            return json.loads(candidate)
        except recoverable:
            pass

    # Last resort: discard everything after the final closing brace.
    if '}' in cleaned:
        cleaned = cleaned[:cleaned.rfind('}') + 1]
    return json.loads(cleaned)
if __name__ == "__main__":
import sys, os