refactor: restructure memory hierarchy and fix ljqCtrl coordinate logic
This commit is contained in:
@@ -75,7 +75,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
|
|||||||
next_prompt += outcome.next_prompt
|
next_prompt += outcome.next_prompt
|
||||||
if (turn+1) % 5 == 0:
|
if (turn+1) % 5 == 0:
|
||||||
next_prompt += f"\n\n[DANGER] 已连续执行第 {turn+1} 轮。禁止无效重试。若无有效进展,必须切换策略:1. 探测物理边界 2. 请求用户协助。"
|
next_prompt += f"\n\n[DANGER] 已连续执行第 {turn+1} 轮。禁止无效重试。若无有效进展,必须切换策略:1. 探测物理边界 2. 请求用户协助。"
|
||||||
if turn == 23:
|
if (turn+1) % 25 == 0:
|
||||||
next_prompt += f"\n\n[DANGER] 已连续执行第 {turn+1} 轮。你必须总结情况进行ask_user,不允许继续重试。"
|
next_prompt += f"\n\n### [DANGER] 已连续执行第 {turn+1} 轮。你必须总结情况进行ask_user,不允许继续重试。"
|
||||||
messages = [{"role": "user", "content": next_prompt}]
|
messages = [{"role": "user", "content": next_prompt}]
|
||||||
return {'result': 'MAX_TURNS_EXCEEDED'}
|
return {'result': 'MAX_TURNS_EXCEEDED'}
|
||||||
@@ -81,7 +81,7 @@ class GeneraticAgent:
|
|||||||
self.handler = handler
|
self.handler = handler
|
||||||
self.llmclient.backend = self.llmclient.backends[self.llm_no]
|
self.llmclient.backend = self.llmclient.backends[self.llm_no]
|
||||||
gen = agent_runner_loop(self.llmclient, sys_prompt,
|
gen = agent_runner_loop(self.llmclient, sys_prompt,
|
||||||
raw_query, handler, TOOLS_SCHEMA, max_turns=25)
|
raw_query, handler, TOOLS_SCHEMA, max_turns=40)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
full_response = ""; last_pos = 0
|
full_response = ""; last_pos = 0
|
||||||
|
|||||||
@@ -34,23 +34,23 @@
|
|||||||
}},
|
}},
|
||||||
{"type": "function", "function": {
|
{"type": "function", "function": {
|
||||||
"name": "web_scan",
|
"name": "web_scan",
|
||||||
"description": "获取当前网页的清洗后内容,并列出所有已打开的标签页。支持切换标签页。在长页面中,可以使用 focus_item 进行语义过滤以提取关键信息。",
|
"description": "获取当前网页的清洗后内容,并列出所有已打开的标签页。支持切换标签页。切换页面后一般应先调用查看。",
|
||||||
"parameters": {"type": "object", "properties": {
|
"parameters": {"type": "object", "properties": {
|
||||||
"focus_item": {"type": "string", "description": "语义过滤指令,用于在长列表中优先保留与该关键词相关的项。"},
|
"focus_item": {"type": "string", "description": "语义过滤指令,用于在长列表中优先保留与该关键词相关的项。"},
|
||||||
"switch_tab_id": {"type": "string", "description": "可选的标签页 ID。如果提供,系统将在扫描前切换到该标签页。"}}}
|
"switch_tab_id": {"type": "string", "description": "可选的标签页 ID。如果提供,系统将在扫描前切换到该标签页。"}}}
|
||||||
}},
|
}},
|
||||||
{"type": "function", "function": {
|
{"type": "function", "function": {
|
||||||
"name": "web_execute_js",
|
"name": "web_execute_js",
|
||||||
"description": "万能网页操控工具。通过执行 JavaScript 脚本实现对浏览器的完全控制(如点击、滚动、提取特定数据)。这是 Web 场景下的首选工具。执行结果可选择保存到本地文件进行后续分析。",
|
"description": "万能网页操控工具。通过执行 JavaScript 脚本实现对浏览器的完全控制(如点击、滚动、提取特定数据)。鼓励在有把握情况下(记忆中有selector/做法等)精准使用以减少web_scan调用。执行结果可选择保存到本地文件进行后续分析。",
|
||||||
"parameters": {"type": "object", "properties": {
|
"parameters": {"type": "object", "properties": {
|
||||||
"script": {"type": "string", "description": "要执行的 JavaScript 代码。"},
|
"script": {"type": "string", "description": "要执行的 JavaScript 代码。"},
|
||||||
"save_to_file": {"type": "string", "description": "可选。将 JS 执行结果(js_return)保存到的文件路径。注意:该功能不支持 await 等异步结果。"}}, "required": ["script"]}
|
"save_to_file": {"type": "string", "description": "可选。将 JS 执行结果(js_return)保存到的文件路径。该功能不支持 await 等异步结果。"}}, "required": ["script"]}
|
||||||
}},
|
}},
|
||||||
{"type": "function", "function": {
|
{"type": "function", "function": {
|
||||||
"name": "update_working_mem",
|
"name": "update_working_mem",
|
||||||
"description": "更新当前任务的工作记忆。当任务目标细化、阶段切换或发现关键物理约束(坑)时调用,内容将作为物理锚点在后续每轮自动注入,强行锁定推理方向,防止长程任务逻辑漂移。",
|
"description": "更新当前任务的工作记忆。当任务目标细化、阶段切换或发现关键物理约束(坑)时调用,内容(不得超过200 tokens)将作为物理锚点在后续每轮自动注入,强行锁定推理方向,防止长程任务逻辑漂移。",
|
||||||
"parameters": {"type": "object", "properties": {
|
"parameters": {"type": "object", "properties": {
|
||||||
"key_info": {"type": "string", "description": "替换掉当前的极简关键信息(200tokens以内),包含当前进度和目标、已获取的关键信息及下一步必须绕过的坑,已经解决或完成的步骤可以去掉"},
|
"key_info": {"type": "string", "description": "替换掉当前的极简关键信息(<200 tokens),包含当前进度和目标、已获取的关键信息及下一步必须绕过的坑,已经解决或完成的步骤可以去掉"},
|
||||||
"related_sop": {"type": "string", "description": "相关sop名称,可以多个,必要时需要再读"}}}
|
"related_sop": {"type": "string", "description": "相关sop名称,可以多个,必要时需要再读"}}}
|
||||||
}},
|
}},
|
||||||
{"type": "function", "function": {
|
{"type": "function", "function": {
|
||||||
|
|||||||
2
ga.py
2
ga.py
@@ -458,7 +458,7 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
def _get_anchor_prompt(self):
|
def _get_anchor_prompt(self):
|
||||||
h_str = "\n".join(self.history_info[-20:])
|
h_str = "\n".join(self.history_info[-20:])
|
||||||
prompt = f"\n### [WORKING MEMORY]\n<history>\n{h_str}\n</history>"
|
prompt = f"\n### [WORKING MEMORY]\n<history>\n{h_str}\n</history>"
|
||||||
if self.key_info: prompt += f"\n<sop_essentials>{self.key_info}</sop_essentials>"
|
if self.key_info: prompt += f"\n<key_info>{self.key_info}</key_info>"
|
||||||
if self.related_sop: prompt += f"\n有不清晰的地方请再次读取{self.related_sop}"
|
if self.related_sop: prompt += f"\n有不清晰的地方请再次读取{self.related_sop}"
|
||||||
print(prompt)
|
print(prompt)
|
||||||
return prompt
|
return prompt
|
||||||
|
|||||||
@@ -1,11 +1,12 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
"""
|
"""
|
||||||
|
CRITICAL: 严禁在此工具链中 import pyautogui (会污染 win32api 导致逻辑冲突)。
|
||||||
ljqCtrl Quick Reference:
|
ljqCtrl Quick Reference:
|
||||||
- dpi_scale: float (Physical = Logical / dpi_scale)
|
- dpi_scale: float (Logical = Physical * dpi_scale)
|
||||||
- Click(x, y=None): Click logical/physical coordinates
|
- Click(x, y): Use Physical Coordinates (from screenshots)
|
||||||
- SetCursorPos(z): Move mouse to logical coordinate z=(x, y)
|
- SetCursorPos(z): Use Physical Coordinates z=(x, y)
|
||||||
- Press(cmd, staytime=0): Keyboard shortcuts (e.g. 'ctrl+c')
|
- Press(cmd, staytime=0): Keyboard shortcuts (e.g. 'ctrl+v')
|
||||||
- FindBlock(fn, wrect=None, threshold=0.8) -> (obj_center, is_found)
|
- FindBlock(fn, wrect=None, threshold=0.8) -> (obj_center_phys, is_found)
|
||||||
- MouseDClick(staytime=0.05), MouseClick(staytime=0.05)
|
- MouseDClick(staytime=0.05), MouseClick(staytime=0.05)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
---
|
---
|
||||||
## 记忆层级架构
|
## 记忆层级架构
|
||||||
```
|
```
|
||||||
L1: global_mem_insight.txt (极简索引层 - 严格控制 ≤50 行)
|
L1: global_mem_insight.txt (极简索引层 - 严格控制 ≤30 行)
|
||||||
↓ 导航指向 (Pointer)
|
↓ 导航指向 (Pointer)
|
||||||
L2: global_mem.txt (事实库层 - 现短但会膨胀)
|
L2: global_mem.txt (事实库层 - 现短但会膨胀)
|
||||||
↓ 详细引用 (Reference)
|
↓ 详细引用 (Reference)
|
||||||
|
|||||||
24
sidercall.py
24
sidercall.py
@@ -56,7 +56,7 @@ class GeminiSession:
|
|||||||
return iter([full_text]) if stream else full_text
|
return iter([full_text]) if stream else full_text
|
||||||
|
|
||||||
class LLMSession:
|
class LLMSession:
|
||||||
def __init__(self, api_key=oai_apikey, api_base=oai_apibase, model=oai_model, context_win=12000):
|
def __init__(self, api_key=oai_apikey, api_base=oai_apibase, model=oai_model, context_win=16000):
|
||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
self.api_base = api_base
|
self.api_base = api_base
|
||||||
self.raw_msgs = []
|
self.raw_msgs = []
|
||||||
@@ -202,18 +202,18 @@ class ToolClient:
|
|||||||
if tools:
|
if tools:
|
||||||
tools_json = json.dumps(tools, ensure_ascii=False, separators=(',', ':'))
|
tools_json = json.dumps(tools, ensure_ascii=False, separators=(',', ':'))
|
||||||
tool_instruction = f"""
|
tool_instruction = f"""
|
||||||
### 交互协议 (必须严格遵守)
|
### 交互协议 (必须严格遵守,持续有效)
|
||||||
请按照以下步骤思考并行动,标签之间需要回车换行:
|
请按照以下步骤思考并行动,标签之间需要回车换行:
|
||||||
1. **思考**: 在 `<thinking>` 标签中先进行思考,分析现状和策略。
|
1. **思考**: 在 `<thinking>` 标签中先进行思考,分析现状和策略。
|
||||||
2. **总结**: 在 `<summary>` 中输出*极为简短*的高度概括的单行(<30字)物理快照,包括上次工具调用结果获取的新信息+本次工具调用意图和预期。此内容将进入长期工作记忆,记录关键信息,严禁输出无实际信息增量的描述。
|
2. **总结**: 在 `<summary>` 中输出*极为简短*的高度概括的单行(<30字)物理快照,包括上次工具调用结果获取的新信息+本次工具调用意图和预期。此内容将进入长期工作记忆,记录关键信息,严禁输出无实际信息增量的描述。
|
||||||
3. **行动**: 如果需要调用工具,请在回复正文之后输出一个 **<tool_use>块**,然后结束,我会稍后给你返回<tool_result>块。
|
3. **行动**: 如果需要调用工具,请在回复正文之后输出一个 **<tool_use>块**,然后结束,我会稍后给你返回<tool_result>块。
|
||||||
格式: ```<tool_use>\n{{"name": "工具名", "arguments": {{参数}}}}\n</tool_use>\n```
|
格式: ```<tool_use>\n{{"name": "工具名", "arguments": {{参数}}}}\n</tool_use>\n```
|
||||||
|
|
||||||
### 可用工具库
|
### 可用工具库(已挂载,持续有效)
|
||||||
{tools_json}
|
{tools_json}
|
||||||
"""
|
"""
|
||||||
if self.auto_save_tokens and self.last_tools == tools_json:
|
if self.auto_save_tokens and self.last_tools == tools_json:
|
||||||
tool_instruction = "\n### 交互协议保持不变,沿用之前的协议和工具库。\n"
|
tool_instruction = "\n### 工具库状态:持续有效(code_run/file_read等),**可正常调用**。调用协议沿用。\n"
|
||||||
else:
|
else:
|
||||||
self.total_cd_tokens = 0
|
self.total_cd_tokens = 0
|
||||||
self.last_tools = tools_json
|
self.last_tools = tools_json
|
||||||
@@ -255,7 +255,11 @@ class ToolClient:
|
|||||||
if json_str == '' and '```' in weaktoolstr and weaktoolstr.split('```')[0].strip().endswith('}'):
|
if json_str == '' and '```' in weaktoolstr and weaktoolstr.split('```')[0].strip().endswith('}'):
|
||||||
json_str = weaktoolstr.split('```')[0].strip()
|
json_str = weaktoolstr.split('```')[0].strip()
|
||||||
remaining_text = remaining_text.replace('<tool_use>'+weaktoolstr, "")
|
remaining_text = remaining_text.replace('<tool_use>'+weaktoolstr, "")
|
||||||
|
elif '"name":' in remaining_text and '"arguments":' in remaining_text:
|
||||||
|
json_match = re.search(r"(\{.*\"name\":.*?\})", remaining_text, re.DOTALL | re.MULTILINE)
|
||||||
|
if json_match:
|
||||||
|
json_str = json_match.group(1).strip()
|
||||||
|
remaining_text = remaining_text.replace(json_str, "").strip()
|
||||||
if json_str:
|
if json_str:
|
||||||
try:
|
try:
|
||||||
data = tryparse(json_str)
|
data = tryparse(json_str)
|
||||||
@@ -275,8 +279,14 @@ class ToolClient:
|
|||||||
|
|
||||||
def tryparse(json_str):
    """Parse a JSON string, tolerating common wrapping noise around the payload.

    The tool-call parser hands this function text that is expected to be a
    JSON object but may carry Markdown code-fence residue or trailing
    characters. Progressively more lenient candidates are tried in order:

    1. the text exactly as given;
    2. the text with surrounding whitespace/backticks removed and a leading
       ``json`` fence label dropped;
    3. that cleaned text without its final character;
    4. that cleaned text truncated right after its last ``}``.

    Args:
        json_str: The candidate JSON text.

    Returns:
        The decoded Python object from the first candidate that parses.

    Raises:
        json.JSONDecodeError: If none of the candidates is valid JSON.
    """
    try:
        return json.loads(json_str)
    except ValueError:
        # JSONDecodeError subclasses ValueError; fall through to lenient modes.
        pass

    cleaned = json_str.strip().strip('`').strip()
    # Drop a code-fence language label only when it is actually a prefix, so
    # text elsewhere in the payload is never altered. Slicing + lstrip also
    # covers "json\r\n" and upper-case labels.
    if cleaned[:4].lower() == 'json':
        cleaned = cleaned[4:].lstrip()

    try:
        return json.loads(cleaned)
    except ValueError:
        pass

    # A single stray trailing character (e.g. a comma) is a frequent failure.
    try:
        return json.loads(cleaned[:-1])
    except ValueError:
        pass

    # Last resort: discard everything after the final closing brace. This
    # attempt is allowed to raise so the caller sees the decode error.
    if '}' in cleaned:
        cleaned = cleaned[:cleaned.rfind('}') + 1]
    return json.loads(cleaned)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys, os
|
import sys, os
|
||||||
|
|||||||
Reference in New Issue
Block a user