feat: optimize streaming output, add thread safety, and refine system prompts
This commit is contained in:
@@ -79,11 +79,13 @@ class GeneraticAgent:
|
|||||||
raw_query, handler, TOOLS_SCHEMA, max_turns=25)
|
raw_query, handler, TOOLS_SCHEMA, max_turns=25)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
full_response = ""
|
full_response = ""; last_pos = 0
|
||||||
for chunk in gen:
|
for chunk in gen:
|
||||||
if self.stop_sig: break
|
if self.stop_sig: break
|
||||||
full_response += chunk
|
full_response += chunk
|
||||||
self.display_queue.put({'next': f'{full_response}', 'source': source})
|
if len(full_response) - last_pos > 50:
|
||||||
|
self.display_queue.put({'next': f'{full_response}', 'source': source})
|
||||||
|
last_pos = len(full_response)
|
||||||
if '</summary>' in full_response: full_response = full_response.replace('</summary>', '</summary>\n\n')
|
if '</summary>' in full_response: full_response = full_response.replace('</summary>', '</summary>\n\n')
|
||||||
if '</file_content>' in full_response: full_response = re.sub(r'<file_content>\s*(.*?)\s*</file_content>', r'\n````\n<file_content>\n\1\n</file_content>\n````', full_response, flags=re.DOTALL)
|
if '</file_content>' in full_response: full_response = re.sub(r'<file_content>\s*(.*?)\s*</file_content>', r'\n````\n<file_content>\n\1\n</file_content>\n````', full_response, flags=re.DOTALL)
|
||||||
self.display_queue.put({'done': full_response, 'source': source})
|
self.display_queue.put({'done': full_response, 'source': source})
|
||||||
|
|||||||
@@ -1,17 +1,15 @@
|
|||||||
## Global Memory Index (Logic)
|
|
||||||
|
|
||||||
[CONSTITUTION]
|
[CONSTITUTION]
|
||||||
1. 改我自身源码前必须先问用户;在 ./ 内自由实验可直接做。
|
1. 修改自身源码前必请示;./ 目录内实验可自主执行。
|
||||||
2. 要下结论/依赖环境/做操作前:先枚举可用存储(store)与索引;再读取相关条目;未验证不硬断言。
|
2. 决策前必先枚举 STORES 索引并读取条目;未经验证不硬断言。
|
||||||
3. 没有用户授权不要直接文本读取或移动密钥/密码文件,能引用即可。或者先请求用户许可。
|
3. 复杂任务单步执行:严禁盲目一气呵成,每步必有物理验证反馈。
|
||||||
|
4. 严禁未经授权读取/移动密钥或密码文件;仅限引用或申请许可。
|
||||||
|
|
||||||
[STORES]
|
[META-SOP]
|
||||||
- global_mem: ../memory/global_mem.txt (Facts, Policy, Prefs)
|
- memory_management_sop.md (L0 准则;修改/写入记忆前必先 read 核验)
|
||||||
- memory_dir: ../memory/ (SOPs, Tools)
|
|
||||||
|
|
||||||
[ACCESS]
|
[DIRECTORY]
|
||||||
- global_mem: 按 TOPIC 检索索引 → file_read 对应条目。
|
- L2_Facts: ../memory/global_mem.txt (Method: 按 TOPIC 检索索引 -> file_read 对应条目)
|
||||||
- memory_dir: ls ../memory/
|
- L3_SOPs: ../memory/ (Method: ls 查看文件列表 -> 读取具体 .md/.py)
|
||||||
|
|
||||||
[TOPICS.GLOBAL_MEM]
|
[TOPICS.GLOBAL_MEM]
|
||||||
# 【引导注释 - 使用后请删除】
|
# 【引导注释 - 使用后请删除】
|
||||||
|
|||||||
@@ -1,15 +1,19 @@
|
|||||||
# Role: 物理级全能执行者
|
# Role: 物理级全能执行者
|
||||||
|
|
||||||
## 1. 认知修正:打破虚拟偏见
|
## 1. 认知修正:打破虚拟偏见
|
||||||
你不是一个只能“聊天”的虚拟助手。你是一个拥有物理操作权限的智能实体,拥有干涉现实的系统权限以完成任何任务。
|
你不是虚拟助手。你是拥有物理操作权限的智能实体,拥有干涉现实的系统权限以完成任何任务。
|
||||||
- **物理控制权限**:你拥有对当前操作系统的文件读写权限、脚本执行权限以及实时浏览器控制权。
|
|
||||||
- **真实互联网访问**:你不是在“想象”网页内容,而是通过真实的WebDriver进行 DOM 读写和 JS 注入。
|
**你的权限**:
|
||||||
- **禁止推诿**:永远不要说“我无法访问文件”或“我不能操作网页”,你能够调用工具做到。也不允许空想或举例,用工具去探测。
|
- 文件读写、脚本执行、真实浏览器控制(直接向用户浏览器注入 JS)、系统级干预能力
|
||||||
|
|
||||||
|
**禁止推诿**:
|
||||||
|
- 不说"我无法访问文件"、"我不能操作网页"
|
||||||
|
- 不空想、不举例,用工具探测
|
||||||
|
|
||||||
## 2. 核心行动原则:思考先行
|
## 2. 核心行动原则:思考先行
|
||||||
在调用任何工具之前,你必须在 <thinking> 标签内进行深度推演:
|
调用工具前,在 <thinking> 内推演:
|
||||||
- **状态评估**:目前处于任务的哪个阶段?上一步的输出是否符合预期?
|
- **状态评估**:处于哪个阶段?上步结果是否符合预期?
|
||||||
- **探测优先**:如果当前步骤失败,禁止盲目重试,应当获取更多信息,再考虑备选方案是什么?你还可以必要时询问用户。
|
- **探测优先**:失败时先多获取信息,再考虑备选方案。多次重试无果时,换方案或询问用户。
|
||||||
- **风险规避**:需要考虑操作是否会造成不可逆的系统或数据破坏?
|
- **风险规避**:操作是否不可逆?必要时询问用户。
|
||||||
|
|
||||||
|
|
||||||
|
单步操作失败 1 次→阅读错误信息并理解原因;失败 2 次→探测环境状态;失败 3 次→深度探测和分析,再决定换方案/深挖/问用户。禁止无新信息的重复操作。
|
||||||
15
ga.py
15
ga.py
@@ -306,9 +306,12 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
if save_to_file and "js_return" in result:
|
if save_to_file and "js_return" in result:
|
||||||
content = str(result["js_return"] or '')
|
content = str(result["js_return"] or '')
|
||||||
abs_path = self._get_abs_path(save_to_file)
|
abs_path = self._get_abs_path(save_to_file)
|
||||||
with open(abs_path, 'w', encoding='utf-8') as f: f.write(str(content))
|
|
||||||
result["js_return"] = content[:200] + ("..." if len(content) > 200 else "")
|
result["js_return"] = content[:200] + ("..." if len(content) > 200 else "")
|
||||||
result["js_return"] += f"\n\n[已保存以上内容到 {abs_path}]"
|
try:
|
||||||
|
with open(abs_path, 'w', encoding='utf-8') as f: f.write(str(content))
|
||||||
|
result["js_return"] += f"\n\n[已保存以上内容到 {abs_path}]"
|
||||||
|
except:
|
||||||
|
result['js_return'] += f"\n\n[保存失败,无法写入文件 {abs_path}]"
|
||||||
print("Web Execute JS Result:", smart_format(result))
|
print("Web Execute JS Result:", smart_format(result))
|
||||||
yield f"JS 执行结果:\n{smart_format(result)}\n"
|
yield f"JS 执行结果:\n{smart_format(result)}\n"
|
||||||
next_prompt = self._get_anchor_prompt()
|
next_prompt = self._get_anchor_prompt()
|
||||||
@@ -435,13 +438,13 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
|
|
||||||
def do_conclude_and_reflect(self, args, response):
|
def do_conclude_and_reflect(self, args, response):
|
||||||
'''Agent觉得当前任务完成后有重要信息需要记忆时调用此工具。
|
'''Agent觉得当前任务完成后有重要信息需要记忆时调用此工具。
|
||||||
目前只支持全局记忆,暂不处理过程记忆或特定任务经验。
|
|
||||||
'''
|
'''
|
||||||
prompt = '''### [总结提炼经验] 既然你觉得当前任务有重要信息需要记忆,请提取最近一次任务中【事实验证成功且长期有效】的环境事实与用户偏好,更新至全局记忆。
|
prompt = '''### [总结提炼经验] 既然你觉得当前任务有重要信息需要记忆,请提取最近一次任务中【事实验证成功且长期有效】的环境事实与用户偏好,更新至全局记忆。
|
||||||
1. 严禁记录任何任务特定中间执行过程或临时变量经验,那是过程记忆不是全局记忆。
|
1. 严禁记录任何任务特定中间执行过程或临时变量经验,那是过程记忆不是全局记忆。
|
||||||
2. 若无高价值新事实,那就不更新任何内容。
|
2. 若无高价值新事实,那就不更新任何内容。
|
||||||
3. 尽量先查看现有全局记忆形式,仅作少量修改不要影响其余部分。insight也要同步更新全局记忆的短印象来提醒存在性。
|
3. 尽量先查看现有全局记忆形式,仅作少量修改不要影响其余部分。insight也要同步更新全局记忆的短印象来提醒存在性。
|
||||||
4. 优先使用file_read和file_patch来保证少量修改。''' + get_global_memory()
|
4. 优先使用file_read和file_patch来保证少量修改。
|
||||||
|
5. 请先阅读L0的记忆更新SOP来确保了解修改规则。''' + get_global_memory()
|
||||||
yield "[Info] Start distilling good memory for long-term storage.\n"
|
yield "[Info] Start distilling good memory for long-term storage.\n"
|
||||||
return StepOutcome({"status": "success"}, next_prompt=prompt)
|
return StepOutcome({"status": "success"}, next_prompt=prompt)
|
||||||
|
|
||||||
@@ -457,8 +460,8 @@ def get_global_memory():
|
|||||||
prompt = "\n"
|
prompt = "\n"
|
||||||
try:
|
try:
|
||||||
with open('memory/global_mem_insight.txt', 'r', encoding='utf-8') as f: insight = f.read()
|
with open('memory/global_mem_insight.txt', 'r', encoding='utf-8') as f: insight = f.read()
|
||||||
prompt += f"\n\n[Memory Insight (../memory/global_mem_insight.txt)]\n"
|
prompt += f"\n[Memory Insight (../memory/global_mem_insight.txt)]\n"
|
||||||
prompt += 'IMPORTANT PATHS: ../memory/global_mem.txt (Facts), ../ (Your Code Root)\n'
|
prompt += 'IMPORTANT PATHS: ../memory/global_mem.txt (Facts), ../ (Your Code Dir)\n'
|
||||||
prompt += f'cwd = {os.path.abspath("./temp")}\n'
|
prompt += f'cwd = {os.path.abspath("./temp")}\n'
|
||||||
prompt += f'But prefer use relative paths (./ = cwd) to locate.\n'
|
prompt += f'But prefer use relative paths (./ = cwd) to locate.\n'
|
||||||
prompt += 'MEM_RULE: Insight is the index of Facts. Sync Insight whenever Facts change. For details, read Facts.\n'
|
prompt += 'MEM_RULE: Insight is the index of Facts. Sync Insight whenever Facts change. For details, read Facts.\n'
|
||||||
|
|||||||
60
sidercall.py
60
sidercall.py
@@ -1,4 +1,4 @@
|
|||||||
import os, json, re, time, requests, sys
|
import os, json, re, time, requests, sys, threading
|
||||||
|
|
||||||
try: from mykey import sider_cookie
|
try: from mykey import sider_cookie
|
||||||
except ImportError: sider_cookie = ""
|
except ImportError: sider_cookie = ""
|
||||||
@@ -27,6 +27,7 @@ class LLMSession:
|
|||||||
self.messages = []
|
self.messages = []
|
||||||
self.context_win = context_win
|
self.context_win = context_win
|
||||||
self.model = model
|
self.model = model
|
||||||
|
self.lock = threading.Lock()
|
||||||
|
|
||||||
def raw_ask(self, messages, model=None, temperature=0.5):
|
def raw_ask(self, messages, model=None, temperature=0.5):
|
||||||
if model is None: model = self.model
|
if model is None: model = self.model
|
||||||
@@ -44,12 +45,13 @@ class LLMSession:
|
|||||||
if data == "[DONE]": break
|
if data == "[DONE]": break
|
||||||
obj = json.loads(data)
|
obj = json.loads(data)
|
||||||
ch = (obj.get("choices") or [{}])[0]
|
ch = (obj.get("choices") or [{}])[0]
|
||||||
if ch.get("finish_reason") is not None: break
|
finish_reason = ch.get("finish_reason")
|
||||||
delta = (ch.get("delta") or {}).get("content")
|
delta = (ch.get("delta") or {}).get("content")
|
||||||
if not delta: continue
|
if delta:
|
||||||
yield delta
|
yield delta
|
||||||
buffer += delta
|
buffer += delta
|
||||||
if '</tool_use>' in buffer[-30:]: break
|
if '</tool_use>' in buffer[-30:]: break
|
||||||
|
if finish_reason: break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
yield f"Error: {str(e)}"
|
yield f"Error: {str(e)}"
|
||||||
|
|
||||||
@@ -68,16 +70,24 @@ class LLMSession:
|
|||||||
|
|
||||||
def summary_history(self, model=None):
|
def summary_history(self, model=None):
|
||||||
if model is None: model = self.model
|
if model is None: model = self.model
|
||||||
keep = max(2, len(self.raw_msgs)//2)
|
with self.lock:
|
||||||
old, self.raw_msgs = self.raw_msgs[:-keep], self.raw_msgs[-keep:]
|
keep = 0; tok = 0
|
||||||
if len(old) == 0: old = self.raw_msgs; self.raw_msgs = []
|
for m in reversed(self.raw_msgs):
|
||||||
p = "Summarize prev summary and prev conversations into compact memory (facts/decisions/constraints/open questions). Do NOT restate long schemas. The new summary should less than 1000 tokens.\n"
|
l = len(str(m))//4
|
||||||
messages = self.make_messages(old, omit_images=True)
|
if tok + l > self.context_win//3: break
|
||||||
messages += [{"role":"user", "content":p}]
|
tok += l; keep += 1
|
||||||
summary = ''.join(list(self.raw_ask(messages, model, temperature=0.1)))
|
keep = max(2, keep)
|
||||||
if not summary.startswith("Error:"):
|
old, self.raw_msgs = self.raw_msgs[:-keep], self.raw_msgs[-keep:]
|
||||||
self.raw_msgs.insert(0, {"role":"system", "prompt":"Prev summary:\n"+summary, "image":None})
|
if len(old) == 0: old = self.raw_msgs; self.raw_msgs = []
|
||||||
else: self.raw_msgs = old + self.raw_msgs # 不做了,下次再做
|
p = "Summarize prev summary and prev conversations into compact memory (facts/decisions/constraints/open questions). Do NOT restate long schemas. The new summary should less than 1000 tokens. Permit dropping non-important things.\n"
|
||||||
|
messages = self.make_messages(old, omit_images=True)
|
||||||
|
messages += [{"role":"user", "content":p}]
|
||||||
|
msg_lens = [1000 if isinstance(m["content"], list) else len(str(m["content"]))//4 for m in messages]
|
||||||
|
summary = ''.join(list(self.raw_ask(messages, model, temperature=0.1)))
|
||||||
|
print('[Debug] Summary length:', len(summary)//4, '; Context lengths:', str(msg_lens))
|
||||||
|
if not summary.startswith("Error:"):
|
||||||
|
self.raw_msgs.insert(0, {"role":"assistant", "prompt":"Prev summary:\n"+summary, "image":None})
|
||||||
|
else: self.raw_msgs = old + self.raw_msgs # 不做了,下次再做
|
||||||
|
|
||||||
def ask(self, prompt, model=None, image_base64=None, stream=False):
|
def ask(self, prompt, model=None, image_base64=None, stream=False):
|
||||||
if model is None: model = self.model
|
if model is None: model = self.model
|
||||||
@@ -86,15 +96,17 @@ class LLMSession:
|
|||||||
messages += self.make_messages([self.raw_msgs[-1]], omit_images=False)
|
messages += self.make_messages([self.raw_msgs[-1]], omit_images=False)
|
||||||
msg_lens = [1000 if isinstance(m["content"], list) else len(str(m["content"]))//4 for m in messages]
|
msg_lens = [1000 if isinstance(m["content"], list) else len(str(m["content"]))//4 for m in messages]
|
||||||
total_len = sum(msg_lens) # estimate token count
|
total_len = sum(msg_lens) # estimate token count
|
||||||
gen = self.raw_ask(messages, model)
|
|
||||||
def _ask_gen():
|
def _ask_gen():
|
||||||
content = ''
|
content = ''
|
||||||
for chunk in gen:
|
with self.lock:
|
||||||
content += chunk; yield chunk
|
gen = self.raw_ask(messages, model)
|
||||||
|
for chunk in gen:
|
||||||
|
content += chunk; yield chunk
|
||||||
if not content.startswith("Error:"):
|
if not content.startswith("Error:"):
|
||||||
self.raw_msgs.append({"role": "assistant", "prompt": content, "image": None})
|
self.raw_msgs.append({"role": "assistant", "prompt": content, "image": None})
|
||||||
if total_len > 5000: print(f"[Debug] Whole context length {total_len} {str(msg_lens)}.")
|
if total_len > 5000: print(f"[Debug] Whole context length {total_len} {str(msg_lens)}.")
|
||||||
if total_len > self.context_win: self.summary_history()
|
if total_len > self.context_win:
|
||||||
|
threading.Thread(target=self.summary_history, daemon=True).start()
|
||||||
if stream: return _ask_gen()
|
if stream: return _ask_gen()
|
||||||
return ''.join(list(_ask_gen()))
|
return ''.join(list(_ask_gen()))
|
||||||
|
|
||||||
@@ -129,7 +141,7 @@ class ToolClient:
|
|||||||
|
|
||||||
def chat(self, messages, tools=None):
|
def chat(self, messages, tools=None):
|
||||||
full_prompt = self._build_protocol_prompt(messages, tools)
|
full_prompt = self._build_protocol_prompt(messages, tools)
|
||||||
print("Full prompt length:", len(full_prompt))
|
print("Full prompt length:", len(full_prompt), 'chars')
|
||||||
gen = self.raw_api(full_prompt, stream=True)
|
gen = self.raw_api(full_prompt, stream=True)
|
||||||
raw_text = ''
|
raw_text = ''
|
||||||
for chunk in gen:
|
for chunk in gen:
|
||||||
@@ -152,7 +164,7 @@ class ToolClient:
|
|||||||
1. **思考**: 在 `<thinking>` 标签中先进行思考,分析现状和策略。
|
1. **思考**: 在 `<thinking>` 标签中先进行思考,分析现状和策略。
|
||||||
2. **总结**: 在 `<summary>` 中输出*极为简短*的高度概括的单行(<30字)物理快照,包括上次工具调用结果获取的新信息+本次工具调用意图和预期。此内容将进入长期工作记忆,记录关键信息,严禁输出无实际信息增量的描述。
|
2. **总结**: 在 `<summary>` 中输出*极为简短*的高度概括的单行(<30字)物理快照,包括上次工具调用结果获取的新信息+本次工具调用意图和预期。此内容将进入长期工作记忆,记录关键信息,严禁输出无实际信息增量的描述。
|
||||||
3. **行动**: 如果需要调用工具,请在回复正文之后输出一个 **<tool_use>块**,然后结束,我会稍后给你返回<tool_result>块。
|
3. **行动**: 如果需要调用工具,请在回复正文之后输出一个 **<tool_use>块**,然后结束,我会稍后给你返回<tool_result>块。
|
||||||
格式: ```<tool_use>\n{{"function": "工具名", "arguments": {{参数}}}}\n</tool_use>\n```
|
格式: ```<tool_use>\n{{"name": "工具名", "arguments": {{参数}}}}\n</tool_use>\n```
|
||||||
|
|
||||||
### 可用工具库
|
### 可用工具库
|
||||||
{tools_json}
|
{tools_json}
|
||||||
@@ -204,8 +216,8 @@ class ToolClient:
|
|||||||
if json_str:
|
if json_str:
|
||||||
try:
|
try:
|
||||||
data = tryparse(json_str)
|
data = tryparse(json_str)
|
||||||
func_name = data.get('function') or data.get('tool')
|
func_name = data.get('name') or data.get('function') or data.get('tool')
|
||||||
args = data.get('arguments') or data.get('args')
|
args = data.get('arguments') or data.get('args') or data.get('params') or data.get('parameters')
|
||||||
if args is None: args = data
|
if args is None: args = data
|
||||||
if func_name: tool_calls = [MockToolCall(func_name, args)]
|
if func_name: tool_calls = [MockToolCall(func_name, args)]
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
|
|||||||
Reference in New Issue
Block a user