feat: upgrade architecture, add idle monitor, and include new assets

This commit is contained in:
Jiaqing Liang
2026-02-09 13:24:08 +08:00
parent 370654fed0
commit 3f1fad8ab9
6 changed files with 117 additions and 56 deletions

View File

@@ -1,17 +1,3 @@
[CONSTITUTION]
1. 修改自身源码前必请示;./ 目录内实验可自主执行允许安装py包和命令行工具。
2. 做决策前必先查记忆库;未查证的事实不得断言。
3. 复杂任务需分步执行并每步验证:严禁盲目一气呵成。三次重试失败必须请求干预。
4. 严禁未经授权读取/移动密钥或密码文件;仅限引用或申请许可。
5. 修改/写入任何记忆前必先读取META-SOP核验。
[META-SOP (L0)]
- ../memory/memory_management_sop.md
[DIRECTORY]
- L2_Facts: ../memory/global_mem.txt (Method: 按 TOPIC 检索索引 -> file_read 对应条目)
- L3_SOPs: ../memory/ (Method: ls 查看文件列表 -> 读取具体 .md/.py)
[TOPICS.GLOBAL_MEM] [TOPICS.GLOBAL_MEM]
# 【引导注释 - 使用后请删除】 # 【引导注释 - 使用后请删除】
# 请模仿以下概括性 Tag 格式记录 global_mem.txt 中的事实: # 请模仿以下概括性 Tag 格式记录 global_mem.txt 中的事实:

View File

@@ -0,0 +1,13 @@
[CONSTITUTION]
1. 修改自身源码前必请示;./ 目录内实验可自主执行,允许安装py包和命令行工具。
2. 做决策前必先查记忆库;未查证的事实不得断言。
3. 复杂任务需分步执行并每步验证:严禁盲目一气呵成。三次重试失败必须请求干预。
4. 严禁未经授权读取/移动密钥或密码文件;仅限引用或申请许可。
5. 修改/写入任何记忆前必先读取META-SOP核验。
[META-SOP (L0)]
- ../memory/memory_management_sop.md
[DIRECTORY]
- L2_Facts: ../memory/global_mem.txt (Method: 按 TOPIC 检索索引 -> file_read 对应条目)
- L3_SOPs: ../memory/ (Method: ls 查看文件列表 -> 读取具体 .md/.py)

View File

@@ -30,7 +30,7 @@
"description": "用于文件的新建、全量覆盖或追加写入。对于精细的代码修改,应优先使用 file_patch。注意要写入的内容必须放在回复正文的 <file_content> 标签或代码块中。", "description": "用于文件的新建、全量覆盖或追加写入。对于精细的代码修改,应优先使用 file_patch。注意要写入的内容必须放在回复正文的 <file_content> 标签或代码块中。",
"parameters": {"type": "object", "properties": { "parameters": {"type": "object", "properties": {
"path": {"type": "string", "description": "文件路径。"}, "path": {"type": "string", "description": "文件路径。"},
"mode": {"type": "string", "enum": ["overwrite", "append"], "description": "写入模式overwrite覆盖默认或 append追加。", "default": "overwrite"}}, "required": ["path"]} "mode": {"type": "string", "enum": ["overwrite", "append", "prepend"], "description": "写入模式覆盖、追加或在开头追加。", "default": "overwrite"}}, "required": ["path"]}
}}, }},
{"type": "function", "function": { {"type": "function", "function": {
"name": "web_scan", "name": "web_scan",
@@ -47,11 +47,11 @@
"save_to_file": {"type": "string", "description": "可选。将 JS 执行结果js_return保存到的文件路径。注意该功能不支持 await 等异步结果。"}}, "required": ["script"]} "save_to_file": {"type": "string", "description": "可选。将 JS 执行结果js_return保存到的文件路径。注意该功能不支持 await 等异步结果。"}}, "required": ["script"]}
}}, }},
{"type": "function", "function": { {"type": "function", "function": {
"name": "update_plan", "name": "update_sop_plan",
"description": "更新任务的宏观计划和当前战略重心。仅在初始拆解多步任务或发生重大方案调整时使用。禁止用于记录细微调试步骤或纠错。", "description": "【在读取SOP后需要调用】准备开始执行此SOP。提取容易遗忘但关键的内容生成极简检查清单100tokens以内后续每轮自动注入context。特别是靠后的内容。",
"parameters": {"type": "object", "properties": { "parameters": {"type": "object", "properties": {
"plan": {"type": "string", "description": "完整的宏观任务路线图。"}, "keys": {"type": "string", "description": "每轮自动注入的必须注意的文本,可以是约束规则/执行步骤/需要注意的坑"},
"focus": {"type": "string", "description": "当前阶段的工作重点。"}}} "sop_path": {"type": "string", "description": "sop路径必要时可以再读"}}}
}}, }},
{"type": "function", "function": { {"type": "function", "function": {
"name": "ask_user", "name": "ask_user",
@@ -62,7 +62,7 @@
}}, }},
{"type": "function", "function": { {"type": "function", "function": {
"name": "conclude_and_reflect", "name": "conclude_and_reflect",
"description": "当模型认为当前任务(非处理记忆)执行完美,且有具有长期价值的环境事实用户偏好需要提炼并存入全局记忆时,调用此工具。注意:此工具调用即代表触发记忆提炼流程,如果已经在记忆提炼流程无需调用。", "description": "用户的任务完成后,若发现值得长期记忆的信息(环境事实/用户偏好/避坑经验),调用此工具触发记忆提炼。不允许在记忆更新或自主流程内调用。",
"parameters": {"type": "object", "properties": {}}} "parameters": {"type": "object", "properties": {}}}
} }
] ]

54
ga.py
View File

@@ -241,8 +241,8 @@ class GenericAgentHandler(BaseHandler):
''' '''
def __init__(self, parent, last_history=None, cwd='./'): def __init__(self, parent, last_history=None, cwd='./'):
self.parent = parent self.parent = parent
self.plan = "" self.sop_keys = ""
self.focus = "" self.sop_path = ""
self.cwd = cwd self.cwd = cwd
self.history_info = last_history if last_history else [] self.history_info = last_history if last_history else []
self.code_stop_signal = [] self.code_stop_signal = []
@@ -336,8 +336,8 @@ class GenericAgentHandler(BaseHandler):
需要将要写入的内容放在<file_content>标签内,或者放在代码块中。 需要将要写入的内容放在<file_content>标签内,或者放在代码块中。
''' '''
path = self._get_abs_path(args.get("path", "")) path = self._get_abs_path(args.get("path", ""))
mode = args.get("mode", "overwrite") mode = args.get("mode", "overwrite") # overwrite/append/prepend
action_str = "Appending to" if mode == "append" else "Writing" action_str = {"prepend": "Prepending to", "append": "Appending to"}.get(mode, "Overwriting")
yield f"[Action] {action_str} file: {os.path.basename(path)}\n" yield f"[Action] {action_str} file: {os.path.basename(path)}\n"
def extract_robust_content(text): def extract_robust_content(text):
@@ -353,10 +353,12 @@ class GenericAgentHandler(BaseHandler):
return StepOutcome({"status": "error", "msg": "No content found, if you want a blank, you should use code_run"}, next_prompt="\n") return StepOutcome({"status": "error", "msg": "No content found, if you want a blank, you should use code_run"}, next_prompt="\n")
new_content = blocks new_content = blocks
try: try:
write_mode = 'a' if mode == "append" else 'w' if mode == "prepend":
final_content = ("\n" + new_content) if mode == "append" else new_content old = open(path, 'r', encoding="utf-8").read() if os.path.exists(path) else ""
with open(path, write_mode, encoding="utf-8") as f: open(path, 'w', encoding="utf-8").write(new_content + old)
f.write(final_content) else:
with open(path, 'a' if mode == "append" else 'w', encoding="utf-8") as f:
f.write(new_content)
yield f"[Status] ✅ {mode.capitalize()} 成功 ({len(new_content)} bytes)\n" yield f"[Status] ✅ {mode.capitalize()} 成功 ({len(new_content)} bytes)\n"
next_prompt = self._get_anchor_prompt() next_prompt = self._get_anchor_prompt()
return StepOutcome({"status": "success", 'writed_bytes': len(new_content)}, return StepOutcome({"status": "success", 'writed_bytes': len(new_content)},
@@ -379,24 +381,20 @@ class GenericAgentHandler(BaseHandler):
tips = '由于设置了show_linenos以下返回信息为(行号|)内容 。\n' tips = '由于设置了show_linenos以下返回信息为(行号|)内容 。\n'
result = tips + result result = tips + result
next_prompt = self._get_anchor_prompt() next_prompt = self._get_anchor_prompt()
if 'memory' in path or 'sop' in path:
next_prompt += "\nPROTOCOL: 你正在读取记忆或SOP文件若决定按sop执行请先调用准备执行相关工具提取sop中的重点内容特别是靠后的进入工作记忆。"
return StepOutcome(result, next_prompt=next_prompt) return StepOutcome(result, next_prompt=next_prompt)
def do_update_plan(self, args, response): def do_update_sop_plan(self, args, response):
'''读取完sop后为整个任务设定后续需要临时记忆的重点。
''' '''
同步宏观任务进度与战略重心。 sop_keys = args.get("keys", "")
【设计意图】: sop_path = args.get("sop_path", "")
1. 仅在任务涉及多步逻辑(如:先搜索、再重构、后测试)时进行初始拆解。 if sop_keys: self.sop_keys = sop_keys
2. 仅在发生重大的方针变更时调用(例如:原定方案 A 物理不可行,需彻底转向方案 B if sop_path: self.sop_path = sop_path
3. 严禁用于记录细微的调试步骤或代码纠错。 yield f"[Info] Updated sop_keys and sop_path.\n"
简单任务无需使用。 yield f"sop_keys:\n{self.sop_keys}\n\n"
''' yield f"sop_path:\n{self.sop_path}\n\n"
new_plan = args.get("plan", "")
new_focus = args.get("focus", "")
if new_plan: self.plan = new_plan
if new_focus: self.focus = new_focus
yield f"[Info] Updated plan and focus.\n"
yield f"New Plan:\n{self.plan}\n\n"
yield f"New Focus:\n{self.focus}\n"
next_prompt = self._get_anchor_prompt() next_prompt = self._get_anchor_prompt()
return StepOutcome({"status": "success"}, next_prompt=next_prompt) return StepOutcome({"status": "success"}, next_prompt=next_prompt)
@@ -460,21 +458,23 @@ class GenericAgentHandler(BaseHandler):
def _get_anchor_prompt(self): def _get_anchor_prompt(self):
h_str = "\n".join(self.history_info[-20:]) h_str = "\n".join(self.history_info[-20:])
prompt = f"\n### [WORKING MEMORY]\n<history>\n{h_str}\n</history>" prompt = f"\n### [WORKING MEMORY]\n<history>\n{h_str}\n</history>"
if self.sop_keys: prompt += f"\n<sop_essentials>{self.sop_keys}</sop_essentials>"
if self.sop_path: prompt += f"\n有不清晰的地方请再次读取{self.sop_path}"
print(prompt) print(prompt)
if self.plan: prompt += f"\n<plan>{self.plan}</plan>"
if self.focus: prompt += f"\n<focus>{self.focus}</focus>"
return prompt return prompt
def get_global_memory(): def get_global_memory():
prompt = "\n" prompt = "\n"
try: try:
with open('memory/global_mem_insight.txt', 'r', encoding='utf-8') as f: insight = f.read() with open('memory/global_mem_insight.txt', 'r', encoding='utf-8') as f: insight = f.read()
prompt += f"\n[Memory Insight (../memory/global_mem_insight.txt)]\n" with open('assets/insight_fixed_structure.txt', 'r', encoding='utf-8') as f: structure = f.read()
prompt += f"\n[Memory]\n"
prompt += 'IMPORTANT PATHS: ../memory/global_mem.txt (Facts), ../ (Your Code Dir)\n' prompt += 'IMPORTANT PATHS: ../memory/global_mem.txt (Facts), ../ (Your Code Dir)\n'
prompt += f'cwd = {os.path.abspath("./temp")}\n' prompt += f'cwd = {os.path.abspath("./temp")}\n'
prompt += f'But prefer use relative paths (./ = cwd) to locate.\n' prompt += f'But prefer use relative paths (./ = cwd) to locate.\n'
prompt += 'MEM_RULE: Insight is the index of Facts. Sync Insight whenever Facts change. For details, read Facts.\n' prompt += 'MEM_RULE: Insight is the index. Sync Insight whenever Facts change. For details, read Facts.\n'
prompt += "EXT: ../memory/ may contain other task-specific memories.\n" prompt += "EXT: ../memory/ may contain other task-specific memories.\n"
prompt += structure + '\nglobal_mem_insight.txt:\n'
prompt += insight + "\n" prompt += insight + "\n"
except FileNotFoundError: pass except FileNotFoundError: pass
return prompt return prompt

View File

@@ -28,21 +28,58 @@ def start_streamlit(port):
atexit.register(proc.kill) atexit.register(proc.kill)
def inject(text): def inject(text):
"""注入输入到 Streamlit"""
window.evaluate_js(f""" window.evaluate_js(f"""
const input = document.querySelector('input[data-testid="stChatInputTextInput"]'); const textarea = document.querySelector('textarea[data-testid="stChatInputTextArea"]');
if (input) {{ if (textarea) {{
input.value = {repr(text)}; // 1. 用原生 setter 设置值(绕过 React
input.dispatchEvent(new Event('input', {{bubbles: true}})); const nativeTextAreaValueSetter = Object.getOwnPropertyDescriptor(
input.dispatchEvent(new KeyboardEvent('keydown', {{key: 'Enter', keyCode: 13, bubbles: true}})); window.HTMLTextAreaElement.prototype, 'value'
).set;
nativeTextAreaValueSetter.call(textarea, {repr(text)});
// 2. 触发 React 的 input 事件
textarea.dispatchEvent(new Event('input', {{ bubbles: true }}));
// 3. 触发 change 事件(有些组件需要)
textarea.dispatchEvent(new Event('change', {{ bubbles: true }}));
// 4. 延迟提交
setTimeout(() => {{
const btn = document.querySelector('[data-testid="stChatInputSubmitButton"]');
if (btn) {{
btn.click();
console.log('Submitted:', {repr(text)});
}}
}}, 200);
}} }}
""") """)
def get_last_reply_time():
    """Return the last-reply timestamp published by the page, or the current time.

    Evaluates a small JS snippet in ``window`` that reads the text of the
    element with id ``last-reply-time`` and parses it as an integer. When the
    element is missing the snippet yields 0; any falsy result (0, ``None``)
    is replaced by the current Unix time as an int.
    """
    script = (
        "\n"
        "const el = document.getElementById('last-reply-time');\n"
        "el ? parseInt(el.textContent) : 0;\n"
    )
    reply_ts = window.evaluate_js(script)
    if not reply_ts:
        # No usable timestamp on the page: fall back to "now".
        return int(time.time())
    return reply_ts
def idle_monitor(poll_interval=5, cooldown=120, idle_threshold=1800):
    """Poll forever and inject the autonomous-task prompt when the chat is idle.

    Every ``poll_interval`` seconds the loop compares the current time with the
    value returned by ``get_last_reply_time()``. If the gap exceeds
    ``idle_threshold`` seconds, an ``[AUTO]`` message is passed to ``inject``.
    After a trigger, no further check is made for ``cooldown`` seconds.

    Args:
        poll_interval: Seconds to sleep between checks.
        cooldown: Minimum seconds between two injections.
        idle_threshold: Seconds without a reply before the chat counts as idle.
            The minute count in the injected message is derived from this
            value (30 at the default of 1800).

    This function never returns; it is meant to run as a thread target. Any
    exception raised during a check is printed and the loop continues.
    """
    last_trigger_time = 0
    while True:
        time.sleep(poll_interval)
        try:
            now = time.time()
            if now - last_trigger_time < cooldown:
                continue
            if now - get_last_reply_time() <= idle_threshold:
                continue
            print('[Idle Monitor] Detected idle state, injecting task...')
            inject(f"[AUTO]🤖 用户已经离开超过{idle_threshold // 60}分钟作为自主智能体请阅读自动化sop执行自动任务。")
            # Record the trigger only after a successful inject so a failed
            # attempt is retried on the next poll.
            last_trigger_time = now
        except Exception as e:
            # Thread boundary: report and keep monitoring rather than die.
            print(f'[Idle Monitor] Error: {e}')
if __name__ == '__main__': if __name__ == '__main__':
port = sys.argv[1] if len(sys.argv) > 1 else "8501" port = sys.argv[1] if len(sys.argv) > 1 else "8501"
t = threading.Thread(target=start_streamlit, args=(port,), daemon=True) t = threading.Thread(target=start_streamlit, args=(port,), daemon=True)
t.start() t.start()
monitor_thread = threading.Thread(target=idle_monitor, daemon=True)
monitor_thread.start()
if os.name == 'nt': if os.name == 'nt':
screen_width = get_screen_width() screen_width = get_screen_width()
x_pos = screen_width - WINDOW_WIDTH - RIGHT_PADDING x_pos = screen_width - WINDOW_WIDTH - RIGHT_PADDING

View File

@@ -23,10 +23,16 @@ agent = init()
st.title("🖥️ Cowork") st.title("🖥️ Cowork")
if 'autonomous_enabled' not in st.session_state:
st.session_state.autonomous_enabled = False
@st.fragment @st.fragment
def render_llm_switcher(): def render_sidebar():
current_idx = agent.llm_no current_idx = agent.llm_no
st.caption(f"LLM Core: {current_idx}: {agent.llmclient.backends[current_idx].default_model}", help="点击切换备用链路") st.caption(f"LLM Core: {current_idx}: {agent.llmclient.backends[current_idx].default_model}", help="点击切换备用链路")
last_reply_time = st.session_state.get('last_reply_time', 0)
if last_reply_time > 0:
st.caption(f"空闲时间:{int(time.time()) - last_reply_time}", help="当超过30分钟未收到回复时系统会自动任务")
if st.button("切换备用链路"): if st.button("切换备用链路"):
agent.next_llm() agent.next_llm()
st.rerun(scope="fragment") st.rerun(scope="fragment")
@@ -36,7 +42,25 @@ def render_llm_switcher():
if st.button("重新注入System Prompt"): if st.button("重新注入System Prompt"):
agent.llmclient.last_tools = '' agent.llmclient.last_tools = ''
st.toast("下次将重新注入System Prompt") st.toast("下次将重新注入System Prompt")
with st.sidebar: render_llm_switcher()
st.divider()
if st.button("开始空闲自主行动"):
st.session_state.last_reply_time = int(time.time()) - 1800
st.toast("已将上次回复时间设为1800秒前")
st.rerun()
if st.session_state.autonomous_enabled:
if st.button("⏸️ 禁止自主行动"):
st.session_state.autonomous_enabled = False
st.toast("⏸️ 已禁止自主行动")
st.rerun(scope="fragment")
st.caption("🟢 自主行动运行中会在你离开它30分钟后自动进行")
else:
if st.button("▶️ 允许自主行动", type="primary"):
st.session_state.autonomous_enabled = True
st.toast("✅ 已允许自主行动")
st.rerun(scope="fragment")
st.caption("🔴 自主行动已停止")
with st.sidebar: render_sidebar()
def agent_backend_stream(prompt): def agent_backend_stream(prompt):
@@ -67,5 +91,6 @@ if prompt := st.chat_input("请输入指令"):
st.session_state.messages.append({"role": "assistant", "content": response}) st.session_state.messages.append({"role": "assistant", "content": response})
st.session_state.last_reply_time = int(time.time()) st.session_state.last_reply_time = int(time.time())
st.markdown(f"""<div id="last-reply-time" style="display:none">{st.session_state.get('last_reply_time', int(time.time()))}</div>""", unsafe_allow_html=True) if st.session_state.autonomous_enabled:
st.markdown(f"""<div id="last-reply-time" style="display:none">{st.session_state.get('last_reply_time', int(time.time()))}</div>""", unsafe_allow_html=True)