feat: upgrade architecture, add idle monitor, and include new assets

2026-02-09 13:24:08 +08:00
parent 370654fed0
commit 3f1fad8ab9
6 changed files with 117 additions and 56 deletions
--- a/assets/global_mem_insight_template.txt
+++ b/assets/global_mem_insight_template.txt
@@ -1,17 +1,3 @@
 [CONSTITUTION]
 1. 修改自身源码前必请示；./ 目录内实验可自主执行，允许安装py包和命令行工具。
 2. 做决策前必先查记忆库；未查证的事实不得断言。
 3. 复杂任务需分步执行并每步验证：严禁盲目一气呵成。三次重试失败必须请求干预。
 4. 严禁未经授权读取/移动密钥或密码文件；仅限引用或申请许可。
 5. 修改/写入任何记忆前必先读取META-SOP核验。
 [META-SOP (L0)]
 - ../memory/memory_management_sop.md
 [DIRECTORY]
 - L2_Facts: ../memory/global_mem.txt (Method: 按 TOPIC 检索索引 -> file_read 对应条目)
 - L3_SOPs: ../memory/ (Method: ls 查看文件列表 -> 读取具体 .md/.py)
 [TOPICS.GLOBAL_MEM]
 # 【引导注释 - 使用后请删除】
 # 请模仿以下概括性 Tag 格式记录 global_mem.txt 中的事实：
--- a/assets/insight_fixed_structure.txt
+++ b/assets/insight_fixed_structure.txt
@@ -0,0 +1,13 @@
 [CONSTITUTION]
 1. 修改自身源码前必请示；./ 目录内实验可自主执行，允许安装py包和命令行工具。
 2. 做决策前必先查记忆库；未查证的事实不得断言。
 3. 复杂任务需分步执行并每步验证：严禁盲目一气呵成。三次重试失败必须请求干预。
 4. 严禁未经授权读取/移动密钥或密码文件；仅限引用或申请许可。
 5. 修改/写入任何记忆前必先读取META-SOP核验。
 [META-SOP (L0)]
 - ../memory/memory_management_sop.md
 [DIRECTORY]
 - L2_Facts: ../memory/global_mem.txt (Method: 按 TOPIC 检索索引 -> file_read 对应条目)
 - L3_SOPs: ../memory/ (Method: ls 查看文件列表 -> 读取具体 .md/.py)
--- a/assets/tools_schema.json
+++ b/assets/tools_schema.json
@@ -30,7 +30,7 @@
    "description": "用于文件的新建、全量覆盖或追加写入。对于精细的代码修改，应优先使用 file_patch。注意：要写入的内容必须放在回复正文的 <file_content> 标签或代码块中。",
    "parameters": {"type": "object", "properties": {
      "path": {"type": "string", "description": "文件路径。"},
-      "mode": {"type": "string", "enum": ["overwrite", "append"], "description": "写入模式：overwrite（覆盖，默认）或 append（追加）。", "default": "overwrite"}}, "required": ["path"]}
+      "mode": {"type": "string", "enum": ["overwrite", "append", "prepend"], "description": "写入模式：overwrite（覆盖，默认）、append（在末尾追加）或 prepend（在开头插入）。", "default": "overwrite"}}, "required": ["path"]}
  }},
  {"type": "function", "function": {
    "name": "web_scan",
@@ -47,11 +47,11 @@
      "save_to_file": {"type": "string", "description": "可选。将 JS 执行结果（js_return）保存到的文件路径。注意：该功能不支持 await 等异步结果。"}}, "required": ["script"]}
  }},
  {"type": "function", "function": {
-    "name": "update_plan",
+    "name": "update_sop_plan",
-    "description": "更新任务的宏观计划和当前战略重心。仅在初始拆解多步任务或发生重大方案调整时使用。禁止用于记录细微调试步骤或纠错。",
+    "description": "【读取SOP后、开始执行前需要调用】准备开始执行此SOP。提取SOP中容易遗忘但关键的内容（特别是靠后的内容），生成100 tokens以内的极简检查清单，后续每轮自动注入context。",
    "parameters": {"type": "object", "properties": {
-      "plan": {"type": "string", "description": "完整的宏观任务路线图。"},
+      "keys": {"type": "string", "description": "每轮自动注入的必须注意的文本，可以是约束规则/执行步骤/需要注意的坑"},
-      "focus": {"type": "string", "description": "当前阶段的工作重点。"}}}
+      "sop_path": {"type": "string", "description": "sop路径，必要时可以再读"}}}
  }},
  {"type": "function", "function": {
    "name": "ask_user",
@@ -62,7 +62,7 @@
  }},
  {"type": "function", "function": {
    "name": "conclude_and_reflect",
-    "description": "当模型认为当前任务（非处理记忆）执行完美，且有具有长期价值的环境事实或用户偏好需要提炼并存入全局记忆时，调用此工具。注意：此工具调用即代表触发记忆提炼流程，如果已经在记忆提炼流程无需调用。",
+    "description": "用户的任务完成后，若发现值得长期记忆的信息（环境事实/用户偏好/避坑经验），调用此工具触发记忆提炼。不允许在记忆更新或自主流程内调用。",
    "parameters": {"type": "object", "properties": {}}}
  }
 ]
--- a/ga.py
+++ b/ga.py
@@ -241,8 +241,8 @@ class GenericAgentHandler(BaseHandler):
    '''
    def __init__(self, parent, last_history=None, cwd='./'):
        self.parent = parent
-        self.plan = ""
+        self.sop_keys = ""
-        self.focus = ""
+        self.sop_path = ""
        self.cwd = cwd
        self.history_info = last_history if last_history else []
        self.code_stop_signal = []
@@ -336,8 +336,8 @@ class GenericAgentHandler(BaseHandler):
        需要将要写入的内容放在<file_content>标签内，或者放在代码块中。
        '''
        path = self._get_abs_path(args.get("path", ""))
-        mode = args.get("mode", "overwrite") 
+        mode = args.get("mode", "overwrite")  # overwrite/append/prepend
-        action_str = "Appending to" if mode == "append" else "Writing"
+        action_str = {"prepend": "Prepending to", "append": "Appending to"}.get(mode, "Overwriting")
        yield f"[Action] {action_str} file: {os.path.basename(path)}\n"
        def extract_robust_content(text):
@@ -353,10 +353,12 @@ class GenericAgentHandler(BaseHandler):
            return StepOutcome({"status": "error", "msg": "No content found, if you want a blank, you should use code_run"}, next_prompt="\n")
        new_content = blocks
        try:
-            write_mode = 'a' if mode == "append" else 'w'
+            if mode == "prepend":
-            final_content = ("\n" + new_content) if mode == "append" else new_content
+                old = open(path, 'r', encoding="utf-8").read() if os.path.exists(path) else ""
-            with open(path, write_mode, encoding="utf-8") as f:
+                open(path, 'w', encoding="utf-8").write(new_content + old)
-                f.write(final_content)
+            else:
                with open(path, 'a' if mode == "append" else 'w', encoding="utf-8") as f:
                    f.write(new_content)
            yield f"[Status] ✅ {mode.capitalize()} 成功 ({len(new_content)} bytes)\n"
            next_prompt = self._get_anchor_prompt()
            return StepOutcome({"status": "success", 'writed_bytes': len(new_content)}, 
@@ -379,24 +381,20 @@ class GenericAgentHandler(BaseHandler):
            tips = '由于设置了show_linenos，以下返回信息为：(行号|)内容 。\n'
            result = tips + result 
        next_prompt = self._get_anchor_prompt()
        if 'memory' in path or 'sop' in path: 
            next_prompt += "\nPROTOCOL: 你正在读取记忆或SOP文件，若决定按sop执行请先调用准备执行相关工具，提取sop中的重点内容（特别是靠后的）进入工作记忆。"
        return StepOutcome(result, next_prompt=next_prompt)
-    def do_update_plan(self, args, response):
+    def do_update_sop_plan(self, args, response):
        '''读取完sop后，为整个任务设定后续需要临时记忆的重点。
        '''
-        同步宏观任务进度与战略重心。       
+        sop_keys = args.get("keys", "")
-        【设计意图】：
+        sop_path = args.get("sop_path", "")
-        1. 仅在任务涉及多步逻辑（如：先搜索、再重构、后测试）时进行初始拆解。
+        if sop_keys: self.sop_keys = sop_keys
-        2. 仅在发生重大的方针变更时调用（例如：原定方案 A 物理不可行，需彻底转向方案 B）。
+        if sop_path: self.sop_path = sop_path
-        3. 严禁用于记录细微的调试步骤或代码纠错。
+        yield f"[Info] Updated sop_keys and sop_path.\n"
-        简单任务无需使用。
+        yield f"sop_keys:\n{self.sop_keys}\n\n"
-        '''
+        yield f"sop_path:\n{self.sop_path}\n\n"
        new_plan = args.get("plan", "")
        new_focus = args.get("focus", "")
        if new_plan: self.plan = new_plan
        if new_focus: self.focus = new_focus
        yield f"[Info] Updated plan and focus.\n"
        yield f"New Plan:\n{self.plan}\n\n"
        yield f"New Focus:\n{self.focus}\n"
        next_prompt = self._get_anchor_prompt()
        return StepOutcome({"status": "success"}, next_prompt=next_prompt)
@@ -460,21 +458,23 @@ class GenericAgentHandler(BaseHandler):
    def _get_anchor_prompt(self):
        h_str = "\n".join(self.history_info[-20:])
        prompt = f"\n### [WORKING MEMORY]\n<history>\n{h_str}\n</history>"
        if self.sop_keys: prompt += f"\n<sop_essentials>{self.sop_keys}</sop_essentials>"
        if self.sop_path: prompt += f"\n有不清晰的地方请再次读取{self.sop_path}"
        print(prompt)
        if self.plan: prompt += f"\n<plan>{self.plan}</plan>"
        if self.focus: prompt += f"\n<focus>{self.focus}</focus>"
        return prompt
 def get_global_memory():
    prompt = "\n"
    try:
        with open('memory/global_mem_insight.txt', 'r', encoding='utf-8') as f: insight = f.read()
-        prompt += f"\n[Memory Insight (../memory/global_mem_insight.txt)]\n"
+        with open('assets/insight_fixed_structure.txt', 'r', encoding='utf-8') as f: structure = f.read()
        prompt += f"\n[Memory]\n"
        prompt += 'IMPORTANT PATHS: ../memory/global_mem.txt (Facts), ../ (Your Code Dir)\n'
        prompt += f'cwd = {os.path.abspath("./temp")}\n'
        prompt += f'But prefer use relative paths (./ = cwd) to locate.\n'
-        prompt += 'MEM_RULE: Insight is the index of Facts. Sync Insight whenever Facts change. For details, read Facts.\n'
+        prompt += 'MEM_RULE: Insight is the index. Sync Insight whenever Facts change. For details, read Facts.\n'
        prompt += "EXT: ../memory/ may contain other task-specific memories.\n"
        prompt += structure + '\nglobal_mem_insight.txt:\n'
        prompt += insight + "\n"
    except FileNotFoundError: pass
    return prompt
--- a/launch.pyw
+++ b/launch.pyw
@@ -28,21 +28,58 @@ def start_streamlit(port):
    atexit.register(proc.kill)
 def inject(text):
    """注入输入到 Streamlit"""
    window.evaluate_js(f"""
-        const input = document.querySelector('input[data-testid="stChatInputTextInput"]');
+        const textarea = document.querySelector('textarea[data-testid="stChatInputTextArea"]');
-        if (input) {{
+        if (textarea) {{
-            input.value = {repr(text)};
+            // 1. 用原生 setter 设置值（绕过 React）
-            input.dispatchEvent(new Event('input', {{bubbles: true}}));
+            const nativeTextAreaValueSetter = Object.getOwnPropertyDescriptor(
-            input.dispatchEvent(new KeyboardEvent('keydown', {{key: 'Enter', keyCode: 13, bubbles: true}}));
+                window.HTMLTextAreaElement.prototype, 'value'
            ).set;
            nativeTextAreaValueSetter.call(textarea, {repr(text)});
            // 2. 触发 React 的 input 事件
            textarea.dispatchEvent(new Event('input', {{ bubbles: true }}));
            // 3. 触发 change 事件（有些组件需要）
            textarea.dispatchEvent(new Event('change', {{ bubbles: true }}));
            // 4. 延迟提交
            setTimeout(() => {{
                const btn = document.querySelector('[data-testid="stChatInputSubmitButton"]');
                if (btn) {{
                    btn.click();
                    console.log('Submitted:', {repr(text)});
                }}
            }}, 200);
        }}
    """)
 def get_last_reply_time():
    """Return the last-reply timestamp shown in the page, or the current time.

    Evaluates a JS snippet in ``window`` that looks up the element with id
    ``last-reply-time`` and parses its text content as an integer, yielding 0
    when the element is absent. Any falsy result (no element, 0, or nothing
    returned by ``evaluate_js``) falls back to ``int(time.time())``.
    """
    last = window.evaluate_js("""
        const el = document.getElementById('last-reply-time');
        el ? parseInt(el.textContent) : 0;
    """) or 0
    # With no usable timestamp, report "now" so a caller computing
    # `now - last_reply` sees no idle time.
    return last or int(time.time())
 def idle_monitor():
    """Poll forever and inject an automation prompt once the chat looks idle.

    Wakes every 5 seconds. When at least 120 seconds have passed since the
    previous injection and the value from ``get_last_reply_time()`` is more
    than 1800 seconds old, it prints a notice, calls ``inject`` with the
    auto-task message, and records the trigger time. An exception raised
    during an iteration is printed and the loop continues. Never returns.
    """
    last_trigger_time = 0  # time of the most recent injection; 0 = none yet
    while True:
        time.sleep(5)
        try:
            now = time.time()
            # Cooldown: skip the idle check for 120 s after each injection.
            if now - last_trigger_time < 120: continue
            last_reply = get_last_reply_time()
            # 1800 s = 30 min, the duration named in the injected message.
            if now - last_reply > 1800:
                print('[Idle Monitor] Detected idle state, injecting task...')
                inject("[AUTO]🤖 用户已经离开超过30分钟，作为自主智能体，请阅读自动化sop，执行自动任务。")
                last_trigger_time = now
        except Exception as e:
            # Best-effort: report the error and keep polling.
            print(f'[Idle Monitor] Error: {e}')
 if __name__ == '__main__':
    port = sys.argv[1] if len(sys.argv) > 1 else "8501"
    t = threading.Thread(target=start_streamlit, args=(port,), daemon=True)
    t.start()
    monitor_thread = threading.Thread(target=idle_monitor, daemon=True)
    monitor_thread.start()
    if os.name == 'nt':
        screen_width = get_screen_width()
        x_pos = screen_width - WINDOW_WIDTH - RIGHT_PADDING
--- a/stapp.py
+++ b/stapp.py
@@ -23,10 +23,16 @@ agent = init()
 st.title("🖥️ Cowork")
 if 'autonomous_enabled' not in st.session_state:
    st.session_state.autonomous_enabled = False
@st.fragment
-def render_llm_switcher():
+def render_sidebar():
    current_idx = agent.llm_no
    st.caption(f"LLM Core: {current_idx}: {agent.llmclient.backends[current_idx].default_model}", help="点击切换备用链路")
    last_reply_time = st.session_state.get('last_reply_time', 0)
    if last_reply_time > 0:
        st.caption(f"空闲时间：{int(time.time()) - last_reply_time}秒", help="当超过30分钟未收到回复时，系统会自动任务")
    if st.button("切换备用链路"):
        agent.next_llm()
        st.rerun(scope="fragment")
@@ -36,7 +42,25 @@ def render_llm_switcher():
    if st.button("重新注入System Prompt"):
        agent.llmclient.last_tools = ''
        st.toast("下次将重新注入System Prompt")
-with st.sidebar: render_llm_switcher()
+    
    st.divider()
    if st.button("开始空闲自主行动"):
        st.session_state.last_reply_time = int(time.time()) - 1800
        st.toast("已将上次回复时间设为1800秒前")
        st.rerun()
    if st.session_state.autonomous_enabled:
        if st.button("⏸️ 禁止自主行动"):
            st.session_state.autonomous_enabled = False
            st.toast("⏸️ 已禁止自主行动")
            st.rerun(scope="fragment")
        st.caption("🟢 自主行动运行中，会在你离开它30分钟后自动进行")
    else:
        if st.button("▶️ 允许自主行动", type="primary"):
            st.session_state.autonomous_enabled = True
            st.toast("✅ 已允许自主行动")
            st.rerun(scope="fragment")
        st.caption("🔴 自主行动已停止")
 with st.sidebar: render_sidebar()
 def agent_backend_stream(prompt):
@@ -67,5 +91,6 @@ if prompt := st.chat_input("请输入指令"):
    st.session_state.messages.append({"role": "assistant", "content": response})
    st.session_state.last_reply_time = int(time.time())
-st.markdown(f"""<div id="last-reply-time" style="display:none">{st.session_state.get('last_reply_time', int(time.time()))}</div>""", unsafe_allow_html=True)
+if st.session_state.autonomous_enabled:
    st.markdown(f"""<div id="last-reply-time" style="display:none">{st.session_state.get('last_reply_time', int(time.time()))}</div>""", unsafe_allow_html=True)