diff --git a/assets/insight_fixed_structure.txt b/assets/insight_fixed_structure.txt index 1683371..aa5e142 100644 --- a/assets/insight_fixed_structure.txt +++ b/assets/insight_fixed_structure.txt @@ -1,4 +1,4 @@ -Facts(L2): ../memory/global_mem.txt | CodeRoot: ../ | SOPs(L3): ../memory/*.md or *.py | META-SOP(L0): ../memory/memory_management_sop.md +Facts(L2): ../memory/global_mem.txt | GA CodeRoot: ../ | SOPs(L3): ../memory/*.md or *.py | META-SOP(L0): ../memory/memory_management_sop.md L1 Insight是极简索引,L2/L3变更时同步L1,索引必须极简。写记忆前先读META-SOP(L0)。 [CONSTITUTION] diff --git a/frontends/continue_cmd.py b/frontends/continue_cmd.py index 8f203ea..7f57dc4 100644 --- a/frontends/continue_cmd.py +++ b/frontends/continue_cmd.py @@ -1,15 +1,12 @@ """`/continue` command: list & restore past model_responses sessions. - Pure functions + one `install(cls)` monkey-patch entry. No side effects at import. """ import ast, glob, json, os, re, time - _LOG_GLOB = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'temp', 'model_responses', 'model_responses_*.txt') _BLOCK_RE = re.compile(r'^=== (Prompt|Response) ===.*?\n(.*?)(?=^=== (?:Prompt|Response) ===|\Z)', re.DOTALL | re.MULTILINE) - def _rel_time(mtime): d = int(time.time() - mtime) if d < 60: return f'{d}秒前' @@ -17,7 +14,6 @@ def _rel_time(mtime): if d < 86400: return f'{d // 3600}小时前' return f'{d // 86400}天前' - def _pairs(content): blocks, pairs, pending = _BLOCK_RE.findall(content or ''), [], None for label, body in blocks: @@ -26,7 +22,6 @@ def _pairs(content): pairs.append((pending, body.strip())); pending = None return pairs - def _first_user(pairs): for p, _ in pairs: try: msg = json.loads(p) @@ -43,7 +38,6 @@ def _first_user(pairs): if s and not s.startswith('###'): return s return '' - def _parse_native_history(pairs): history = [] for p, r in pairs: @@ -57,7 +51,6 @@ def _parse_native_history(pairs): history.append({'role': 'assistant', 'content': blocks}) return history - def list_sessions(exclude_pid=None): """Newest-first list of (path, mtime, first_user_text, n_rounds).""" files = glob.glob(_LOG_GLOB) @@ -74,12 +67,8 @@ def list_sessions(exclude_pid=None): out.append((f, os.path.getmtime(f), _first_user(pairs), len(pairs))) out.sort(key=lambda x: x[1], reverse=True) return out - - _MD_ESCAPE_RE = re.compile(r'([\\`*_\[\]])') - -def _escape_md(s): - return _MD_ESCAPE_RE.sub(r'\\\1', s) +def _escape_md(s): return _MD_ESCAPE_RE.sub(r'\\\1', s) def format_list(sessions, limit=20): if not sessions: return '❌ 没有可恢复的历史会话' @@ -89,7 +78,6 @@ def format_list(sessions, limit=20): lines.append(f'{i}. `{_rel_time(mtime)}` · **{n} 轮** · {preview}') return '\n'.join(lines) - def restore(agent, path): """Restore session at path. Returns (msg, is_full).""" try: content = open(path, encoding='utf-8', errors='replace').read() @@ -110,7 +98,6 @@ def restore(agent, path): n = sum(1 for l in summary if l.startswith('[USER]: ')) return f'⚠️ 非 native 格式,已降级恢复 {n} 轮摘要({name})\n(请输入新问题继续)', False - def handle(agent, query, display_queue): """Dispatch /continue or /continue N. Returns None if consumed else original query.""" s = (query or '').strip() @@ -128,8 +115,6 @@ def handle(agent, query, display_queue): display_queue.put({'done': msg, 'source': 'system'}) return None return query - - def install(cls): """Wrap cls._handle_slash_cmd so /continue is handled before original dispatch.""" orig = cls._handle_slash_cmd diff --git a/llmcore.py b/llmcore.py index 7966dab..dfdc0d3 100644 --- a/llmcore.py +++ b/llmcore.py @@ -238,6 +238,39 @@ def _parse_openai_sse(resp_lines, api_mode="chat_completions"): blocks.append({"type": "tool_use", "id": tc["id"], "name": tc["name"], "input": inp}) return blocks +def _parse_openai_json(data, api_mode="chat_completions"): + blocks = [] + if api_mode == "responses": + usage = data.get("usage", {}) + cached = (usage.get("input_tokens_details") or {}).get("cached_tokens", 0) + inp = usage.get("input_tokens", 0) + if inp: print(f"[Cache] input={inp} cached={cached}") + for item in (data.get("output") or []): + if item.get("type") == "message": + for p in (item.get("content") or []): + if p.get("type") in ("output_text", "text") and p.get("text"): + blocks.append({"type": "text", "text": p["text"]}); yield p["text"] + elif item.get("type") == "function_call": + try: args = json.loads(item.get("arguments", "")) if item.get("arguments") else {} + except: args = {"_raw": item.get("arguments", "")} + blocks.append({"type": "tool_use", "id": item.get("call_id", item.get("id", "")), + "name": item.get("name", ""), "input": args}) + else: + usage = data.get("usage") or {} + if usage: + cached = (usage.get("prompt_tokens_details") or {}).get("cached_tokens", 0) + print(f"[Cache] input={usage.get('prompt_tokens', 0)} cached={cached}") + msg = (data.get("choices") or [{}])[0].get("message", {}) + content = msg.get("content", "") + if content: + blocks.append({"type": "text", "text": content}); yield content + for tc in (msg.get("tool_calls") or []): + fn = tc.get("function", {}) + try: args = json.loads(fn.get("arguments", "")) if fn.get("arguments") else {} + except: args = {"_raw": fn.get("arguments", "")} + blocks.append({"type": "tool_use", "id": tc.get("id", ""), "name": fn.get("name", ""), "input": args}) + return blocks + def _stamp_oai_cache_markers(messages, model): """Add cache_control to last 2 user messages for Anthropic models via OAI-compatible relay.""" ml = model.lower() @@ -253,7 +286,8 @@ def _stamp_oai_cache_markers(messages, model): def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completions', *, temperature=0.5, max_tokens=None, tools=None, reasoning_effort=None, - max_retries=0, connect_timeout=10, read_timeout=300, proxies=None): + max_retries=0, connect_timeout=10, read_timeout=300, proxies=None, + stream=True): """Shared OpenAI-compatible streaming request with retry. Yields text chunks, returns list[content_block].""" ml = model.lower() if 'kimi' in ml or 'moonshot' in ml: temperature = 1 @@ -261,27 +295,17 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"} if api_mode == "responses": url = auto_make_url(api_base, "responses") - payload = {"model": model, "input": _to_responses_input(messages), "stream": True, "prompt_cache_key": _RESP_CACHE_KEY} + payload = {"model": model, "input": _to_responses_input(messages), "stream": stream, "prompt_cache_key": _RESP_CACHE_KEY} if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort} else: url = auto_make_url(api_base, "chat/completions") _stamp_oai_cache_markers(messages, model) - payload = {"model": model, "messages": messages, "stream": True, "stream_options": {"include_usage": True}} + payload = {"model": model, "messages": messages, "stream": stream} + if stream: payload["stream_options"] = {"include_usage": True} if temperature != 1: payload["temperature"] = temperature if max_tokens: payload["max_tokens"] = max_tokens if reasoning_effort: payload["reasoning_effort"] = reasoning_effort - if tools: - if api_mode == "responses": - # Responses API: flatten {type, function: {name, ...}} -> {type, name, ...} - resp_tools = [] - for t in tools: - if t.get("type") == "function" and "function" in t: - rt = {"type": "function"} - rt.update(t["function"]) - resp_tools.append(rt) - else: resp_tools.append(t) - payload["tools"] = resp_tools - else: payload["tools"] = tools + if tools: payload["tools"] = _prepare_oai_tools(tools, api_mode) RETRYABLE = {408, 409, 425, 429, 500, 502, 503, 504, 529} def _delay(resp, attempt): try: ra = float((resp.headers or {}).get("retry-after")) @@ -290,48 +314,44 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion for attempt in range(max_retries + 1): streamed = False try: - with requests.post(url, headers=headers, json=payload, stream=True, + with requests.post(url, headers=headers, json=payload, stream=stream, timeout=(connect_timeout, read_timeout), proxies=proxies) as r: if r.status_code >= 400: if r.status_code in RETRYABLE and attempt < max_retries: d = _delay(r, attempt) print(f"[LLM Retry] HTTP {r.status_code}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})") time.sleep(d); continue - # Read error body before raise (stream mode closes connection after raise) - err_body = "" - try: err_body = r.text.strip()[:1200] + body = "" + try: body = r.text.strip()[:500] except: pass - try: r.raise_for_status() - except requests.HTTPError as e: - e._err_body = err_body; raise - gen = _parse_openai_sse(r.iter_lines(), api_mode) + err = f"Error: HTTP {r.status_code}" + (f": {body}" if body else "") + yield err; return [{"type": "text", "text": err}] + gen = _parse_openai_sse(r.iter_lines(), api_mode) if stream else _parse_openai_json(r.json(), api_mode) try: while True: streamed = True; yield next(gen) except StopIteration as e: return e.value or [] - except requests.HTTPError as e: - resp = getattr(e, "response", None); status = getattr(resp, "status_code", None) - if status in RETRYABLE and attempt < max_retries and not streamed: - d = _delay(resp, attempt) - print(f"[LLM Retry] HTTP {status}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})") - time.sleep(d); continue - body = ""; rid = ""; ra = ""; ct = "" - try: body = getattr(e, '_err_body', '') or (resp.text or "").strip()[:1200] - except: pass - try: h = resp.headers or {}; rid = h.get("x-request-id","") or h.get("request-id",""); ra = h.get("retry-after",""); ct = h.get("content-type","") - except: pass - err = f"Error: HTTP {status} {e}; content_type: {ct or ''}; retry_after: {ra or ''}; request_id: {rid or ''}; body: {body or ''}" - yield err; return [{"type": "text", "text": err}] except (requests.Timeout, requests.ConnectionError) as e: if attempt < max_retries and not streamed: d = _delay(None, attempt) print(f"[LLM Retry] {type(e).__name__}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})") time.sleep(d); continue - err = f"Error: {type(e).__name__}: {e}" + err = f"Error: {type(e).__name__}" yield err; return [{"type": "text", "text": err}] except Exception as e: - err = f"Error: {e}" + err = f"Error: {type(e).__name__}: {e}" yield err; return [{"type": "text", "text": err}] + +def _prepare_oai_tools(tools, api_mode="chat_completions"): + if api_mode == "responses": + resp_tools = [] + for t in tools: + if t.get("type") == "function" and "function" in t: + rt = {"type": "function"}; rt.update(t["function"]) + resp_tools.append(rt) + else: resp_tools.append(t) + return resp_tools + return tools def _to_responses_input(messages): result = [] @@ -492,7 +512,7 @@ class LLMSession(BaseSession): def raw_ask(self, messages): return (yield from _openai_stream(self.api_base, self.api_key, messages, self.model, self.api_mode, temperature=self.temperature, reasoning_effort=self.reasoning_effort, - max_tokens=self.max_tokens, max_retries=self.max_retries, + max_tokens=self.max_tokens, max_retries=self.max_retries, stream=self.stream, connect_timeout=self.connect_timeout, read_timeout=self.read_timeout, proxies=self.proxies)) def make_messages(self, raw_list): return _msgs_claude2oai(raw_list) @@ -601,10 +621,10 @@ class NativeOAISession(NativeClaudeSession): """OpenAI streaming. yields text chunks, generator return = list[content_block]""" msgs = ([{"role": "system", "content": self.system}] if self.system else []) + _msgs_claude2oai(messages) return (yield from _openai_stream(self.api_base, self.api_key, msgs, self.model, self.api_mode, - temperature=self.temperature, max_tokens=self.max_tokens, + temperature=self.temperature, max_tokens=self.max_tokens, tools=self.tools, reasoning_effort=self.reasoning_effort, max_retries=self.max_retries, connect_timeout=self.connect_timeout, - read_timeout=self.read_timeout, proxies=self.proxies)) + read_timeout=self.read_timeout, proxies=self.proxies, stream=self.stream)) def openai_tools_to_claude(tools): """[{type:'function', function:{name,description,parameters}}] → [{name,description,input_schema}].""" @@ -823,7 +843,7 @@ class MixinSession: self.model = getattr(self._sessions[0], 'model', None) self._cur_idx, self._switched_at = 0, 0.0 def __getattr__(self, name): return getattr(self._sessions[0], name) - _BROADCAST_ATTRS = frozenset({'system', 'tools', 'temperature', 'max_tokens', 'reasoning_effort'}) + _BROADCAST_ATTRS = frozenset({'system', 'tools', 'temperature', 'max_tokens', 'reasoning_effort', 'history'}) def __setattr__(self, name, value): if name in self._BROADCAST_ATTRS: for s in self._sessions: