refactor: simplify HTTP error handling in _openai_stream, add non-stream support, broadcast history in MixinSession

This commit is contained in:
Liang Jiaqing
2026-04-19 20:57:07 +08:00
parent d1f4ee9aaf
commit 86ca4625ad
3 changed files with 64 additions and 59 deletions

View File

@@ -1,4 +1,4 @@
Facts(L2): ../memory/global_mem.txt | CodeRoot: ../ | SOPs(L3): ../memory/*.md or *.py | META-SOP(L0): ../memory/memory_management_sop.md Facts(L2): ../memory/global_mem.txt | GA CodeRoot: ../ | SOPs(L3): ../memory/*.md or *.py | META-SOP(L0): ../memory/memory_management_sop.md
L1 Insight是极简索引L2/L3变更时同步L1索引必须极简。写记忆前先读META-SOP(L0)。 L1 Insight是极简索引L2/L3变更时同步L1索引必须极简。写记忆前先读META-SOP(L0)。
[CONSTITUTION] [CONSTITUTION]

View File

@@ -1,15 +1,12 @@
"""`/continue` command: list & restore past model_responses sessions. """`/continue` command: list & restore past model_responses sessions.
Pure functions + one `install(cls)` monkey-patch entry. No side effects at import. Pure functions + one `install(cls)` monkey-patch entry. No side effects at import.
""" """
import ast, glob, json, os, re, time import ast, glob, json, os, re, time
_LOG_GLOB = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), _LOG_GLOB = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
'temp', 'model_responses', 'model_responses_*.txt') 'temp', 'model_responses', 'model_responses_*.txt')
_BLOCK_RE = re.compile(r'^=== (Prompt|Response) ===.*?\n(.*?)(?=^=== (?:Prompt|Response) ===|\Z)', _BLOCK_RE = re.compile(r'^=== (Prompt|Response) ===.*?\n(.*?)(?=^=== (?:Prompt|Response) ===|\Z)',
re.DOTALL | re.MULTILINE) re.DOTALL | re.MULTILINE)
def _rel_time(mtime): def _rel_time(mtime):
d = int(time.time() - mtime) d = int(time.time() - mtime)
if d < 60: return f'{d}秒前' if d < 60: return f'{d}秒前'
@@ -17,7 +14,6 @@ def _rel_time(mtime):
if d < 86400: return f'{d // 3600}小时前' if d < 86400: return f'{d // 3600}小时前'
return f'{d // 86400}天前' return f'{d // 86400}天前'
def _pairs(content): def _pairs(content):
blocks, pairs, pending = _BLOCK_RE.findall(content or ''), [], None blocks, pairs, pending = _BLOCK_RE.findall(content or ''), [], None
for label, body in blocks: for label, body in blocks:
@@ -26,7 +22,6 @@ def _pairs(content):
pairs.append((pending, body.strip())); pending = None pairs.append((pending, body.strip())); pending = None
return pairs return pairs
def _first_user(pairs): def _first_user(pairs):
for p, _ in pairs: for p, _ in pairs:
try: msg = json.loads(p) try: msg = json.loads(p)
@@ -43,7 +38,6 @@ def _first_user(pairs):
if s and not s.startswith('###'): return s if s and not s.startswith('###'): return s
return '' return ''
def _parse_native_history(pairs): def _parse_native_history(pairs):
history = [] history = []
for p, r in pairs: for p, r in pairs:
@@ -57,7 +51,6 @@ def _parse_native_history(pairs):
history.append({'role': 'assistant', 'content': blocks}) history.append({'role': 'assistant', 'content': blocks})
return history return history
def list_sessions(exclude_pid=None): def list_sessions(exclude_pid=None):
"""Newest-first list of (path, mtime, first_user_text, n_rounds).""" """Newest-first list of (path, mtime, first_user_text, n_rounds)."""
files = glob.glob(_LOG_GLOB) files = glob.glob(_LOG_GLOB)
@@ -74,12 +67,8 @@ def list_sessions(exclude_pid=None):
out.append((f, os.path.getmtime(f), _first_user(pairs), len(pairs))) out.append((f, os.path.getmtime(f), _first_user(pairs), len(pairs)))
out.sort(key=lambda x: x[1], reverse=True) out.sort(key=lambda x: x[1], reverse=True)
return out return out
_MD_ESCAPE_RE = re.compile(r'([\\`*_\[\]])') _MD_ESCAPE_RE = re.compile(r'([\\`*_\[\]])')
def _escape_md(s): return _MD_ESCAPE_RE.sub(r'\\\1', s)
def _escape_md(s):
return _MD_ESCAPE_RE.sub(r'\\\1', s)
def format_list(sessions, limit=20): def format_list(sessions, limit=20):
if not sessions: return '❌ 没有可恢复的历史会话' if not sessions: return '❌ 没有可恢复的历史会话'
@@ -89,7 +78,6 @@ def format_list(sessions, limit=20):
lines.append(f'{i}. `{_rel_time(mtime)}` · **{n} 轮** · {preview}') lines.append(f'{i}. `{_rel_time(mtime)}` · **{n} 轮** · {preview}')
return '\n'.join(lines) return '\n'.join(lines)
def restore(agent, path): def restore(agent, path):
"""Restore session at path. Returns (msg, is_full).""" """Restore session at path. Returns (msg, is_full)."""
try: content = open(path, encoding='utf-8', errors='replace').read() try: content = open(path, encoding='utf-8', errors='replace').read()
@@ -110,7 +98,6 @@ def restore(agent, path):
n = sum(1 for l in summary if l.startswith('[USER]: ')) n = sum(1 for l in summary if l.startswith('[USER]: '))
return f'⚠️ 非 native 格式,已降级恢复 {n} 轮摘要({name}\n(请输入新问题继续)', False return f'⚠️ 非 native 格式,已降级恢复 {n} 轮摘要({name}\n(请输入新问题继续)', False
def handle(agent, query, display_queue): def handle(agent, query, display_queue):
"""Dispatch /continue or /continue N. Returns None if consumed else original query.""" """Dispatch /continue or /continue N. Returns None if consumed else original query."""
s = (query or '').strip() s = (query or '').strip()
@@ -128,8 +115,6 @@ def handle(agent, query, display_queue):
display_queue.put({'done': msg, 'source': 'system'}) display_queue.put({'done': msg, 'source': 'system'})
return None return None
return query return query
def install(cls): def install(cls):
"""Wrap cls._handle_slash_cmd so /continue is handled before original dispatch.""" """Wrap cls._handle_slash_cmd so /continue is handled before original dispatch."""
orig = cls._handle_slash_cmd orig = cls._handle_slash_cmd

View File

@@ -238,6 +238,39 @@ def _parse_openai_sse(resp_lines, api_mode="chat_completions"):
blocks.append({"type": "tool_use", "id": tc["id"], "name": tc["name"], "input": inp}) blocks.append({"type": "tool_use", "id": tc["id"], "name": tc["name"], "input": inp})
return blocks return blocks
def _parse_openai_json(data, api_mode="chat_completions"):
blocks = []
if api_mode == "responses":
usage = data.get("usage", {})
cached = (usage.get("input_tokens_details") or {}).get("cached_tokens", 0)
inp = usage.get("input_tokens", 0)
if inp: print(f"[Cache] input={inp} cached={cached}")
for item in (data.get("output") or []):
if item.get("type") == "message":
for p in (item.get("content") or []):
if p.get("type") in ("output_text", "text") and p.get("text"):
blocks.append({"type": "text", "text": p["text"]}); yield p["text"]
elif item.get("type") == "function_call":
try: args = json.loads(item.get("arguments", "")) if item.get("arguments") else {}
except: args = {"_raw": item.get("arguments", "")}
blocks.append({"type": "tool_use", "id": item.get("call_id", item.get("id", "")),
"name": item.get("name", ""), "input": args})
else:
usage = data.get("usage") or {}
if usage:
cached = (usage.get("prompt_tokens_details") or {}).get("cached_tokens", 0)
print(f"[Cache] input={usage.get('prompt_tokens', 0)} cached={cached}")
msg = (data.get("choices") or [{}])[0].get("message", {})
content = msg.get("content", "")
if content:
blocks.append({"type": "text", "text": content}); yield content
for tc in (msg.get("tool_calls") or []):
fn = tc.get("function", {})
try: args = json.loads(fn.get("arguments", "")) if fn.get("arguments") else {}
except: args = {"_raw": fn.get("arguments", "")}
blocks.append({"type": "tool_use", "id": tc.get("id", ""), "name": fn.get("name", ""), "input": args})
return blocks
def _stamp_oai_cache_markers(messages, model): def _stamp_oai_cache_markers(messages, model):
"""Add cache_control to last 2 user messages for Anthropic models via OAI-compatible relay.""" """Add cache_control to last 2 user messages for Anthropic models via OAI-compatible relay."""
ml = model.lower() ml = model.lower()
@@ -253,7 +286,8 @@ def _stamp_oai_cache_markers(messages, model):
def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completions', *, def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completions', *,
temperature=0.5, max_tokens=None, tools=None, reasoning_effort=None, temperature=0.5, max_tokens=None, tools=None, reasoning_effort=None,
max_retries=0, connect_timeout=10, read_timeout=300, proxies=None): max_retries=0, connect_timeout=10, read_timeout=300, proxies=None,
stream=True):
"""Shared OpenAI-compatible streaming request with retry. Yields text chunks, returns list[content_block].""" """Shared OpenAI-compatible streaming request with retry. Yields text chunks, returns list[content_block]."""
ml = model.lower() ml = model.lower()
if 'kimi' in ml or 'moonshot' in ml: temperature = 1 if 'kimi' in ml or 'moonshot' in ml: temperature = 1
@@ -261,27 +295,17 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"} headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
if api_mode == "responses": if api_mode == "responses":
url = auto_make_url(api_base, "responses") url = auto_make_url(api_base, "responses")
payload = {"model": model, "input": _to_responses_input(messages), "stream": True, "prompt_cache_key": _RESP_CACHE_KEY} payload = {"model": model, "input": _to_responses_input(messages), "stream": stream, "prompt_cache_key": _RESP_CACHE_KEY}
if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort} if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort}
else: else:
url = auto_make_url(api_base, "chat/completions") url = auto_make_url(api_base, "chat/completions")
_stamp_oai_cache_markers(messages, model) _stamp_oai_cache_markers(messages, model)
payload = {"model": model, "messages": messages, "stream": True, "stream_options": {"include_usage": True}} payload = {"model": model, "messages": messages, "stream": stream}
if stream: payload["stream_options"] = {"include_usage": True}
if temperature != 1: payload["temperature"] = temperature if temperature != 1: payload["temperature"] = temperature
if max_tokens: payload["max_tokens"] = max_tokens if max_tokens: payload["max_tokens"] = max_tokens
if reasoning_effort: payload["reasoning_effort"] = reasoning_effort if reasoning_effort: payload["reasoning_effort"] = reasoning_effort
if tools: if tools: payload["tools"] = _prepare_oai_tools(tools, api_mode)
if api_mode == "responses":
# Responses API: flatten {type, function: {name, ...}} -> {type, name, ...}
resp_tools = []
for t in tools:
if t.get("type") == "function" and "function" in t:
rt = {"type": "function"}
rt.update(t["function"])
resp_tools.append(rt)
else: resp_tools.append(t)
payload["tools"] = resp_tools
else: payload["tools"] = tools
RETRYABLE = {408, 409, 425, 429, 500, 502, 503, 504, 529} RETRYABLE = {408, 409, 425, 429, 500, 502, 503, 504, 529}
def _delay(resp, attempt): def _delay(resp, attempt):
try: ra = float((resp.headers or {}).get("retry-after")) try: ra = float((resp.headers or {}).get("retry-after"))
@@ -290,49 +314,45 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion
for attempt in range(max_retries + 1): for attempt in range(max_retries + 1):
streamed = False streamed = False
try: try:
with requests.post(url, headers=headers, json=payload, stream=True, with requests.post(url, headers=headers, json=payload, stream=stream,
timeout=(connect_timeout, read_timeout), proxies=proxies) as r: timeout=(connect_timeout, read_timeout), proxies=proxies) as r:
if r.status_code >= 400: if r.status_code >= 400:
if r.status_code in RETRYABLE and attempt < max_retries: if r.status_code in RETRYABLE and attempt < max_retries:
d = _delay(r, attempt) d = _delay(r, attempt)
print(f"[LLM Retry] HTTP {r.status_code}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})") print(f"[LLM Retry] HTTP {r.status_code}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})")
time.sleep(d); continue time.sleep(d); continue
# Read error body before raise (stream mode closes connection after raise) body = ""
err_body = "" try: body = r.text.strip()[:500]
try: err_body = r.text.strip()[:1200]
except: pass except: pass
try: r.raise_for_status() err = f"Error: HTTP {r.status_code}" + (f": {body}" if body else "")
except requests.HTTPError as e: yield err; return [{"type": "text", "text": err}]
e._err_body = err_body; raise gen = _parse_openai_sse(r.iter_lines(), api_mode) if stream else _parse_openai_json(r.json(), api_mode)
gen = _parse_openai_sse(r.iter_lines(), api_mode)
try: try:
while True: streamed = True; yield next(gen) while True: streamed = True; yield next(gen)
except StopIteration as e: except StopIteration as e:
return e.value or [] return e.value or []
except requests.HTTPError as e:
resp = getattr(e, "response", None); status = getattr(resp, "status_code", None)
if status in RETRYABLE and attempt < max_retries and not streamed:
d = _delay(resp, attempt)
print(f"[LLM Retry] HTTP {status}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})")
time.sleep(d); continue
body = ""; rid = ""; ra = ""; ct = ""
try: body = getattr(e, '_err_body', '') or (resp.text or "").strip()[:1200]
except: pass
try: h = resp.headers or {}; rid = h.get("x-request-id","") or h.get("request-id",""); ra = h.get("retry-after",""); ct = h.get("content-type","")
except: pass
err = f"Error: HTTP {status} {e}; content_type: {ct or '<empty>'}; retry_after: {ra or '<empty>'}; request_id: {rid or '<empty>'}; body: {body or '<empty>'}"
yield err; return [{"type": "text", "text": err}]
except (requests.Timeout, requests.ConnectionError) as e: except (requests.Timeout, requests.ConnectionError) as e:
if attempt < max_retries and not streamed: if attempt < max_retries and not streamed:
d = _delay(None, attempt) d = _delay(None, attempt)
print(f"[LLM Retry] {type(e).__name__}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})") print(f"[LLM Retry] {type(e).__name__}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})")
time.sleep(d); continue time.sleep(d); continue
err = f"Error: {type(e).__name__}: {e}" err = f"Error: {type(e).__name__}"
yield err; return [{"type": "text", "text": err}] yield err; return [{"type": "text", "text": err}]
except Exception as e: except Exception as e:
err = f"Error: {e}" err = f"Error: {type(e).__name__}: {e}"
yield err; return [{"type": "text", "text": err}] yield err; return [{"type": "text", "text": err}]
def _prepare_oai_tools(tools, api_mode="chat_completions"):
if api_mode == "responses":
resp_tools = []
for t in tools:
if t.get("type") == "function" and "function" in t:
rt = {"type": "function"}; rt.update(t["function"])
resp_tools.append(rt)
else: resp_tools.append(t)
return resp_tools
return tools
def _to_responses_input(messages): def _to_responses_input(messages):
result = [] result = []
for msg in messages: for msg in messages:
@@ -492,7 +512,7 @@ class LLMSession(BaseSession):
def raw_ask(self, messages): def raw_ask(self, messages):
return (yield from _openai_stream(self.api_base, self.api_key, messages, self.model, self.api_mode, return (yield from _openai_stream(self.api_base, self.api_key, messages, self.model, self.api_mode,
temperature=self.temperature, reasoning_effort=self.reasoning_effort, temperature=self.temperature, reasoning_effort=self.reasoning_effort,
max_tokens=self.max_tokens, max_retries=self.max_retries, max_tokens=self.max_tokens, max_retries=self.max_retries, stream=self.stream,
connect_timeout=self.connect_timeout, read_timeout=self.read_timeout, proxies=self.proxies)) connect_timeout=self.connect_timeout, read_timeout=self.read_timeout, proxies=self.proxies))
def make_messages(self, raw_list): return _msgs_claude2oai(raw_list) def make_messages(self, raw_list): return _msgs_claude2oai(raw_list)
@@ -604,7 +624,7 @@ class NativeOAISession(NativeClaudeSession):
temperature=self.temperature, max_tokens=self.max_tokens, temperature=self.temperature, max_tokens=self.max_tokens,
tools=self.tools, reasoning_effort=self.reasoning_effort, tools=self.tools, reasoning_effort=self.reasoning_effort,
max_retries=self.max_retries, connect_timeout=self.connect_timeout, max_retries=self.max_retries, connect_timeout=self.connect_timeout,
read_timeout=self.read_timeout, proxies=self.proxies)) read_timeout=self.read_timeout, proxies=self.proxies, stream=self.stream))
def openai_tools_to_claude(tools): def openai_tools_to_claude(tools):
"""[{type:'function', function:{name,description,parameters}}] → [{name,description,input_schema}].""" """[{type:'function', function:{name,description,parameters}}] → [{name,description,input_schema}]."""
@@ -823,7 +843,7 @@ class MixinSession:
self.model = getattr(self._sessions[0], 'model', None) self.model = getattr(self._sessions[0], 'model', None)
self._cur_idx, self._switched_at = 0, 0.0 self._cur_idx, self._switched_at = 0, 0.0
def __getattr__(self, name): return getattr(self._sessions[0], name) def __getattr__(self, name): return getattr(self._sessions[0], name)
_BROADCAST_ATTRS = frozenset({'system', 'tools', 'temperature', 'max_tokens', 'reasoning_effort'}) _BROADCAST_ATTRS = frozenset({'system', 'tools', 'temperature', 'max_tokens', 'reasoning_effort', 'history'})
def __setattr__(self, name, value): def __setattr__(self, name, value):
if name in self._BROADCAST_ATTRS: if name in self._BROADCAST_ATTRS:
for s in self._sessions: for s in self._sessions: