refactor: simplify HTTP error handling in _openai_stream, add non-stream support, broadcast history in MixinSession
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
Facts(L2): ../memory/global_mem.txt | CodeRoot: ../ | SOPs(L3): ../memory/*.md or *.py | META-SOP(L0): ../memory/memory_management_sop.md
|
||||
Facts(L2): ../memory/global_mem.txt | GA CodeRoot: ../ | SOPs(L3): ../memory/*.md or *.py | META-SOP(L0): ../memory/memory_management_sop.md
|
||||
L1 Insight是极简索引,L2/L3变更时同步L1,索引必须极简。写记忆前先读META-SOP(L0)。
|
||||
|
||||
[CONSTITUTION]
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
"""`/continue` command: list & restore past model_responses sessions.
|
||||
|
||||
Pure functions + one `install(cls)` monkey-patch entry. No side effects at import.
|
||||
"""
|
||||
import ast, glob, json, os, re, time
|
||||
|
||||
_LOG_GLOB = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
|
||||
'temp', 'model_responses', 'model_responses_*.txt')
|
||||
_BLOCK_RE = re.compile(r'^=== (Prompt|Response) ===.*?\n(.*?)(?=^=== (?:Prompt|Response) ===|\Z)',
|
||||
re.DOTALL | re.MULTILINE)
|
||||
|
||||
|
||||
def _rel_time(mtime):
|
||||
d = int(time.time() - mtime)
|
||||
if d < 60: return f'{d}秒前'
|
||||
@@ -17,7 +14,6 @@ def _rel_time(mtime):
|
||||
if d < 86400: return f'{d // 3600}小时前'
|
||||
return f'{d // 86400}天前'
|
||||
|
||||
|
||||
def _pairs(content):
|
||||
blocks, pairs, pending = _BLOCK_RE.findall(content or ''), [], None
|
||||
for label, body in blocks:
|
||||
@@ -26,7 +22,6 @@ def _pairs(content):
|
||||
pairs.append((pending, body.strip())); pending = None
|
||||
return pairs
|
||||
|
||||
|
||||
def _first_user(pairs):
|
||||
for p, _ in pairs:
|
||||
try: msg = json.loads(p)
|
||||
@@ -43,7 +38,6 @@ def _first_user(pairs):
|
||||
if s and not s.startswith('###'): return s
|
||||
return ''
|
||||
|
||||
|
||||
def _parse_native_history(pairs):
|
||||
history = []
|
||||
for p, r in pairs:
|
||||
@@ -57,7 +51,6 @@ def _parse_native_history(pairs):
|
||||
history.append({'role': 'assistant', 'content': blocks})
|
||||
return history
|
||||
|
||||
|
||||
def list_sessions(exclude_pid=None):
|
||||
"""Newest-first list of (path, mtime, first_user_text, n_rounds)."""
|
||||
files = glob.glob(_LOG_GLOB)
|
||||
@@ -74,12 +67,8 @@ def list_sessions(exclude_pid=None):
|
||||
out.append((f, os.path.getmtime(f), _first_user(pairs), len(pairs)))
|
||||
out.sort(key=lambda x: x[1], reverse=True)
|
||||
return out
|
||||
|
||||
|
||||
_MD_ESCAPE_RE = re.compile(r'([\\`*_\[\]])')
|
||||
|
||||
def _escape_md(s):
|
||||
return _MD_ESCAPE_RE.sub(r'\\\1', s)
|
||||
def _escape_md(s): return _MD_ESCAPE_RE.sub(r'\\\1', s)
|
||||
|
||||
def format_list(sessions, limit=20):
|
||||
if not sessions: return '❌ 没有可恢复的历史会话'
|
||||
@@ -89,7 +78,6 @@ def format_list(sessions, limit=20):
|
||||
lines.append(f'{i}. `{_rel_time(mtime)}` · **{n} 轮** · {preview}')
|
||||
return '\n'.join(lines)
|
||||
|
||||
|
||||
def restore(agent, path):
|
||||
"""Restore session at path. Returns (msg, is_full)."""
|
||||
try: content = open(path, encoding='utf-8', errors='replace').read()
|
||||
@@ -110,7 +98,6 @@ def restore(agent, path):
|
||||
n = sum(1 for l in summary if l.startswith('[USER]: '))
|
||||
return f'⚠️ 非 native 格式,已降级恢复 {n} 轮摘要({name})\n(请输入新问题继续)', False
|
||||
|
||||
|
||||
def handle(agent, query, display_queue):
|
||||
"""Dispatch /continue or /continue N. Returns None if consumed else original query."""
|
||||
s = (query or '').strip()
|
||||
@@ -128,8 +115,6 @@ def handle(agent, query, display_queue):
|
||||
display_queue.put({'done': msg, 'source': 'system'})
|
||||
return None
|
||||
return query
|
||||
|
||||
|
||||
def install(cls):
|
||||
"""Wrap cls._handle_slash_cmd so /continue is handled before original dispatch."""
|
||||
orig = cls._handle_slash_cmd
|
||||
|
||||
104
llmcore.py
104
llmcore.py
@@ -238,6 +238,39 @@ def _parse_openai_sse(resp_lines, api_mode="chat_completions"):
|
||||
blocks.append({"type": "tool_use", "id": tc["id"], "name": tc["name"], "input": inp})
|
||||
return blocks
|
||||
|
||||
def _parse_openai_json(data, api_mode="chat_completions"):
|
||||
blocks = []
|
||||
if api_mode == "responses":
|
||||
usage = data.get("usage", {})
|
||||
cached = (usage.get("input_tokens_details") or {}).get("cached_tokens", 0)
|
||||
inp = usage.get("input_tokens", 0)
|
||||
if inp: print(f"[Cache] input={inp} cached={cached}")
|
||||
for item in (data.get("output") or []):
|
||||
if item.get("type") == "message":
|
||||
for p in (item.get("content") or []):
|
||||
if p.get("type") in ("output_text", "text") and p.get("text"):
|
||||
blocks.append({"type": "text", "text": p["text"]}); yield p["text"]
|
||||
elif item.get("type") == "function_call":
|
||||
try: args = json.loads(item.get("arguments", "")) if item.get("arguments") else {}
|
||||
except: args = {"_raw": item.get("arguments", "")}
|
||||
blocks.append({"type": "tool_use", "id": item.get("call_id", item.get("id", "")),
|
||||
"name": item.get("name", ""), "input": args})
|
||||
else:
|
||||
usage = data.get("usage") or {}
|
||||
if usage:
|
||||
cached = (usage.get("prompt_tokens_details") or {}).get("cached_tokens", 0)
|
||||
print(f"[Cache] input={usage.get('prompt_tokens', 0)} cached={cached}")
|
||||
msg = (data.get("choices") or [{}])[0].get("message", {})
|
||||
content = msg.get("content", "")
|
||||
if content:
|
||||
blocks.append({"type": "text", "text": content}); yield content
|
||||
for tc in (msg.get("tool_calls") or []):
|
||||
fn = tc.get("function", {})
|
||||
try: args = json.loads(fn.get("arguments", "")) if fn.get("arguments") else {}
|
||||
except: args = {"_raw": fn.get("arguments", "")}
|
||||
blocks.append({"type": "tool_use", "id": tc.get("id", ""), "name": fn.get("name", ""), "input": args})
|
||||
return blocks
|
||||
|
||||
def _stamp_oai_cache_markers(messages, model):
|
||||
"""Add cache_control to last 2 user messages for Anthropic models via OAI-compatible relay."""
|
||||
ml = model.lower()
|
||||
@@ -253,7 +286,8 @@ def _stamp_oai_cache_markers(messages, model):
|
||||
|
||||
def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completions', *,
|
||||
temperature=0.5, max_tokens=None, tools=None, reasoning_effort=None,
|
||||
max_retries=0, connect_timeout=10, read_timeout=300, proxies=None):
|
||||
max_retries=0, connect_timeout=10, read_timeout=300, proxies=None,
|
||||
stream=True):
|
||||
"""Shared OpenAI-compatible streaming request with retry. Yields text chunks, returns list[content_block]."""
|
||||
ml = model.lower()
|
||||
if 'kimi' in ml or 'moonshot' in ml: temperature = 1
|
||||
@@ -261,27 +295,17 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion
|
||||
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
|
||||
if api_mode == "responses":
|
||||
url = auto_make_url(api_base, "responses")
|
||||
payload = {"model": model, "input": _to_responses_input(messages), "stream": True, "prompt_cache_key": _RESP_CACHE_KEY}
|
||||
payload = {"model": model, "input": _to_responses_input(messages), "stream": stream, "prompt_cache_key": _RESP_CACHE_KEY}
|
||||
if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort}
|
||||
else:
|
||||
url = auto_make_url(api_base, "chat/completions")
|
||||
_stamp_oai_cache_markers(messages, model)
|
||||
payload = {"model": model, "messages": messages, "stream": True, "stream_options": {"include_usage": True}}
|
||||
payload = {"model": model, "messages": messages, "stream": stream}
|
||||
if stream: payload["stream_options"] = {"include_usage": True}
|
||||
if temperature != 1: payload["temperature"] = temperature
|
||||
if max_tokens: payload["max_tokens"] = max_tokens
|
||||
if reasoning_effort: payload["reasoning_effort"] = reasoning_effort
|
||||
if tools:
|
||||
if api_mode == "responses":
|
||||
# Responses API: flatten {type, function: {name, ...}} -> {type, name, ...}
|
||||
resp_tools = []
|
||||
for t in tools:
|
||||
if t.get("type") == "function" and "function" in t:
|
||||
rt = {"type": "function"}
|
||||
rt.update(t["function"])
|
||||
resp_tools.append(rt)
|
||||
else: resp_tools.append(t)
|
||||
payload["tools"] = resp_tools
|
||||
else: payload["tools"] = tools
|
||||
if tools: payload["tools"] = _prepare_oai_tools(tools, api_mode)
|
||||
RETRYABLE = {408, 409, 425, 429, 500, 502, 503, 504, 529}
|
||||
def _delay(resp, attempt):
|
||||
try: ra = float((resp.headers or {}).get("retry-after"))
|
||||
@@ -290,48 +314,44 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion
|
||||
for attempt in range(max_retries + 1):
|
||||
streamed = False
|
||||
try:
|
||||
with requests.post(url, headers=headers, json=payload, stream=True,
|
||||
with requests.post(url, headers=headers, json=payload, stream=stream,
|
||||
timeout=(connect_timeout, read_timeout), proxies=proxies) as r:
|
||||
if r.status_code >= 400:
|
||||
if r.status_code in RETRYABLE and attempt < max_retries:
|
||||
d = _delay(r, attempt)
|
||||
print(f"[LLM Retry] HTTP {r.status_code}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})")
|
||||
time.sleep(d); continue
|
||||
# Read error body before raise (stream mode closes connection after raise)
|
||||
err_body = ""
|
||||
try: err_body = r.text.strip()[:1200]
|
||||
body = ""
|
||||
try: body = r.text.strip()[:500]
|
||||
except: pass
|
||||
try: r.raise_for_status()
|
||||
except requests.HTTPError as e:
|
||||
e._err_body = err_body; raise
|
||||
gen = _parse_openai_sse(r.iter_lines(), api_mode)
|
||||
err = f"Error: HTTP {r.status_code}" + (f": {body}" if body else "")
|
||||
yield err; return [{"type": "text", "text": err}]
|
||||
gen = _parse_openai_sse(r.iter_lines(), api_mode) if stream else _parse_openai_json(r.json(), api_mode)
|
||||
try:
|
||||
while True: streamed = True; yield next(gen)
|
||||
except StopIteration as e:
|
||||
return e.value or []
|
||||
except requests.HTTPError as e:
|
||||
resp = getattr(e, "response", None); status = getattr(resp, "status_code", None)
|
||||
if status in RETRYABLE and attempt < max_retries and not streamed:
|
||||
d = _delay(resp, attempt)
|
||||
print(f"[LLM Retry] HTTP {status}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})")
|
||||
time.sleep(d); continue
|
||||
body = ""; rid = ""; ra = ""; ct = ""
|
||||
try: body = getattr(e, '_err_body', '') or (resp.text or "").strip()[:1200]
|
||||
except: pass
|
||||
try: h = resp.headers or {}; rid = h.get("x-request-id","") or h.get("request-id",""); ra = h.get("retry-after",""); ct = h.get("content-type","")
|
||||
except: pass
|
||||
err = f"Error: HTTP {status} {e}; content_type: {ct or '<empty>'}; retry_after: {ra or '<empty>'}; request_id: {rid or '<empty>'}; body: {body or '<empty>'}"
|
||||
yield err; return [{"type": "text", "text": err}]
|
||||
except (requests.Timeout, requests.ConnectionError) as e:
|
||||
if attempt < max_retries and not streamed:
|
||||
d = _delay(None, attempt)
|
||||
print(f"[LLM Retry] {type(e).__name__}, retry in {d:.1f}s ({attempt+1}/{max_retries+1})")
|
||||
time.sleep(d); continue
|
||||
err = f"Error: {type(e).__name__}: {e}"
|
||||
err = f"Error: {type(e).__name__}"
|
||||
yield err; return [{"type": "text", "text": err}]
|
||||
except Exception as e:
|
||||
err = f"Error: {e}"
|
||||
err = f"Error: {type(e).__name__}: {e}"
|
||||
yield err; return [{"type": "text", "text": err}]
|
||||
|
||||
def _prepare_oai_tools(tools, api_mode="chat_completions"):
|
||||
if api_mode == "responses":
|
||||
resp_tools = []
|
||||
for t in tools:
|
||||
if t.get("type") == "function" and "function" in t:
|
||||
rt = {"type": "function"}; rt.update(t["function"])
|
||||
resp_tools.append(rt)
|
||||
else: resp_tools.append(t)
|
||||
return resp_tools
|
||||
return tools
|
||||
|
||||
def _to_responses_input(messages):
|
||||
result = []
|
||||
@@ -492,7 +512,7 @@ class LLMSession(BaseSession):
|
||||
def raw_ask(self, messages):
|
||||
return (yield from _openai_stream(self.api_base, self.api_key, messages, self.model, self.api_mode,
|
||||
temperature=self.temperature, reasoning_effort=self.reasoning_effort,
|
||||
max_tokens=self.max_tokens, max_retries=self.max_retries,
|
||||
max_tokens=self.max_tokens, max_retries=self.max_retries, stream=self.stream,
|
||||
connect_timeout=self.connect_timeout, read_timeout=self.read_timeout, proxies=self.proxies))
|
||||
def make_messages(self, raw_list): return _msgs_claude2oai(raw_list)
|
||||
|
||||
@@ -601,10 +621,10 @@ class NativeOAISession(NativeClaudeSession):
|
||||
"""OpenAI streaming. yields text chunks, generator return = list[content_block]"""
|
||||
msgs = ([{"role": "system", "content": self.system}] if self.system else []) + _msgs_claude2oai(messages)
|
||||
return (yield from _openai_stream(self.api_base, self.api_key, msgs, self.model, self.api_mode,
|
||||
temperature=self.temperature, max_tokens=self.max_tokens,
|
||||
temperature=self.temperature, max_tokens=self.max_tokens,
|
||||
tools=self.tools, reasoning_effort=self.reasoning_effort,
|
||||
max_retries=self.max_retries, connect_timeout=self.connect_timeout,
|
||||
read_timeout=self.read_timeout, proxies=self.proxies))
|
||||
read_timeout=self.read_timeout, proxies=self.proxies, stream=self.stream))
|
||||
|
||||
def openai_tools_to_claude(tools):
|
||||
"""[{type:'function', function:{name,description,parameters}}] → [{name,description,input_schema}]."""
|
||||
@@ -823,7 +843,7 @@ class MixinSession:
|
||||
self.model = getattr(self._sessions[0], 'model', None)
|
||||
self._cur_idx, self._switched_at = 0, 0.0
|
||||
def __getattr__(self, name): return getattr(self._sessions[0], name)
|
||||
_BROADCAST_ATTRS = frozenset({'system', 'tools', 'temperature', 'max_tokens', 'reasoning_effort'})
|
||||
_BROADCAST_ATTRS = frozenset({'system', 'tools', 'temperature', 'max_tokens', 'reasoning_effort', 'history'})
|
||||
def __setattr__(self, name, value):
|
||||
if name in self._BROADCAST_ATTRS:
|
||||
for s in self._sessions:
|
||||
|
||||
Reference in New Issue
Block a user