Add retry/backoff and richer error logs for relay calls

This commit is contained in:
remy
2026-03-02 00:38:27 +10:00
parent 998382aebe
commit df3b347653
3 changed files with 116 additions and 50 deletions

View File

@@ -34,7 +34,16 @@ class GeneraticAgent:
if not any(x in k for x in ['api', 'config', 'cookie']): continue if not any(x in k for x in ['api', 'config', 'cookie']): continue
try: try:
if 'claude' in k: llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])] if 'claude' in k: llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])]
if 'oai' in k: llm_sessions += [LLMSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'], proxy=cfg.get('proxy'), api_mode=cfg.get('api_mode', 'chat_completions'))] if 'oai' in k: llm_sessions += [LLMSession(
api_key=cfg['apikey'],
api_base=cfg['apibase'],
model=cfg['model'],
proxy=cfg.get('proxy'),
api_mode=cfg.get('api_mode', 'chat_completions'),
max_retries=cfg.get('max_retries', 2),
connect_timeout=cfg.get('connect_timeout', 10),
read_timeout=cfg.get('read_timeout', 120),
)]
if 'xai' in k: llm_sessions += [XaiSession(cfg, mykeys.get('proxy', ''))] if 'xai' in k: llm_sessions += [XaiSession(cfg, mykeys.get('proxy', ''))]
if 'sider' in k: llm_sessions += [SiderLLMSession(cfg, default_model=x) for x in \ if 'sider' in k: llm_sessions += [SiderLLMSession(cfg, default_model=x) for x in \
["gemini-3.0-flash", "claude-haiku-4.5", "kimi-k2"]] ["gemini-3.0-flash", "claude-haiku-4.5", "kimi-k2"]]

View File

@@ -3,7 +3,10 @@ oai_config = {
'apikey':'sk-uklURcj', 'apikey':'sk-uklURcj',
'apibase':"http://113.145.139.147:3001", 'apibase':"http://113.145.139.147:3001",
'model':"openai/gpt-5.1", 'model':"openai/gpt-5.1",
'api_mode':"chat_completions" # optional: "chat_completions" | "responses" 'api_mode':"chat_completions", # optional: "chat_completions" | "responses"
'max_retries': 2, # optional: retries for 429/timeout/5xx
'connect_timeout': 10, # optional: seconds
'read_timeout': 120 # optional: seconds (stream read)
} }
# or # or

View File

@@ -86,11 +86,15 @@ class ClaudeSession:
return _ask_gen() if stream else ''.join(list(_ask_gen())) return _ask_gen() if stream else ''.join(list(_ask_gen()))
class LLMSession: class LLMSession:
def __init__(self, api_key, api_base, model, context_win=12000, proxy=None, api_mode="chat_completions"): def __init__(self, api_key, api_base, model, context_win=12000, proxy=None, api_mode="chat_completions",
max_retries=2, connect_timeout=10, read_timeout=120):
self.api_key = api_key; self.api_base = api_base.rstrip('/'); self.default_model = model self.api_key = api_key; self.api_base = api_base.rstrip('/'); self.default_model = model
self.context_win = context_win; self.raw_msgs = []; self.messages = [] self.context_win = context_win; self.raw_msgs = []; self.messages = []
self.proxies = {"http": proxy, "https": proxy} if proxy else None self.proxies = {"http": proxy, "https": proxy} if proxy else None
self.lock = threading.Lock() self.lock = threading.Lock()
self.max_retries = max(0, int(max_retries))
self.connect_timeout = max(1, int(connect_timeout))
self.read_timeout = max(5, int(read_timeout))
mode = str(api_mode or "chat_completions").strip().lower().replace('-', '_') mode = str(api_mode or "chat_completions").strip().lower().replace('-', '_')
if mode in ["responses", "response"]: self.api_mode = "responses" if mode in ["responses", "response"]: self.api_mode = "responses"
else: self.api_mode = "chat_completions" else: self.api_mode = "chat_completions"
@@ -99,6 +103,18 @@ class LLMSession:
if self.api_base.endswith('/v1'): return f"{self.api_base}/{path.lstrip('/')}" if self.api_base.endswith('/v1'): return f"{self.api_base}/{path.lstrip('/')}"
return f"{self.api_base}/v1/{path.lstrip('/')}" return f"{self.api_base}/v1/{path.lstrip('/')}"
def _retry_delay(self, resp, attempt):
    """Return the number of seconds to sleep before the next retry.

    A usable ``retry-after`` header on ``resp`` takes precedence; otherwise
    exponential backoff is used: ``1.5 * 2 ** attempt`` seconds, capped at 30.
    The result is never below 0.5 seconds.

    Args:
        resp: Response-like object exposing a ``headers`` mapping, or ``None``
            when no response is available (the visible caller passes ``None``
            for timeouts / connection errors).
        attempt: Zero-based index of the attempt that just failed.

    Returns:
        float: Delay in seconds, always >= 0.5.

    NOTE(review): a finite server-supplied ``retry-after`` is honoured as-is,
    with no upper bound; only the computed backoff is capped at 30 seconds.
    """
    # Function-scope imports keep this method self-contained.
    import math
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    # Header lookup is best-effort: a response object without a usable
    # ``headers`` mapping simply means "no hint from the server".
    raw = None
    if resp is not None:
        try:
            raw = (resp.headers or {}).get("retry-after")
        except (AttributeError, TypeError):
            raw = None

    delay = None
    if raw is not None:
        try:
            # Common form: delay expressed in seconds.
            delay = float(raw)
        except (TypeError, ValueError):
            # RFC 9110 also allows an HTTP-date; convert it to seconds from now.
            try:
                when = parsedate_to_datetime(str(raw))
                if when.tzinfo is None:
                    # A naive result cannot be compared with an aware "now".
                    when = when.replace(tzinfo=timezone.utc)
                delay = (when - datetime.now(timezone.utc)).total_seconds()
            except (TypeError, ValueError, OverflowError):
                delay = None
        # "inf" / "nan" parse as floats but are not usable sleep durations.
        if delay is not None and not math.isfinite(delay):
            delay = None

    if delay is None:
        delay = min(30.0, 1.5 * (2 ** attempt))
    # The floor also covers zero, negative values and dates already in the past.
    return max(0.5, float(delay))
def _to_responses_input(self, messages): def _to_responses_input(self, messages):
result = [] result = []
for msg in messages: for msg in messages:
@@ -132,53 +148,91 @@ class LLMSession:
else: else:
url = self._endpoint("chat/completions") url = self._endpoint("chat/completions")
payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True} payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True}
try: for attempt in range(self.max_retries + 1):
with requests.post(url, headers=headers, json=payload, stream=True, timeout=(5, 60), proxies=self.proxies) as r: streamed_any = False
r.raise_for_status() try:
buffer = ''; seen_delta = False with requests.post(url, headers=headers, json=payload, stream=True,
for line in r.iter_lines(): timeout=(self.connect_timeout, self.read_timeout), proxies=self.proxies) as r:
line = line.decode("utf-8") if isinstance(line, bytes) else line if r.status_code >= 400:
if not line or not line.startswith("data:"): continue retryable = r.status_code in [408, 409, 425, 429, 500, 502, 503, 504]
data = line[5:].lstrip() if retryable and attempt < self.max_retries:
if data == "[DONE]": break delay = self._retry_delay(r, attempt)
try: obj = json.loads(data) print(f"[LLM Retry] HTTP {r.status_code}, retry in {delay:.1f}s ({attempt+1}/{self.max_retries+1})")
except: continue time.sleep(delay)
if self.api_mode == "responses": continue
etype = obj.get("type", "") r.raise_for_status()
delta = obj.get("delta", "") if etype == "response.output_text.delta" else "" buffer = ''; seen_delta = False
if delta: for line in r.iter_lines():
seen_delta = True line = line.decode("utf-8") if isinstance(line, bytes) else line
yield delta; buffer += delta if not line or not line.startswith("data:"): continue
elif etype == "response.output_text.done" and not seen_delta: data = line[5:].lstrip()
text = obj.get("text", "") if data == "[DONE]": break
if text: try: obj = json.loads(data)
yield text; buffer += text except: continue
elif etype == "error": if self.api_mode == "responses":
err = obj.get("error", {}) etype = obj.get("type", "")
emsg = err.get("message", str(err)) if isinstance(err, dict) else str(err) delta = obj.get("delta", "") if etype == "response.output_text.delta" else ""
if emsg: if delta:
yield f"Error: {emsg}" streamed_any = True; seen_delta = True
return yield delta; buffer += delta
elif etype == "response.completed": elif etype == "response.output_text.done" and not seen_delta:
break text = obj.get("text", "")
else: if text:
ch = (obj.get("choices") or [{}])[0] streamed_any = True
finish_reason = ch.get("finish_reason") yield text; buffer += text
delta = (ch.get("delta") or {}).get("content") elif etype == "error":
if delta: err = obj.get("error", {})
yield delta; buffer += delta emsg = err.get("message", str(err)) if isinstance(err, dict) else str(err)
if finish_reason: break if emsg:
if '</tool_use>' in buffer[-30:]: break yield f"Error: {emsg}"
except requests.HTTPError as e: return
resp = getattr(e, "response", None) elif etype == "response.completed":
status = getattr(resp, "status_code", "unknown") break
body = "" else:
try: body = (resp.text or "").strip() ch = (obj.get("choices") or [{}])[0]
except: body = "" finish_reason = ch.get("finish_reason")
body = body[:1200] if body else "<empty>" delta = (ch.get("delta") or {}).get("content")
yield f"Error: HTTP {status} {str(e)}; body: {body}" if delta:
except Exception as e: streamed_any = True
yield f"Error: {str(e)}" yield delta; buffer += delta
if finish_reason: break
if '</tool_use>' in buffer[-30:]: break
return
except requests.HTTPError as e:
resp = getattr(e, "response", None)
status = getattr(resp, "status_code", "unknown")
retryable = isinstance(status, int) and status in [408, 409, 425, 429, 500, 502, 503, 504]
if retryable and attempt < self.max_retries and not streamed_any:
delay = self._retry_delay(resp, attempt)
print(f"[LLM Retry] HTTP {status}, retry in {delay:.1f}s ({attempt+1}/{self.max_retries+1})")
time.sleep(delay)
continue
body = ""
try: body = (resp.text or "").strip()
except: body = ""
body = body[:1200] if body else "<empty>"
rid = ""
retry_after = ""
ct = ""
try:
h = resp.headers or {}
rid = h.get("x-request-id") or h.get("request-id") or ""
retry_after = h.get("retry-after") or ""
ct = h.get("content-type") or ""
except: pass
yield f"Error: HTTP {status} {str(e)}; content_type: {ct or '<empty>'}; retry_after: {retry_after or '<empty>'}; request_id: {rid or '<empty>'}; body: {body}"
return
except (requests.Timeout, requests.ConnectionError) as e:
if attempt < self.max_retries and not streamed_any:
delay = self._retry_delay(None, attempt)
print(f"[LLM Retry] {type(e).__name__}, retry in {delay:.1f}s ({attempt+1}/{self.max_retries+1})")
time.sleep(delay)
continue
yield f"Error: {type(e).__name__}: {str(e)}"
return
except Exception as e:
yield f"Error: {str(e)}"
return
def make_messages(self, raw_list, omit_images=True): def make_messages(self, raw_list, omit_images=True):
compress_history_tags(raw_list) compress_history_tags(raw_list)