From 998382aebefeed2b4baa12728e7e23724dd59329 Mon Sep 17 00:00:00 2001 From: remy Date: Mon, 2 Mar 2026 00:17:25 +1000 Subject: [PATCH 1/2] Add responses api_mode support for OpenAI relay --- agentmain.py | 4 +- mykey_template.py | 3 +- sidercall.py | 101 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 87 insertions(+), 21 deletions(-) diff --git a/agentmain.py b/agentmain.py index e3dd6bd..bd2e837 100644 --- a/agentmain.py +++ b/agentmain.py @@ -34,7 +34,7 @@ class GeneraticAgent: if not any(x in k for x in ['api', 'config', 'cookie']): continue try: if 'claude' in k: llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])] - if 'oai' in k: llm_sessions += [LLMSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'], proxy=cfg.get('proxy'))] + if 'oai' in k: llm_sessions += [LLMSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'], proxy=cfg.get('proxy'), api_mode=cfg.get('api_mode', 'chat_completions'))] if 'xai' in k: llm_sessions += [XaiSession(cfg, mykeys.get('proxy', ''))] if 'sider' in k: llm_sessions += [SiderLLMSession(cfg, default_model=x) for x in \ ["gemini-3.0-flash", "claude-haiku-4.5", "kimi-k2"]] @@ -168,4 +168,4 @@ if __name__ == '__main__': if 'done' in item: print(); break except KeyboardInterrupt: agent.abort() - print('\n[Interrupted]') \ No newline at end of file + print('\n[Interrupted]') diff --git a/mykey_template.py b/mykey_template.py index aea5f7e..74419e7 100644 --- a/mykey_template.py +++ b/mykey_template.py @@ -2,7 +2,8 @@ oai_config = { 'apikey':'sk-uklURcj', 'apibase':"http://113.145.139.147:3001", - 'model':"openai/gpt-5.1" + 'model':"openai/gpt-5.1", + 'api_mode':"chat_completions" # optional: "chat_completions" | "responses" } # or diff --git a/sidercall.py b/sidercall.py index 6c41283..a10696d 100644 --- a/sidercall.py +++ b/sidercall.py @@ -86,33 +86,97 @@ class ClaudeSession: return _ask_gen() if stream else ''.join(list(_ask_gen())) class LLMSession: - def __init__(self, api_key, api_base, model, context_win=12000, proxy=None): - self.api_key = api_key; self.api_base = api_base; self.default_model = model + def __init__(self, api_key, api_base, model, context_win=12000, proxy=None, api_mode="chat_completions"): + self.api_key = api_key; self.api_base = api_base.rstrip('/'); self.default_model = model self.context_win = context_win; self.raw_msgs = []; self.messages = [] self.proxies = {"http": proxy, "https": proxy} if proxy else None self.lock = threading.Lock() + mode = str(api_mode or "chat_completions").strip().lower().replace('-', '_') + if mode in ["responses", "response"]: self.api_mode = "responses" + else: self.api_mode = "chat_completions" + + def _endpoint(self, path): + if self.api_base.endswith('/v1'): return f"{self.api_base}/{path.lstrip('/')}" + return f"{self.api_base}/v1/{path.lstrip('/')}" + + def _to_responses_input(self, messages): + result = [] + for msg in messages: + role = str(msg.get("role", "user")).lower() + if role not in ["user", "assistant", "system", "developer"]: role = "user" + content = msg.get("content", "") + text_type = "output_text" if role == "assistant" else "input_text" + parts = [] + if isinstance(content, str): + if content: parts.append({"type": text_type, "text": content}) + elif isinstance(content, list): + for part in content: + if not isinstance(part, dict): continue + ptype = part.get("type") + if ptype == "text": + text = part.get("text", "") + if text: parts.append({"type": text_type, "text": text}) + elif ptype == "image_url": + url = (part.get("image_url") or {}).get("url", "") + if url and role != "assistant": parts.append({"type": "input_image", "image_url": url}) + if len(parts) == 0: parts = [{"type": text_type, "text": str(content)}] + result.append({"role": role, "content": parts}) + return result def raw_ask(self, messages, model=None, temperature=0.5): if model is None: model = self.default_model headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"} - payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True} + if self.api_mode == "responses": + url = self._endpoint("responses") + payload = {"model": model, "input": self._to_responses_input(messages), "temperature": temperature, "stream": True} + else: + url = self._endpoint("chat/completions") + payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True} try: - with requests.post(f"{self.api_base}/v1/chat/completions", headers=headers, - json=payload, stream=True, timeout=(5, 60), proxies=self.proxies) as r: + with requests.post(url, headers=headers, json=payload, stream=True, timeout=(5, 60), proxies=self.proxies) as r: r.raise_for_status() - buffer = '' + buffer = ''; seen_delta = False for line in r.iter_lines(): - line = line.decode("utf-8") + line = line.decode("utf-8") if isinstance(line, bytes) else line if not line or not line.startswith("data:"): continue data = line[5:].lstrip() if data == "[DONE]": break - obj = json.loads(data); ch = (obj.get("choices") or [{}])[0] - finish_reason = ch.get("finish_reason") - delta = (ch.get("delta") or {}).get("content") - if delta: - yield delta; buffer += delta - if '' in buffer[-30:]: break - if finish_reason: break + try: obj = json.loads(data) + except: continue + if self.api_mode == "responses": + etype = obj.get("type", "") + delta = obj.get("delta", "") if etype == "response.output_text.delta" else "" + if delta: + seen_delta = True + yield delta; buffer += delta + elif etype == "response.output_text.done" and not seen_delta: + text = obj.get("text", "") + if text: + yield text; buffer += text + elif etype == "error": + err = obj.get("error", {}) + emsg = err.get("message", str(err)) if isinstance(err, dict) else str(err) + if emsg: + yield f"Error: {emsg}" + return + elif etype == "response.completed": + break + else: + ch = (obj.get("choices") or [{}])[0] + finish_reason = ch.get("finish_reason") + delta = (ch.get("delta") or {}).get("content") + if delta: + yield delta; buffer += delta + if finish_reason: break + if '' in buffer[-30:]: break + except requests.HTTPError as e: + resp = getattr(e, "response", None) + status = getattr(resp, "status_code", "unknown") + body = "" + try: body = (resp.text or "").strip() + except: body = "" + body = body[:1200] if body else "" + yield f"Error: HTTP {status} {str(e)}; body: {body}" except Exception as e: yield f"Error: {str(e)}" @@ -121,10 +185,11 @@ class LLMSession: messages = [] for i, msg in enumerate(raw_list): prompt = msg['prompt'] - if omit_images and msg['image']: messages.append({"role": msg['role'], "content": "[Image omitted, if you needed it, ask me]\n" + prompt}) - elif not omit_images and msg['image']: + image = msg.get('image') + if omit_images and image: messages.append({"role": msg['role'], "content": "[Image omitted, if you needed it, ask me]\n" + prompt}) + elif not omit_images and image: messages.append({"role": msg['role'], "content": [ - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{msg['image']}"}}, + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image}"}}, {"type": "text", "text": prompt} ]}) else: messages.append({"role": msg['role'], "content": prompt}) @@ -420,4 +485,4 @@ if __name__ == "__main__": response = get_final(llmclient.chat( messages=[{"role": "user", "content": "10.176.45.12"}] )) - print(response.content) \ No newline at end of file + print(response.content) From df3b3476535172039f721febaa0a33bfe488c938 Mon Sep 17 00:00:00 2001 From: remy Date: Mon, 2 Mar 2026 00:38:27 +1000 Subject: [PATCH 2/2] Add retry/backoff and richer error logs for relay calls --- agentmain.py | 11 +++- mykey_template.py | 5 +- sidercall.py | 150 +++++++++++++++++++++++++++++++--------------- 3 files changed, 116 insertions(+), 50 deletions(-) diff --git a/agentmain.py b/agentmain.py index bd2e837..dead59c 100644 --- a/agentmain.py +++ b/agentmain.py @@ -34,7 +34,16 @@ class GeneraticAgent: if not any(x in k for x in ['api', 'config', 'cookie']): continue try: if 'claude' in k: llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])] - if 'oai' in k: llm_sessions += [LLMSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'], proxy=cfg.get('proxy'), api_mode=cfg.get('api_mode', 'chat_completions'))] + if 'oai' in k: llm_sessions += [LLMSession( + api_key=cfg['apikey'], + api_base=cfg['apibase'], + model=cfg['model'], + proxy=cfg.get('proxy'), + api_mode=cfg.get('api_mode', 'chat_completions'), + max_retries=cfg.get('max_retries', 2), + connect_timeout=cfg.get('connect_timeout', 10), + read_timeout=cfg.get('read_timeout', 120), + )] if 'xai' in k: llm_sessions += [XaiSession(cfg, mykeys.get('proxy', ''))] if 'sider' in k: llm_sessions += [SiderLLMSession(cfg, default_model=x) for x in \ ["gemini-3.0-flash", "claude-haiku-4.5", "kimi-k2"]] diff --git a/mykey_template.py b/mykey_template.py index 74419e7..20de599 100644 --- a/mykey_template.py +++ b/mykey_template.py @@ -3,7 +3,10 @@ oai_config = { 'apikey':'sk-uklURcj', 'apibase':"http://113.145.139.147:3001", 'model':"openai/gpt-5.1", - 'api_mode':"chat_completions" # optional: "chat_completions" | "responses" + 'api_mode':"chat_completions", # optional: "chat_completions" | "responses" + 'max_retries': 2, # optional: retries for 429/timeout/5xx + 'connect_timeout': 10, # optional: seconds + 'read_timeout': 120 # optional: seconds (stream read) } # or diff --git a/sidercall.py b/sidercall.py index a10696d..0a6a87a 100644 --- a/sidercall.py +++ b/sidercall.py @@ -86,11 +86,15 @@ class ClaudeSession: return _ask_gen() if stream else ''.join(list(_ask_gen())) class LLMSession: - def __init__(self, api_key, api_base, model, context_win=12000, proxy=None, api_mode="chat_completions"): + def __init__(self, api_key, api_base, model, context_win=12000, proxy=None, api_mode="chat_completions", + max_retries=2, connect_timeout=10, read_timeout=120): self.api_key = api_key; self.api_base = api_base.rstrip('/'); self.default_model = model self.context_win = context_win; self.raw_msgs = []; self.messages = [] self.proxies = {"http": proxy, "https": proxy} if proxy else None self.lock = threading.Lock() + self.max_retries = max(0, int(max_retries)) + self.connect_timeout = max(1, int(connect_timeout)) + self.read_timeout = max(5, int(read_timeout)) mode = str(api_mode or "chat_completions").strip().lower().replace('-', '_') if mode in ["responses", "response"]: self.api_mode = "responses" else: self.api_mode = "chat_completions" @@ -99,6 +103,18 @@ class LLMSession: if self.api_base.endswith('/v1'): return f"{self.api_base}/{path.lstrip('/')}" return f"{self.api_base}/v1/{path.lstrip('/')}" + def _retry_delay(self, resp, attempt): + retry_after = None + try: + if resp is not None: + retry_after = (resp.headers or {}).get("retry-after") + if retry_after is not None: + retry_after = float(retry_after) + except: + retry_after = None + if retry_after is None: retry_after = min(30.0, 1.5 * (2 ** attempt)) + return max(0.5, float(retry_after)) + def _to_responses_input(self, messages): result = [] for msg in messages: @@ -132,53 +148,91 @@ class LLMSession: else: url = self._endpoint("chat/completions") payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True} - try: - with requests.post(url, headers=headers, json=payload, stream=True, timeout=(5, 60), proxies=self.proxies) as r: - r.raise_for_status() - buffer = ''; seen_delta = False - for line in r.iter_lines(): - line = line.decode("utf-8") if isinstance(line, bytes) else line - if not line or not line.startswith("data:"): continue - data = line[5:].lstrip() - if data == "[DONE]": break - try: obj = json.loads(data) - except: continue - if self.api_mode == "responses": - etype = obj.get("type", "") - delta = obj.get("delta", "") if etype == "response.output_text.delta" else "" - if delta: - seen_delta = True - yield delta; buffer += delta - elif etype == "response.output_text.done" and not seen_delta: - text = obj.get("text", "") - if text: - yield text; buffer += text - elif etype == "error": - err = obj.get("error", {}) - emsg = err.get("message", str(err)) if isinstance(err, dict) else str(err) - if emsg: - yield f"Error: {emsg}" - return - elif etype == "response.completed": - break - else: - ch = (obj.get("choices") or [{}])[0] - finish_reason = ch.get("finish_reason") - delta = (ch.get("delta") or {}).get("content") - if delta: - yield delta; buffer += delta - if finish_reason: break - if '' in buffer[-30:]: break - except requests.HTTPError as e: - resp = getattr(e, "response", None) - status = getattr(resp, "status_code", "unknown") - body = "" - try: body = (resp.text or "").strip() - except: body = "" - body = body[:1200] if body else "" - yield f"Error: HTTP {status} {str(e)}; body: {body}" - except Exception as e: - yield f"Error: {str(e)}" + for attempt in range(self.max_retries + 1): + streamed_any = False + try: + with requests.post(url, headers=headers, json=payload, stream=True, + timeout=(self.connect_timeout, self.read_timeout), proxies=self.proxies) as r: + if r.status_code >= 400: + retryable = r.status_code in [408, 409, 425, 429, 500, 502, 503, 504] + if retryable and attempt < self.max_retries: + delay = self._retry_delay(r, attempt) + print(f"[LLM Retry] HTTP {r.status_code}, retry in {delay:.1f}s ({attempt+1}/{self.max_retries+1})") + time.sleep(delay) + continue + r.raise_for_status() + buffer = ''; seen_delta = False + for line in r.iter_lines(): + line = line.decode("utf-8") if isinstance(line, bytes) else line + if not line or not line.startswith("data:"): continue + data = line[5:].lstrip() + if data == "[DONE]": break + try: obj = json.loads(data) + except: continue + if self.api_mode == "responses": + etype = obj.get("type", "") + delta = obj.get("delta", "") if etype == "response.output_text.delta" else "" + if delta: + streamed_any = True; seen_delta = True + yield delta; buffer += delta + elif etype == "response.output_text.done" and not seen_delta: + text = obj.get("text", "") + if text: + streamed_any = True + yield text; buffer += text + elif etype == "error": + err = obj.get("error", {}) + emsg = err.get("message", str(err)) if isinstance(err, dict) else str(err) + if emsg: + yield f"Error: {emsg}" + return + elif etype == "response.completed": + break + else: + ch = (obj.get("choices") or [{}])[0] + finish_reason = ch.get("finish_reason") + delta = (ch.get("delta") or {}).get("content") + if delta: + streamed_any = True + yield delta; buffer += delta + if finish_reason: break + if '' in buffer[-30:]: break + return + except requests.HTTPError as e: + resp = getattr(e, "response", None) + status = getattr(resp, "status_code", "unknown") + retryable = isinstance(status, int) and status in [408, 409, 425, 429, 500, 502, 503, 504] + if retryable and attempt < self.max_retries and not streamed_any: + delay = self._retry_delay(resp, attempt) + print(f"[LLM Retry] HTTP {status}, retry in {delay:.1f}s ({attempt+1}/{self.max_retries+1})") + time.sleep(delay) + continue + body = "" + try: body = (resp.text or "").strip() + except: body = "" + body = body[:1200] if body else "" + rid = "" + retry_after = "" + ct = "" + try: + h = resp.headers or {} + rid = h.get("x-request-id") or h.get("request-id") or "" + retry_after = h.get("retry-after") or "" + ct = h.get("content-type") or "" + except: pass + yield f"Error: HTTP {status} {str(e)}; content_type: {ct or ''}; retry_after: {retry_after or ''}; request_id: {rid or ''}; body: {body}" + return + except (requests.Timeout, requests.ConnectionError) as e: + if attempt < self.max_retries and not streamed_any: + delay = self._retry_delay(None, attempt) + print(f"[LLM Retry] {type(e).__name__}, retry in {delay:.1f}s ({attempt+1}/{self.max_retries+1})") + time.sleep(delay) + continue + yield f"Error: {type(e).__name__}: {str(e)}" + return + except Exception as e: + yield f"Error: {str(e)}" + return def make_messages(self, raw_list, omit_images=True): compress_history_tags(raw_list)