Merge pull request #1 from redreamality/fix/oai-responses-api-mode
Add responses api_mode support for OpenAI relay
This commit is contained in:
11
agentmain.py
11
agentmain.py
@@ -34,7 +34,16 @@ class GeneraticAgent:
|
|||||||
if not any(x in k for x in ['api', 'config', 'cookie']): continue
|
if not any(x in k for x in ['api', 'config', 'cookie']): continue
|
||||||
try:
|
try:
|
||||||
if 'claude' in k: llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])]
|
if 'claude' in k: llm_sessions += [ClaudeSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'])]
|
||||||
if 'oai' in k: llm_sessions += [LLMSession(api_key=cfg['apikey'], api_base=cfg['apibase'], model=cfg['model'], proxy=cfg.get('proxy'))]
|
if 'oai' in k: llm_sessions += [LLMSession(
|
||||||
|
api_key=cfg['apikey'],
|
||||||
|
api_base=cfg['apibase'],
|
||||||
|
model=cfg['model'],
|
||||||
|
proxy=cfg.get('proxy'),
|
||||||
|
api_mode=cfg.get('api_mode', 'chat_completions'),
|
||||||
|
max_retries=cfg.get('max_retries', 2),
|
||||||
|
connect_timeout=cfg.get('connect_timeout', 10),
|
||||||
|
read_timeout=cfg.get('read_timeout', 120),
|
||||||
|
)]
|
||||||
if 'xai' in k: llm_sessions += [XaiSession(cfg, mykeys.get('proxy', ''))]
|
if 'xai' in k: llm_sessions += [XaiSession(cfg, mykeys.get('proxy', ''))]
|
||||||
if 'sider' in k: llm_sessions += [SiderLLMSession(cfg, default_model=x) for x in \
|
if 'sider' in k: llm_sessions += [SiderLLMSession(cfg, default_model=x) for x in \
|
||||||
["gemini-3.0-flash", "claude-haiku-4.5", "kimi-k2"]]
|
["gemini-3.0-flash", "claude-haiku-4.5", "kimi-k2"]]
|
||||||
|
|||||||
@@ -2,7 +2,11 @@
|
|||||||
oai_config = {
|
oai_config = {
|
||||||
'apikey':'sk-uklURcj',
|
'apikey':'sk-uklURcj',
|
||||||
'apibase':"http://113.145.139.147:3001",
|
'apibase':"http://113.145.139.147:3001",
|
||||||
'model':"openai/gpt-5.1"
|
'model':"openai/gpt-5.1",
|
||||||
|
'api_mode':"chat_completions", # optional: "chat_completions" | "responses"
|
||||||
|
'max_retries': 2, # optional: retries for 429/timeout/5xx
|
||||||
|
'connect_timeout': 10, # optional: seconds
|
||||||
|
'read_timeout': 120 # optional: seconds (stream read)
|
||||||
}
|
}
|
||||||
|
|
||||||
# or
|
# or
|
||||||
|
|||||||
167
sidercall.py
167
sidercall.py
@@ -86,45 +86,164 @@ class ClaudeSession:
|
|||||||
return _ask_gen() if stream else ''.join(list(_ask_gen()))
|
return _ask_gen() if stream else ''.join(list(_ask_gen()))
|
||||||
|
|
||||||
class LLMSession:
|
class LLMSession:
|
||||||
def __init__(self, api_key, api_base, model, context_win=12000, proxy=None):
|
def __init__(self, api_key, api_base, model, context_win=12000, proxy=None, api_mode="chat_completions",
|
||||||
self.api_key = api_key; self.api_base = api_base; self.default_model = model
|
max_retries=2, connect_timeout=10, read_timeout=120):
|
||||||
|
self.api_key = api_key; self.api_base = api_base.rstrip('/'); self.default_model = model
|
||||||
self.context_win = context_win; self.raw_msgs = []; self.messages = []
|
self.context_win = context_win; self.raw_msgs = []; self.messages = []
|
||||||
self.proxies = {"http": proxy, "https": proxy} if proxy else None
|
self.proxies = {"http": proxy, "https": proxy} if proxy else None
|
||||||
self.lock = threading.Lock()
|
self.lock = threading.Lock()
|
||||||
|
self.max_retries = max(0, int(max_retries))
|
||||||
|
self.connect_timeout = max(1, int(connect_timeout))
|
||||||
|
self.read_timeout = max(5, int(read_timeout))
|
||||||
|
mode = str(api_mode or "chat_completions").strip().lower().replace('-', '_')
|
||||||
|
if mode in ["responses", "response"]: self.api_mode = "responses"
|
||||||
|
else: self.api_mode = "chat_completions"
|
||||||
|
|
||||||
|
def _endpoint(self, path):
    """Build the full request URL for an API path such as ``"responses"``.

    The configured base URL may or may not already end in ``/v1``; the
    version segment is appended only when it is missing, so both
    ``http://host:3001`` and ``http://host:3001/v1`` resolve to the same
    endpoint.

    Args:
        path: Endpoint path relative to the version root, with or without
            a leading slash (e.g. ``"chat/completions"``).

    Returns:
        The absolute URL as a string.
    """
    # Normalise here as well as in __init__: api_base is a plain public
    # attribute, and a trailing slash would defeat the '/v1' suffix check
    # and produce '.../v1//v1/...'.
    base = self.api_base.rstrip('/')
    suffix = path.lstrip('/')
    if base.endswith('/v1'):
        return f"{base}/{suffix}"
    return f"{base}/v1/{suffix}"
|
||||||
|
|
||||||
|
def _retry_delay(self, resp, attempt, max_delay=120.0):
    """Return how many seconds to wait before the next retry.

    A ``Retry-After`` response header is honoured when present, in either
    of its two HTTP forms (delta-seconds or HTTP-date). Otherwise the delay
    is an exponential backoff of ``1.5 * 2**attempt`` seconds, capped at 30.

    Args:
        resp: The failed response object, or ``None`` when the request
            never produced one (timeout / connection error).
        attempt: Zero-based index of the attempt that just failed.
        max_delay: Upper bound in seconds on the returned delay, so a
            misbehaving server cannot stall the caller indefinitely.

    Returns:
        A float in the range ``[0.5, max_delay]``.
    """
    import math
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    header = None
    headers = getattr(resp, "headers", None) if resp is not None else None
    if headers:
        try:
            header = headers.get("retry-after")
            if header is None:
                # Plain dicts are case-sensitive, unlike the header mapping
                # that `requests` provides.
                header = headers.get("Retry-After")
        except AttributeError:
            header = None

    delay = None
    if header is not None:
        text = str(header).strip()
        try:
            delay = float(text)
        except ValueError:
            # Not delta-seconds; try the HTTP-date form.
            try:
                when = parsedate_to_datetime(text)
                if when.tzinfo is None:
                    when = when.replace(tzinfo=timezone.utc)
                delay = (when - datetime.now(timezone.utc)).total_seconds()
            except (TypeError, ValueError, OverflowError):
                delay = None
        # "inf"/"nan" parse as floats but are unusable as a sleep duration.
        if delay is not None and not math.isfinite(delay):
            delay = None

    if delay is None:
        # Clamp the exponent so a very large attempt count cannot overflow
        # the float conversion; the result is capped at 30s regardless.
        delay = min(30.0, 1.5 * (2 ** min(int(attempt), 16)))
    return min(float(max_delay), max(0.5, float(delay)))
|
||||||
|
|
||||||
|
def _to_responses_input(self, messages):
    """Convert chat-completions style messages to Responses API ``input``.

    Each message becomes ``{"role": ..., "content": [parts]}``. Text parts
    are typed ``output_text`` for assistant messages and ``input_text`` for
    every other role; ``image_url`` parts become ``input_image`` and are
    dropped for assistant messages.

    Args:
        messages: Iterable of dicts with ``role`` and ``content`` keys.
            ``content`` may be a string, a list of typed parts, or
            ``None``. ``None`` for the whole argument is treated as empty.

    Returns:
        A list of converted message dicts. Every message carries at least
        one text part, possibly with empty text.
    """
    allowed_roles = ("user", "assistant", "system", "developer")
    result = []
    for msg in messages or []:
        role = str(msg.get("role", "user")).lower()
        if role not in allowed_roles:
            # Unknown roles (e.g. "tool") are sent as user input.
            role = "user"
        text_type = "output_text" if role == "assistant" else "input_text"
        content = msg.get("content", "")

        parts = []
        if isinstance(content, str):
            if content:
                parts.append({"type": text_type, "text": content})
        elif isinstance(content, list):
            for part in content:
                if isinstance(part, str):
                    # Bare strings inside a content list are plain text.
                    if part:
                        parts.append({"type": text_type, "text": part})
                    continue
                if not isinstance(part, dict):
                    continue
                ptype = part.get("type")
                if ptype == "text":
                    text = part.get("text", "")
                    if text:
                        parts.append({"type": text_type, "text": text})
                elif ptype == "image_url":
                    # Accept both {"image_url": {"url": ...}} and the
                    # shorthand {"image_url": "..."}.
                    image = part.get("image_url")
                    url = image.get("url", "") if isinstance(image, dict) else image
                    if url and isinstance(url, str) and role != "assistant":
                        parts.append({"type": "input_image", "image_url": url})

        if not parts:
            # Keep the message (the role sequence matters) but never leak
            # "None" or the repr of a parts list into the prompt text.
            if content is None or isinstance(content, list):
                fallback = ""
            else:
                fallback = str(content)
            parts = [{"type": text_type, "text": fallback}]
        result.append({"role": role, "content": parts})
    return result
|
||||||
|
|
||||||
def raw_ask(self, messages, model=None, temperature=0.5):
|
def raw_ask(self, messages, model=None, temperature=0.5):
|
||||||
if model is None: model = self.default_model
|
if model is None: model = self.default_model
|
||||||
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
|
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
|
||||||
payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True}
|
if self.api_mode == "responses":
|
||||||
try:
|
url = self._endpoint("responses")
|
||||||
with requests.post(f"{self.api_base}/v1/chat/completions", headers=headers,
|
payload = {"model": model, "input": self._to_responses_input(messages), "temperature": temperature, "stream": True}
|
||||||
json=payload, stream=True, timeout=(5, 60), proxies=self.proxies) as r:
|
else:
|
||||||
r.raise_for_status()
|
url = self._endpoint("chat/completions")
|
||||||
buffer = ''
|
payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True}
|
||||||
for line in r.iter_lines():
|
for attempt in range(self.max_retries + 1):
|
||||||
line = line.decode("utf-8")
|
streamed_any = False
|
||||||
if not line or not line.startswith("data:"): continue
|
try:
|
||||||
data = line[5:].lstrip()
|
with requests.post(url, headers=headers, json=payload, stream=True,
|
||||||
if data == "[DONE]": break
|
timeout=(self.connect_timeout, self.read_timeout), proxies=self.proxies) as r:
|
||||||
obj = json.loads(data); ch = (obj.get("choices") or [{}])[0]
|
if r.status_code >= 400:
|
||||||
finish_reason = ch.get("finish_reason")
|
retryable = r.status_code in [408, 409, 425, 429, 500, 502, 503, 504]
|
||||||
delta = (ch.get("delta") or {}).get("content")
|
if retryable and attempt < self.max_retries:
|
||||||
if delta:
|
delay = self._retry_delay(r, attempt)
|
||||||
yield delta; buffer += delta
|
print(f"[LLM Retry] HTTP {r.status_code}, retry in {delay:.1f}s ({attempt+1}/{self.max_retries+1})")
|
||||||
|
time.sleep(delay)
|
||||||
|
continue
|
||||||
|
r.raise_for_status()
|
||||||
|
buffer = ''; seen_delta = False
|
||||||
|
for line in r.iter_lines():
|
||||||
|
line = line.decode("utf-8") if isinstance(line, bytes) else line
|
||||||
|
if not line or not line.startswith("data:"): continue
|
||||||
|
data = line[5:].lstrip()
|
||||||
|
if data == "[DONE]": break
|
||||||
|
try: obj = json.loads(data)
|
||||||
|
except: continue
|
||||||
|
if self.api_mode == "responses":
|
||||||
|
etype = obj.get("type", "")
|
||||||
|
delta = obj.get("delta", "") if etype == "response.output_text.delta" else ""
|
||||||
|
if delta:
|
||||||
|
streamed_any = True; seen_delta = True
|
||||||
|
yield delta; buffer += delta
|
||||||
|
elif etype == "response.output_text.done" and not seen_delta:
|
||||||
|
text = obj.get("text", "")
|
||||||
|
if text:
|
||||||
|
streamed_any = True
|
||||||
|
yield text; buffer += text
|
||||||
|
elif etype == "error":
|
||||||
|
err = obj.get("error", {})
|
||||||
|
emsg = err.get("message", str(err)) if isinstance(err, dict) else str(err)
|
||||||
|
if emsg:
|
||||||
|
yield f"Error: {emsg}"
|
||||||
|
return
|
||||||
|
elif etype == "response.completed":
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
ch = (obj.get("choices") or [{}])[0]
|
||||||
|
finish_reason = ch.get("finish_reason")
|
||||||
|
delta = (ch.get("delta") or {}).get("content")
|
||||||
|
if delta:
|
||||||
|
streamed_any = True
|
||||||
|
yield delta; buffer += delta
|
||||||
|
if finish_reason: break
|
||||||
if '</tool_use>' in buffer[-30:]: break
|
if '</tool_use>' in buffer[-30:]: break
|
||||||
if finish_reason: break
|
return
|
||||||
except Exception as e:
|
except requests.HTTPError as e:
|
||||||
yield f"Error: {str(e)}"
|
resp = getattr(e, "response", None)
|
||||||
|
status = getattr(resp, "status_code", "unknown")
|
||||||
|
retryable = isinstance(status, int) and status in [408, 409, 425, 429, 500, 502, 503, 504]
|
||||||
|
if retryable and attempt < self.max_retries and not streamed_any:
|
||||||
|
delay = self._retry_delay(resp, attempt)
|
||||||
|
print(f"[LLM Retry] HTTP {status}, retry in {delay:.1f}s ({attempt+1}/{self.max_retries+1})")
|
||||||
|
time.sleep(delay)
|
||||||
|
continue
|
||||||
|
body = ""
|
||||||
|
try: body = (resp.text or "").strip()
|
||||||
|
except: body = ""
|
||||||
|
body = body[:1200] if body else "<empty>"
|
||||||
|
rid = ""
|
||||||
|
retry_after = ""
|
||||||
|
ct = ""
|
||||||
|
try:
|
||||||
|
h = resp.headers or {}
|
||||||
|
rid = h.get("x-request-id") or h.get("request-id") or ""
|
||||||
|
retry_after = h.get("retry-after") or ""
|
||||||
|
ct = h.get("content-type") or ""
|
||||||
|
except: pass
|
||||||
|
yield f"Error: HTTP {status} {str(e)}; content_type: {ct or '<empty>'}; retry_after: {retry_after or '<empty>'}; request_id: {rid or '<empty>'}; body: {body}"
|
||||||
|
return
|
||||||
|
except (requests.Timeout, requests.ConnectionError) as e:
|
||||||
|
if attempt < self.max_retries and not streamed_any:
|
||||||
|
delay = self._retry_delay(None, attempt)
|
||||||
|
print(f"[LLM Retry] {type(e).__name__}, retry in {delay:.1f}s ({attempt+1}/{self.max_retries+1})")
|
||||||
|
time.sleep(delay)
|
||||||
|
continue
|
||||||
|
yield f"Error: {type(e).__name__}: {str(e)}"
|
||||||
|
return
|
||||||
|
except Exception as e:
|
||||||
|
yield f"Error: {str(e)}"
|
||||||
|
return
|
||||||
|
|
||||||
def make_messages(self, raw_list, omit_images=True):
|
def make_messages(self, raw_list, omit_images=True):
|
||||||
compress_history_tags(raw_list)
|
compress_history_tags(raw_list)
|
||||||
messages = []
|
messages = []
|
||||||
for i, msg in enumerate(raw_list):
|
for i, msg in enumerate(raw_list):
|
||||||
prompt = msg['prompt']
|
prompt = msg['prompt']
|
||||||
if omit_images and msg['image']: messages.append({"role": msg['role'], "content": "[Image omitted, if you needed it, ask me]\n" + prompt})
|
image = msg.get('image')
|
||||||
elif not omit_images and msg['image']:
|
if omit_images and image: messages.append({"role": msg['role'], "content": "[Image omitted, if you needed it, ask me]\n" + prompt})
|
||||||
|
elif not omit_images and image:
|
||||||
messages.append({"role": msg['role'], "content": [
|
messages.append({"role": msg['role'], "content": [
|
||||||
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{msg['image']}"}},
|
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image}"}},
|
||||||
{"type": "text", "text": prompt} ]})
|
{"type": "text", "text": prompt} ]})
|
||||||
else:
|
else:
|
||||||
messages.append({"role": msg['role'], "content": prompt})
|
messages.append({"role": msg['role'], "content": prompt})
|
||||||
|
|||||||
Reference in New Issue
Block a user