refactor: remove XAI/Gemini sessions, add MixinSession fallback, improve error handling

- Remove GeminiSession and XaiSession from llmcore.py
- Add MixinSession: multi-endpoint fallback with exponential backoff
- ga.py: handle empty/incomplete/max_tokens responses
- Update mykey_template.py and GETTING_STARTED.md accordingly
This commit is contained in:
Liang Jiaqing
2026-03-28 08:14:35 +08:00
parent 38e2a460e3
commit a5dbce396a
5 changed files with 52 additions and 82 deletions

View File

@@ -91,7 +91,7 @@ native_claude_config = {
} }
``` ```
> 💡 还支持 `native_oai_config`(OpenAI 标准工具调用)、`xai_config`(Grok)、`sider_cookie`(Sider),详见 `mykey_template.py` 中的注释。 > 💡 还支持 `native_oai_config`(OpenAI 标准工具调用)、`sider_cookie`(Sider),详见 `mykey_template.py` 中的注释。
### 关键规则 ### 关键规则

View File

@@ -5,7 +5,7 @@ if sys.stderr is None: sys.stderr = open(os.devnull, "w")
elif hasattr(sys.stderr, 'reconfigure'): sys.stderr.reconfigure(errors='replace') elif hasattr(sys.stderr, 'reconfigure'): sys.stderr.reconfigure(errors='replace')
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from llmcore import SiderLLMSession, LLMSession, ToolClient, ClaudeSession, XaiSession, NativeToolClient, NativeClaudeSession, build_multimodal_content, NativeOAISession from llmcore import SiderLLMSession, LLMSession, ToolClient, ClaudeSession, MixinSession, NativeToolClient, NativeClaudeSession, build_multimodal_content, NativeOAISession
from agent_loop import agent_runner_loop from agent_loop import agent_runner_loop
from ga import GenericAgentHandler, smart_format, get_global_memory, format_error from ga import GenericAgentHandler, smart_format, get_global_memory, format_error
@@ -48,10 +48,14 @@ class GeneraticAgent:
elif 'native' in k and 'oai' in k: llm_sessions += [NativeToolClient(NativeOAISession(cfg=cfg))] elif 'native' in k and 'oai' in k: llm_sessions += [NativeToolClient(NativeOAISession(cfg=cfg))]
elif 'claude' in k: llm_sessions += [ToolClient(ClaudeSession(cfg=cfg))] elif 'claude' in k: llm_sessions += [ToolClient(ClaudeSession(cfg=cfg))]
elif 'oai' in k: llm_sessions += [ToolClient(LLMSession(cfg=cfg))] elif 'oai' in k: llm_sessions += [ToolClient(LLMSession(cfg=cfg))]
elif 'xai' in k: llm_sessions += [ToolClient(XaiSession(cfg=cfg))]
elif 'sider' in k: llm_sessions += [ToolClient(SiderLLMSession(cfg={'apikey': cfg, 'model': x})) for x in \ elif 'sider' in k: llm_sessions += [ToolClient(SiderLLMSession(cfg={'apikey': cfg, 'model': x})) for x in \
["gemini-3.0-flash", "gpt-5.4"]] ["gemini-3.0-flash", "gpt-5.4"]]
elif 'mixin' in k: llm_sessions += [{'mixin_cfg': cfg}]
except: pass except: pass
for i, s in enumerate(llm_sessions):
if isinstance(s, dict) and 'mixin_cfg' in s:
try: llm_sessions[i] = ToolClient(MixinSession(llm_sessions, s['mixin_cfg']))
except Exception as e: print(f'[WARN] Failed to init MixinSession with cfg {s["mixin_cfg"]}: {e}')
self.llmclients = llm_sessions self.llmclients = llm_sessions
self.lock = threading.Lock() self.lock = threading.Lock()
self.history = [] self.history = []

8
ga.py
View File

@@ -441,11 +441,13 @@ class GenericAgentHandler(BaseHandler):
二次确认仅在回复几乎只包含<thinking>/<summary>和一段大代码块时触发。 二次确认仅在回复几乎只包含<thinking>/<summary>和一段大代码块时触发。
''' '''
content = getattr(response, 'content', '') or "" content = getattr(response, 'content', '') or ""
# 1. 空回复保护:要求模型重新生成内容或调用工具
if not response or not content.strip(): if not response or not content.strip():
yield "[Warn] LLM returned an empty response. Retrying...\n" yield "[Warn] LLM returned an empty response. Retrying...\n"
next_prompt = "[System] 回复为空,请重新生成内容或调用工具。" return StepOutcome({}, next_prompt="[System] Blank response, regenerate and tooluse", should_exit=False)
return StepOutcome({}, next_prompt=next_prompt, should_exit=False) if '流异常中断,未收到完整响应 !!!]' in content:
return StepOutcome({}, next_prompt="[System] Incomplete response. Regenerate and tooluse.", should_exit=False)
if 'max_tokens !!!]' in content:
return StepOutcome({}, next_prompt="[System] max_tokens limit reached. Use multi small steps to do it.", should_exit=False)
# 2. 检测“包含较大代码块但未调用工具”的情况 # 2. 检测“包含较大代码块但未调用工具”的情况
# 这里通过三引号代码块 + 最少字符数的方式粗略判断“大段代码” # 这里通过三引号代码块 + 最少字符数的方式粗略判断“大段代码”
code_block_pattern = r"```[a-zA-Z0-9_]*\n[\s\S]{100,}?```" code_block_pattern = r"```[a-zA-Z0-9_]*\n[\s\S]{100,}?```"

View File

@@ -456,75 +456,6 @@ class LLMSession:
if stream: return _ask_gen() if stream: return _ask_gen()
return ''.join(list(_ask_gen())) return ''.join(list(_ask_gen()))
class GeminiSession:
    """Single-turn text client for the Google Generative Language REST API.

    Failures are never raised from ``ask``; they are reported in-band as a
    text reply starting with ``[GeminiError]``.
    """

    def __init__(self, cfg):
        """Read credentials and defaults from ``cfg``.

        Args:
            cfg: Mapping with ``apikey`` (required) and optional ``model``
                and ``proxy``. When ``proxy`` is absent, the module-level
                ``proxy`` value is used instead.

        Raises:
            ValueError: If ``apikey`` is missing or empty.
        """
        self.api_key = cfg.get('apikey')
        if not self.api_key:
            raise ValueError("google_api_key 未配置或为空,请在 mykey.py 中设置")
        self.default_model = cfg.get('model', 'gemini-2.0-flash-001')
        p = cfg.get('proxy', proxy)
        # The same proxy is applied to both schemes; None disables proxying.
        self.proxies = {"http": p, "https": p} if p else None

    def ask(self, prompt, model=None, stream=False):
        """Send ``prompt`` as a single user turn and return the reply text.

        Args:
            prompt: Text placed in the single ``user`` part of the request.
            model: Model name; falls back to ``self.default_model``.
            stream: When true, the reply is wrapped in a one-item iterator.
                No incremental streaming takes place.

        Returns:
            The reply text (or a ``[GeminiError] ...`` message) as a ``str``,
            or as a one-item iterator over that ``str`` when ``stream`` is
            true. Error replies follow the same shape as successful ones.
        """
        def _reply(text):
            # Keep the return shape uniform for success and failure so that
            # streaming callers always receive an iterator of chunks.
            return iter([text]) if stream else text

        if model is None:
            model = self.default_model
        url = f"https://generativelanguage.googleapis.com/v1/models/{model}:generateContent?key={self.api_key}"
        headers = {"Content-Type": "application/json"}
        data = {"contents": [{"role": "user", "parts": [{"text": prompt}]}]}
        try:
            r = requests.post(url, headers=headers, json=data, timeout=60, proxies=self.proxies)
        except Exception as e:
            # Deliberately broad: any transport failure becomes an in-band error.
            return _reply(f"[GeminiError] request failed: {e}")
        if r.status_code != 200:
            # Truncate and flatten the body so the message stays on one line.
            body = r.text[:500].replace("\n", " ")
            return _reply(f"[GeminiError] HTTP {r.status_code}: {body}")
        try:
            cands = r.json().get("candidates") or []
            if not cands:
                return _reply("[GeminiError] empty candidates")
            parts = (cands[0].get("content") or {}).get("parts") or []
            full_text = "".join(part.get("text", "") for part in parts)
        except Exception as e:
            return _reply(f"[GeminiError] invalid response format: {e}")
        return _reply(full_text)
class XaiSession:
    """Chat session for xAI Grok built on ``xai_sdk``.

    Each request passes the id of the previous response (when one exists) as
    ``previous_response_id`` so consecutive calls form one conversation.
    Failures are reported in-band as text starting with ``[XaiError]``.
    """

    def __init__(self, cfg):
        """Create the SDK client from ``cfg``.

        Args:
            cfg: Mapping with ``apikey`` (required) and optional ``model``
                and ``proxy``. An absent ``proxy`` key falls back to
                ``http://127.0.0.1:2082``; an explicitly falsy value
                (``None`` or ``''``) leaves ``grpc_proxy`` untouched.

        Raises:
            KeyError: If ``apikey`` is missing from ``cfg``.
            ImportError: If ``xai_sdk`` is not installed.
        """
        # Imported lazily so the module loads without xai_sdk installed.
        import xai_sdk
        from xai_sdk.chat import user, system
        self._user, self._system = user, system
        self.default_model = cfg.get('model', 'grok-4-1-fast-non-reasoning')
        self._last_response_id = None  # id of the latest response, used to chain turns
        os.environ["XAI_API_KEY"] = cfg['apikey']
        proxy = self._normalize_proxy(cfg.get('proxy', 'http://127.0.0.1:2082'))
        if proxy:
            # setdefault: a grpc_proxy already present in the environment wins.
            os.environ.setdefault("grpc_proxy", proxy)
        self._client = xai_sdk.Client()

    @staticmethod
    def _normalize_proxy(proxy):
        """Return ``proxy`` with an ``http://`` scheme, or None when it is falsy."""
        if not proxy:
            return None
        if not proxy.startswith("http"):
            return f"http://{proxy}"
        return proxy

    def ask(self, prompt, model=None, system_prompt=None, stream=False):
        """Send a message, chaining it onto the previous turn automatically.

        Args:
            prompt: User message text.
            model: Model name; falls back to ``self.default_model``.
            system_prompt: Optional system message appended before the prompt.
            stream: When true, return a generator of text chunks.

        Returns:
            The response text, or an iterable of text chunks when ``stream``
            is true. On failure the text is ``[XaiError] <exception>``.
        """
        mdl = model or self.default_model
        try:
            kw = dict(model=mdl, store_messages=True)
            if self._last_response_id:
                kw["previous_response_id"] = self._last_response_id
            chat = self._client.chat.create(**kw)
            if system_prompt:
                chat.append(self._system(system_prompt))
            chat.append(self._user(prompt))
            if stream:
                return self._stream(chat)
            resp = chat.sample()
            self._last_response_id = resp.id
            return resp.content
        except Exception as e:
            # Deliberately broad: every failure is surfaced as reply text.
            err = f"[XaiError] {e}"
            return iter([err]) if stream else err

    def _stream(self, chat):
        """Yield text chunks from ``chat.stream()`` and record the final response id."""
        try:
            last_resp = None
            for resp, chunk in chat.stream():
                last_resp = resp
                if chunk and chunk.content:
                    yield chunk.content
            if last_resp and hasattr(last_resp, 'id'):
                self._last_response_id = last_resp.id
        except Exception as e:
            yield f"[XaiError] {e}"

    def reset(self):
        """Forget the previous response id so the next call starts a new conversation."""
        self._last_response_id = None
class NativeOAISession: class NativeOAISession:
def __init__(self, cfg): def __init__(self, cfg):
@@ -869,6 +800,41 @@ def tryparse(json_str):
return json.loads(json_str) return json.loads(json_str)
class MixinSession:
    """Multi-session fallback with exponential backoff on ``Error:`` detection.

    Wraps several backend sessions of the same type. The first one is the
    primary: its ``raw_ask`` attribute is replaced with a retrying wrapper,
    and every attribute not defined here is looked up on the primary.
    """

    def __init__(self, all_sessions, cfg):
        """Select the backends named by ``cfg`` and hook the primary.

        Args:
            all_sessions: Sequence of client objects exposing ``.backend``.
            cfg: Mapping with ``llm_nos`` (indices into ``all_sessions``),
                and optional ``max_retries`` (default 3) and ``base_delay``
                in seconds (default 1.5).

        Raises:
            ValueError: If ``llm_nos`` selects nothing, the primary is a
                ``Native`` session, or the selected sessions differ in type.
        """
        self._retries = cfg.get('max_retries', 3)
        self._base_delay = cfg.get('base_delay', 1.5)
        self._sessions = [all_sessions[i].backend for i in cfg.get('llm_nos', [])]
        # Explicit raises instead of assert: asserts are stripped under -O.
        if not self._sessions:
            raise ValueError("MixinSession: 'llm_nos' must reference at least one session")
        if 'Native' in self._sessions[0].__class__.__name__:
            raise ValueError('MixinSession: Native sessions are not supported')
        if len({type(s) for s in self._sessions}) != 1:
            raise ValueError(f'MixinSession: all sessions must be same type, got {[type(s).__name__ for s in self._sessions]}')
        # Capture the original bound methods before the primary is patched,
        # otherwise the wrapper would end up calling itself.
        self._orig_raw_asks = [s.raw_ask for s in self._sessions]
        self._sessions[0].raw_ask = self._raw_ask
        self.default_model = getattr(self._sessions[0], 'default_model', None)

    def __getattr__(self, name):
        """Delegate attributes not found on this object to the primary session."""
        # Read through __dict__: touching self._sessions before __init__ has
        # set it would re-enter __getattr__ and recurse without end.
        sessions = self.__dict__.get('_sessions')
        if not sessions:
            raise AttributeError(name)
        return getattr(sessions[0], name)

    @property
    def primary(self):
        """The first configured backend session."""
        return self._sessions[0]

    def _raw_ask(self, *args, **kwargs):
        """Generator that tries the backends in rotation until one succeeds.

        A backend attempt counts as failed when its first yielded item is a
        string starting with ``Error:``. Failed attempts are followed by a
        sleep of ``base_delay * 2**attempt`` seconds, capped at 30.

        Yields:
            The chunks of the first successful attempt, or a single error
            string once all attempts have failed.

        Returns:
            The successful backend generator's return value (``[]`` when it
            is falsy), or a one-item text block list holding the error.
        """
        last_err = None
        for attempt in range(self._retries + 1):
            gen = self._orig_raw_asks[attempt % len(self._sessions)](*args, **kwargs)
            try:
                first = next(gen)
            except StopIteration as e:
                # The backend finished without yielding; pass its result on.
                return e.value or []
            if isinstance(first, str) and first.startswith('Error:'):
                last_err = first
                for _ in gen:
                    pass  # drain so the failed generator runs to completion
                if attempt < self._retries:
                    delay = min(30, self._base_delay * (2 ** attempt))
                    print(f'[MixinSession] {first[:80]}, retry {attempt+1}/{self._retries} in {delay:.1f}s')
                    time.sleep(delay)
                    continue
            else:
                yield first
                try:
                    while True:
                        yield next(gen)
                except StopIteration as e:
                    return e.value or []
        # Keep the yielded text and the returned block in agreement even
        # when no attempt ran at all (negative max_retries).
        message = last_err or 'Error: all retries exhausted'
        yield message
        return [{'type': 'text', 'text': message}]
class NativeToolClient: class NativeToolClient:
THINKING_PROMPT = """ THINKING_PROMPT = """
### 行动规范(持续有效) ### 行动规范(持续有效)

View File

@@ -6,6 +6,11 @@
# 填完整路径 'http://host:2001/v1/chat/completions' → 直接使用,不再拼接 # 填完整路径 'http://host:2001/v1/chat/completions' → 直接使用,不再拼接
# ══════════════════════════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════════════════════════
# ── Mixin (实验性) ───────────────────────────────────────────────────────────────
# key命名含 'mixin' 触发 MixinSession:多key/endpoint自动fallback + 指数退避重试
# 约束:引用的session须同类型,不支持Native
# mixin_config = {'llm_nos': [1, 2], 'max_retries': 3, 'base_delay': 1.5} # 序号含自身,此处mixin=0
# ── OpenAI-compatible (chat/completions or responses API) ────────────────────── # ── OpenAI-compatible (chat/completions or responses API) ──────────────────────
# key命名含 'oai' 触发 LLMSession # key命名含 'oai' 触发 LLMSession
oai_config = { oai_config = {
@@ -63,13 +68,6 @@ native_oai_config = {
# key命名含 'sider' 触发 SiderLLMSession(需安装 sider_ai_api 包) # key命名含 'sider' 触发 SiderLLMSession(需安装 sider_ai_api 包)
#sider_cookie = 'token=Bearer%20eyJhbGciOiJIUz...' #sider_cookie = 'token=Bearer%20eyJhbGciOiJIUz...'
# ── xAI Grok ────────────────────────────────────────────────────────────────────
# key命名含 'xai' 触发 XaiSession需安装 xai_sdk 包)
# xai_config = {
# 'apikey': 'xai-...',
# 'model': 'grok-4-1-fast-non-reasoning',
# 'proxy': 'http://127.0.0.1:2082',
# }
# If you need them # If you need them
# tg_bot_token = '84102K2gYZ...' # tg_bot_token = '84102K2gYZ...'