diff --git a/frontends/desktop_pet_v2.pyw b/frontends/desktop_pet_v2.pyw index 0a59932..1eb96ff 100644 --- a/frontends/desktop_pet_v2.pyw +++ b/frontends/desktop_pet_v2.pyw @@ -774,4 +774,4 @@ if __name__ == '__main__': pet = MacPet() pet.run() else: - pet = WinPet() + pet = WinPet('vita') diff --git a/frontends/qtapp.py b/frontends/qtapp.py index 1a37567..ce69c34 100644 --- a/frontends/qtapp.py +++ b/frontends/qtapp.py @@ -1222,7 +1222,7 @@ class ChatPanel(QWidget): for idx, tc in enumerate(self.agent.llmclients): b = tc.backend - name = f"{type(b).__name__}/{b.default_model}" + name = f"{type(b).__name__}/{b.model}" is_current = idx == self.agent.llm_no row = QWidget() @@ -1296,9 +1296,9 @@ class ChatPanel(QWidget): reply = backend.ask("你好", stream=False) text = str(reply).strip() if reply else "" ok = len(text) > 0 and not text.startswith("Error") and not text.startswith("[") - print(f"[HealthCheck] Backend #{idx} {type(backend).__name__}/{backend.default_model}: {'OK' if ok else 'FAIL'} -> {text[:60]}") + print(f"[HealthCheck] Backend #{idx} {type(backend).__name__}/{backend.model}: {'OK' if ok else 'FAIL'} -> {text[:60]}") except Exception as e: - print(f"[HealthCheck] Backend #{idx} {type(backend).__name__}/{backend.default_model}: ERROR -> {e}") + print(f"[HealthCheck] Backend #{idx} {type(backend).__name__}/{backend.model}: ERROR -> {e}") ok = False if hasattr(backend, 'raw_msgs') and backend.raw_msgs: backend.raw_msgs = [m for m in backend.raw_msgs if m.get("prompt") != "你好"] diff --git a/frontends/stapp.py b/frontends/stapp.py index 8096a33..baf312f 100644 --- a/frontends/stapp.py +++ b/frontends/stapp.py @@ -48,7 +48,11 @@ def render_sidebar(): pet_script = os.path.join(os.path.dirname(__file__), 'desktop_pet_v2.pyw') if not os.path.exists(pet_script): pet_script = os.path.join(os.path.dirname(__file__), 'desktop_pet.pyw') subprocess.Popen([sys.executable, pet_script], **kwargs) - def _pet_req(q): threading.Thread(target=lambda: urlopen(f'http://127.0.0.1:51983/?{q}', timeout=2), daemon=True).start() + def _pet_req(q): + def _do(): + try: urlopen(f'http://127.0.0.1:51983/?{q}', timeout=2) + except Exception: pass + threading.Thread(target=_do, daemon=True).start() agent._pet_req = _pet_req if not hasattr(agent, '_turn_end_hooks'): agent._turn_end_hooks = {} def _pet_hook(ctx): diff --git a/llmcore.py b/llmcore.py index e89f821..8eaff4c 100644 --- a/llmcore.py +++ b/llmcore.py @@ -94,9 +94,9 @@ class SiderLLMSession: def __init__(self, cfg): from sider_ai_api import Session # 不使用sider的话没必要安装这个包 self._core = Session(cookie=cfg['apikey'], proxies=proxies) - self.default_model = cfg.get('model', 'gemini-3.0-flash') + self.model = cfg.get('model', 'gemini-3.0-flash') def ask(self, prompt, stream=False): - model = self.default_model + model = self.model if len(prompt) > 28000: print(f"[Warn] Prompt too long ({len(prompt)} chars), truncating.") prompt = prompt[-28000:] @@ -425,33 +425,46 @@ class BaseSession: def __init__(self, cfg): self.api_key = cfg['apikey'] self.api_base = cfg['apibase'].rstrip('/') - self.default_model = cfg.get('model', '') + self.model = cfg.get('model', '') self.context_win = cfg.get('context_win', 24000) self.history = [] self.lock = threading.Lock() self.system = "" - self.name = cfg.get('name', self.default_model) + self.name = cfg.get('name', self.model) proxy = cfg.get('proxy') self.proxies = {"http": proxy, "https": proxy} if proxy else None self.max_retries = max(0, int(cfg.get('max_retries', 1))) self.connect_timeout = max(1, int(cfg.get('timeout', 5))) self.read_timeout = max(5, int(cfg.get('read_timeout', 30))) - effort = cfg.get('reasoning_effort') - effort = None if effort is None else str(effort).strip().lower() - self.reasoning_effort = effort if effort in ('none', 'minimal', 'low', 'medium', 'high', 'xhigh') else None - if effort and not self.reasoning_effort: print(f"[WARN] Invalid reasoning_effort {effort!r}, ignored.") + def _enum(key, valid): + v = cfg.get(key); v = None if v is None else str(v).strip().lower() + return v if not v or v in valid else print(f"[WARN] Invalid {key} {v!r}, ignored.") + self.reasoning_effort = _enum('reasoning_effort', {'none', 'minimal', 'low', 'medium', 'high', 'xhigh'}) + self.thinking_type = _enum('thinking_type', {'adaptive', 'enabled', 'disabled'}) + self.thinking_budget_tokens = cfg.get('thinking_budget_tokens') mode = str(cfg.get('api_mode', 'chat_completions')).strip().lower().replace('-', '_') self.api_mode = 'responses' if mode in ('responses', 'response') else 'chat_completions' self.temperature = cfg.get('temperature', 1.0) self.max_tokens = cfg.get('max_tokens', 8192) + def _apply_claude_thinking(self, payload): + if self.thinking_type: + thinking = {"type": self.thinking_type} + if self.thinking_type == 'enabled': + if self.thinking_budget_tokens is None: print("[WARN] thinking_type='enabled' requires thinking_budget_tokens, ignored.") + else: + thinking["budget_tokens"] = self.thinking_budget_tokens; payload["thinking"] = thinking + else: payload["thinking"] = thinking + if self.reasoning_effort: + effort = {'low': 'low', 'medium': 'medium', 'high': 'high', 'xhigh': 'max'}.get(self.reasoning_effort) + if effort: payload["output_config"] = {"effort": effort} + else: print(f"[WARN] reasoning_effort {self.reasoning_effort!r} is unsupported for Claude output_config.effort, ignored.") def ask(self, prompt, stream=False): def _ask_gen(): - content = '' with self.lock: self.history.append({"role": "user", "content": [{"type": "text", "text": prompt}]}) trim_messages_history(self.history, self.context_win) messages = self.make_messages(self.history) - content_blocks = None + content_blocks = None; content = '' gen = self.raw_ask(messages) try: while True: chunk = next(gen); content += chunk; yield chunk @@ -466,10 +479,9 @@ class BaseSession: class ClaudeSession(BaseSession): def raw_ask(self, messages): - model = self.default_model headers = {"x-api-key": self.api_key, "Content-Type": "application/json", "anthropic-version": "2023-06-01", "anthropic-beta": "prompt-caching-2024-07-31"} - payload = {"model": model, "messages": messages, "temperature": self.temperature, "max_tokens": self.max_tokens, "stream": True} - if self.reasoning_effort: payload["reasoning_effort"] = self.reasoning_effort + payload = {"model": self.model, "messages": messages, "temperature": self.temperature, "max_tokens": self.max_tokens, "stream": True} + self._apply_claude_thinking(payload) if self.system: payload["system"] = [{"type": "text", "text": self.system, "cache_control": {"type": "persistent"}}] try: with requests.post(auto_make_url(self.api_base, "messages"), headers=headers, json=payload, stream=True, timeout=(self.connect_timeout, self.read_timeout)) as r: @@ -487,7 +499,7 @@ class ClaudeSession(BaseSession): class LLMSession(BaseSession): def raw_ask(self, messages): - return (yield from _openai_stream(self.api_base, self.api_key, messages, self.default_model, self.api_mode, + return (yield from _openai_stream(self.api_base, self.api_key, messages, self.model, self.api_mode, temperature=self.temperature, reasoning_effort=self.reasoning_effort, max_tokens=self.max_tokens, max_retries=self.max_retries, connect_timeout=self.connect_timeout, read_timeout=self.read_timeout, proxies=self.proxies)) @@ -521,7 +533,7 @@ class NativeClaudeSession(BaseSession): self.tools = None def raw_ask(self, messages): messages = _fix_messages(messages) - model = self.default_model + model = self.model beta_parts = ["claude-code-20250219", "interleaved-thinking-2025-05-14", "redact-thinking-2026-02-12", "prompt-caching-scope-2026-01-05"] if "[1m]" in model.lower(): beta_parts.insert(1, "context-1m-2025-08-07"); model = model.replace("[1m]", "").replace("[1M]", "") @@ -531,7 +543,7 @@ class NativeClaudeSession(BaseSession): if self.api_key.startswith("sk-ant-"): headers["x-api-key"] = self.api_key else: headers["authorization"] = f"Bearer {self.api_key}" payload = {"model": model, "messages": messages, "temperature": self.temperature, "max_tokens": self.max_tokens, "stream": True} - if self.reasoning_effort: payload["reasoning_effort"] = self.reasoning_effort + self._apply_claude_thinking(payload) payload["metadata"] = {"user_id": json.dumps({"device_id": self._device_id, "account_uuid": self._account_uuid, "session_id": self._session_id}, separators=(',', ':'))} if self.tools: claude_tools = openai_tools_to_claude(self.tools) @@ -587,7 +599,7 @@ class NativeOAISession(NativeClaudeSession): def raw_ask(self, messages): """OpenAI streaming. yields text chunks, generator return = list[content_block]""" msgs = ([{"role": "system", "content": self.system}] if self.system else []) + _msgs_claude2oai(messages) - return (yield from _openai_stream(self.api_base, self.api_key, msgs, self.default_model, self.api_mode, + return (yield from _openai_stream(self.api_base, self.api_key, msgs, self.model, self.api_mode, temperature=self.temperature, max_tokens=self.max_tokens, tools=self.tools, reasoning_effort=self.reasoning_effort, max_retries=self.max_retries, connect_timeout=self.connect_timeout, @@ -643,8 +655,6 @@ class ToolClient: _write_llm_log('Response', raw_text) return self._parse_mixed_response(raw_text) - #def _should_use_structured_messages(self, messages): return isinstance(self.backend, LLMSession) and any(isinstance(m.get("content"), list) for m in messages) - def _estimate_content_len(self, content): if isinstance(content, str): return len(content) if isinstance(content, list): @@ -802,7 +812,7 @@ class MixinSession: import copy; self._sessions[0] = copy.copy(self._sessions[0]) self._orig_raw_asks = [s.raw_ask for s in self._sessions] self._sessions[0].raw_ask = self._raw_ask - self.default_model = getattr(self._sessions[0], 'default_model', None) + self.model = getattr(self._sessions[0], 'model', None) self._cur_idx, self._switched_at = 0, 0.0 def __getattr__(self, name): return getattr(self._sessions[0], name) _BROADCAST_ATTRS = frozenset({'system', 'tools', 'temperature', 'max_tokens', 'reasoning_effort'}) diff --git a/tests/test_minimax.py b/tests/test_minimax.py index 67544de..19de58c 100644 --- a/tests/test_minimax.py +++ b/tests/test_minimax.py @@ -231,7 +231,7 @@ class TestMiniMaxLLMSessionConfig(unittest.TestCase): 'read_timeout': 120, } session = LLMSession(cfg) - self.assertEqual(session.default_model, 'MiniMax-M2.7') + self.assertEqual(session.model, 'MiniMax-M2.7') self.assertEqual(session.api_base, 'https://api.minimax.io/v1') self.assertEqual(session.context_win, 50000) self.assertEqual(session.max_retries, 2) @@ -246,7 +246,7 @@ class TestMiniMaxLLMSessionConfig(unittest.TestCase): 'model': 'MiniMax-M2.7-highspeed', } session = LLMSession(cfg) - self.assertEqual(session.default_model, 'MiniMax-M2.7-highspeed') + self.assertEqual(session.model, 'MiniMax-M2.7-highspeed') class TestMiniMaxNativeToolClientThinkTag(unittest.TestCase):