From cac3ba4769bfb0aaaed94ded742a4dbf2d326895 Mon Sep 17 00:00:00 2001 From: Liang Jiaqing Date: Sat, 25 Apr 2026 10:33:42 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20reasoning/thinking=E4=BA=92?= =?UTF-8?q?=E9=80=9A=E9=80=82=E9=85=8D=20+=20history=E7=AA=97=E5=8F=A3?= =?UTF-8?q?=E6=89=A9=E5=A4=A7=20+=20summary=E6=8F=90=E7=A4=BA=E5=BC=BA?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- assets/sys_prompt.txt | 2 +- ga.py | 4 ++-- llmcore.py | 26 ++++++++++++++++++++------ 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/assets/sys_prompt.txt b/assets/sys_prompt.txt index e7b9ef1..def4a57 100644 --- a/assets/sys_prompt.txt +++ b/assets/sys_prompt.txt @@ -1,6 +1,6 @@ # Role: 物理级全能执行者 你拥有文件读写、脚本执行、用户浏览器JS注入、系统级干预的物理操作权限。禁止推诿"无法操作"——不空想,用工具探测。 ## 行动原则 -调用工具前在 内推演:当前阶段、上步结果是否符合预期、下步策略。 +调用工具前在内推演:当前阶段、上步结果是否符合预期、下步策略;内输出极简总结。 - 探测优先:失败时先充分获取信息(日志/状态/上下文),关键信息存入工作记忆,再决定重试或换方案。不可逆操作先询问用户。 - 失败升级:1次→读错误理解原因,2次→探测环境状态,3次→深度分析后换方案或问用户。禁止无新信息的重复操作。 diff --git a/ga.py b/ga.py index 869f8ee..428d92f 100644 --- a/ga.py +++ b/ga.py @@ -504,7 +504,7 @@ class GenericAgentHandler(BaseHandler): def _get_anchor_prompt(self, skip=False): if skip: return "\n" - h_str = "\n".join(self.history_info[-20:]) + h_str = "\n".join(self.history_info[-40:]) prompt = f"\n### [WORKING MEMORY]\n\n{h_str}\n" prompt += f"\nCurrent turn: {self.current_turn}\n" if self.working.get('key_info'): prompt += f"\n{self.working.get('key_info')}" @@ -523,7 +523,7 @@ class GenericAgentHandler(BaseHandler): clean_args = {k: v for k, v in args.items() if not k.startswith('_')} summary = f"调用工具{tool_name}, args: {clean_args}" if tool_name == 'no_tool': summary = "直接回答了用户问题" - next_prompt += "\n[DANGER] 上一轮遗漏了,需要按协议在中输出极简单行摘要!" + next_prompt += "\n[DANGER] 你遗漏了,必须按协议一直在每次回复中用中输出极简单行摘要!" summary = smart_format(summary, max_str_len=100) self.history_info.append(f'[Agent] {summary}') if turn % 65 == 0 and 'plan' not in str(self.working.get('related_sop')): diff --git a/llmcore.py b/llmcore.py index 2a84677..01ca87c 100644 --- a/llmcore.py +++ b/llmcore.py @@ -229,6 +229,7 @@ def _parse_openai_sse(resp_lines, api_mode="chat_completions"): return blocks else: tc_buf = {} # index -> {id, name, args} + reasoning_text = "" for line in resp_lines: if not line: continue line = line.decode('utf-8', errors='replace') if isinstance(line, bytes) else line @@ -239,6 +240,8 @@ def _parse_openai_sse(resp_lines, api_mode="chat_completions"): except: continue ch = (evt.get("choices") or [{}])[0] delta = ch.get("delta") or {} + if delta.get("reasoning_content"): + reasoning_text += delta["reasoning_content"] if delta.get("content"): text = delta["content"]; content_text += text; yield text for tc in (delta.get("tool_calls") or []): @@ -253,6 +256,7 @@ def _parse_openai_sse(resp_lines, api_mode="chat_completions"): usage = evt.get("usage") if usage: _record_usage(usage, api_mode) blocks = [] + if reasoning_text: blocks.append({"type": "thinking", "thinking": reasoning_text}) if content_text: blocks.append({"type": "text", "text": content_text}) for idx in sorted(tc_buf): tc = tc_buf[idx] @@ -294,6 +298,9 @@ def _parse_openai_json(data, api_mode="chat_completions"): else: _record_usage(data.get("usage") or {}, api_mode) msg = (data.get("choices") or [{}])[0].get("message", {}) + reasoning = msg.get("reasoning_content", "") + if reasoning: + blocks.append({"type": "thinking", "thinking": reasoning}) content = msg.get("content", "") if content: blocks.append({"type": "text", "text": content}); yield content @@ -412,8 +419,8 @@ def _to_responses_input(messages): elif ptype == "image_url": url = (part.get("image_url") or {}).get("url", "") if url and role != "assistant": parts.append({"type": "input_image", "image_url": url}) - if len(parts) == 0: parts = [{"type": text_type, "text": str(content) or '[empty]'}] - result.append({"role": role, "content": parts}) + if len(parts) == 0 and not isinstance(content, list): parts = [{"type": text_type, "text": str(content) or '[empty]'}] + if parts: result.append({"role": role, "content": parts}) pending = [] for tc in (msg.get("tool_calls") or []): f = tc.get("function", {}) @@ -430,16 +437,18 @@ def _msgs_claude2oai(messages): content = msg.get("content", "") blocks = content if isinstance(content, list) else [{"type": "text", "text": str(content)}] if role == "assistant": - text_parts, tool_calls = [], [] + text_parts, tool_calls, reasoning = [], [], "" for b in blocks: if not isinstance(b, dict): continue - if b.get("type") == "text" and b.get("text"): text_parts.append({"type": "text", "text": b.get("text", "")}) + if b.get("type") == "thinking" and b.get("thinking"): reasoning = b["thinking"] + elif b.get("type") == "text" and b.get("text"): text_parts.append({"type": "text", "text": b.get("text", "")}) elif b.get("type") == "tool_use": tool_calls.append({ "id": b.get("id") or '', "type": "function", "function": {"name": b.get("name", ""), "arguments": json.dumps(b.get("input", {}), ensure_ascii=False)} }) m = {"role": "assistant"} + if reasoning: m["reasoning_content"] = reasoning if text_parts: m["content"] = text_parts else: m["content"] = "" if tool_calls: m["tool_calls"] = tool_calls @@ -525,6 +534,11 @@ class BaseSession: if not content.startswith("!!!Error:"): self.history.append({"role": "assistant", "content": [{"type": "text", "text": content}]}) return _ask_gen() if stream else ''.join(list(_ask_gen())) +def _keep_claude_block(b): return not isinstance(b, dict) or b.get("type") != "thinking" or b.get("signature") +def _drop_unsigned_thinking(messages): + for m in messages: m["content"] = [b for b in m["content"] if _keep_claude_block(b)] + return messages + class ClaudeSession(BaseSession): def raw_ask(self, messages): headers = {"x-api-key": self.api_key, "Content-Type": "application/json", "anthropic-version": "2023-06-01", "anthropic-beta": "prompt-caching-2024-07-31"} @@ -540,7 +554,7 @@ class ClaudeSession(BaseSession): yield (err := f"!!!Error: {e}") return [{"type": "text", "text": err}] def make_messages(self, raw_list): - msgs = [{"role": m['role'], "content": list(m['content'])} for m in raw_list] + msgs = _drop_unsigned_thinking([{"role": m['role'], "content": list(m['content'])} for m in raw_list]) user_idxs = [i for i, m in enumerate(msgs) if m['role'] == 'user'] for idx in user_idxs[-2:]: msgs[idx]["content"][-1] = dict(msgs[idx]["content"][-1], cache_control={"type": "ephemeral"}) @@ -582,7 +596,7 @@ class NativeClaudeSession(BaseSession): self._device_id = uuid.uuid4().hex + uuid.uuid4().hex[:32] self.tools = None def raw_ask(self, messages): - messages = _fix_messages(messages) + messages = _drop_unsigned_thinking(_fix_messages(messages)) model = self.model beta_parts = ["claude-code-20250219", "interleaved-thinking-2025-05-14", "redact-thinking-2026-02-12", "prompt-caching-scope-2026-01-05"] if "[1m]" in model.lower(): From 08181be4bf7131655fdc67f82179adebb2010e26 Mon Sep 17 00:00:00 2001 From: Liang Jiaqing Date: Sat, 25 Apr 2026 10:54:44 +0800 Subject: [PATCH 2/2] fix: use correct token limit params for OpenAI APIs --- llmcore.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llmcore.py b/llmcore.py index 01ca87c..37a3bd1 100644 --- a/llmcore.py +++ b/llmcore.py @@ -337,6 +337,7 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion payload = {"model": model, "input": _to_responses_input(messages), "stream": stream, "prompt_cache_key": _RESP_CACHE_KEY, "instructions": system or "You are an Omnipotent Executor."} if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort} + if max_tokens: payload["max_output_tokens"] = max_tokens else: url = auto_make_url(api_base, "chat/completions") if system: messages = [{"role": "system", "content": system}] + messages @@ -344,7 +345,7 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion payload = {"model": model, "messages": messages, "stream": stream} if stream: payload["stream_options"] = {"include_usage": True} if temperature != 1: payload["temperature"] = temperature - if max_tokens: payload["max_tokens"] = max_tokens + if max_tokens: payload["max_completion_tokens" if ml.startswith(("gpt-5", "o1", "o2", "o3", "o4")) else "max_tokens"] = max_tokens if reasoning_effort: payload["reasoning_effort"] = reasoning_effort if tools: payload["tools"] = _prepare_oai_tools(tools, api_mode) RETRYABLE = {408, 409, 425, 429, 500, 502, 503, 504, 529}