refactor llm session params and thinking parsing
This commit is contained in:
@@ -5,7 +5,7 @@ if sys.stderr is None: sys.stderr = open(os.devnull, "w")
|
|||||||
elif hasattr(sys.stderr, 'reconfigure'): sys.stderr.reconfigure(errors='replace')
|
elif hasattr(sys.stderr, 'reconfigure'): sys.stderr.reconfigure(errors='replace')
|
||||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||||
|
|
||||||
from llmcore import SiderLLMSession, LLMSession, ToolClient, ClaudeSession, MixinSession, NativeToolClient, NativeClaudeSession, build_multimodal_content, NativeOAISession
|
from llmcore import SiderLLMSession, LLMSession, ToolClient, ClaudeSession, MixinSession, NativeToolClient, NativeClaudeSession, NativeOAISession
|
||||||
from agent_loop import agent_runner_loop
|
from agent_loop import agent_runner_loop
|
||||||
from ga import GenericAgentHandler, smart_format, get_global_memory, format_error, consume_file
|
from ga import GenericAgentHandler, smart_format, get_global_memory, format_error, consume_file
|
||||||
|
|
||||||
@@ -115,10 +115,6 @@ class GeneraticAgent:
|
|||||||
if source == 'feishu' and len(self.history) > 1: # 如果有历史记录且来自飞书,注入到首轮 user_input 中(支持/restore恢复上下文)
|
if source == 'feishu' and len(self.history) > 1: # 如果有历史记录且来自飞书,注入到首轮 user_input 中(支持/restore恢复上下文)
|
||||||
user_input = handler._get_anchor_prompt() + f"\n\n### 用户当前消息\n{raw_query}"
|
user_input = handler._get_anchor_prompt() + f"\n\n### 用户当前消息\n{raw_query}"
|
||||||
initial_user_content = None
|
initial_user_content = None
|
||||||
if images and isinstance(self.llmclient.backend, LLMSession):
|
|
||||||
initial_user_content = build_multimodal_content(user_input, images)
|
|
||||||
elif images:
|
|
||||||
print(f"[INFO] backend {type(self.llmclient.backend).__name__} does not support direct multimodal input, fallback to text attachment hints.")
|
|
||||||
# although new handler, the **full** history is in llmclient, so it is full history!
|
# although new handler, the **full** history is in llmclient, so it is full history!
|
||||||
gen = agent_runner_loop(self.llmclient, sys_prompt, user_input,
|
gen = agent_runner_loop(self.llmclient, sys_prompt, user_input,
|
||||||
handler, TOOLS_SCHEMA, max_turns=40, verbose=self.verbose,
|
handler, TOOLS_SCHEMA, max_turns=40, verbose=self.verbose,
|
||||||
|
|||||||
68
llmcore.py
68
llmcore.py
@@ -90,25 +90,6 @@ def auto_make_url(base, path):
|
|||||||
if b.endswith(p): return b
|
if b.endswith(p): return b
|
||||||
return f"{b}/{p}" if re.search(r'/v\d+(/|$)', b) else f"{b}/v1/{p}"
|
return f"{b}/{p}" if re.search(r'/v\d+(/|$)', b) else f"{b}/v1/{p}"
|
||||||
|
|
||||||
def build_multimodal_content(prompt_text, image_paths):
|
|
||||||
parts = []
|
|
||||||
text = prompt_text if isinstance(prompt_text, str) else str(prompt_text or "")
|
|
||||||
if text.strip():
|
|
||||||
parts.append({"type": "text", "text": text})
|
|
||||||
else:
|
|
||||||
parts.append({"type": "text", "text": "请查看图片并理解用户意图。"})
|
|
||||||
for path in image_paths or []:
|
|
||||||
if not path or not os.path.isfile(path): continue
|
|
||||||
try:
|
|
||||||
mime = mimetypes.guess_type(path)[0] or "image/png"
|
|
||||||
if not mime.startswith("image/"): mime = "image/png"
|
|
||||||
with open(path, "rb") as f:
|
|
||||||
data_url = f"data:{mime};base64,{base64.b64encode(f.read()).decode('ascii')}"
|
|
||||||
parts.append({"type": "image_url", "image_url": {"url": data_url}})
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[WARN] encode image failed {path}: {e}")
|
|
||||||
return parts
|
|
||||||
|
|
||||||
class SiderLLMSession:
|
class SiderLLMSession:
|
||||||
def __init__(self, cfg):
|
def __init__(self, cfg):
|
||||||
from sider_ai_api import Session # 不使用sider的话没必要安装这个包
|
from sider_ai_api import Session # 不使用sider的话没必要安装这个包
|
||||||
@@ -145,6 +126,7 @@ def _parse_claude_sse(resp_lines):
|
|||||||
elif evt_type == "content_block_start":
|
elif evt_type == "content_block_start":
|
||||||
block = evt.get("content_block", {})
|
block = evt.get("content_block", {})
|
||||||
if block.get("type") == "text": current_block = {"type": "text", "text": ""}
|
if block.get("type") == "text": current_block = {"type": "text", "text": ""}
|
||||||
|
elif block.get("type") == "thinking": current_block = {"type": "thinking", "thinking": ""}
|
||||||
elif block.get("type") == "tool_use":
|
elif block.get("type") == "tool_use":
|
||||||
current_block = {"type": "tool_use", "id": block.get("id", ""), "name": block.get("name", ""), "input": {}}
|
current_block = {"type": "tool_use", "id": block.get("id", ""), "name": block.get("name", ""), "input": {}}
|
||||||
tool_json_buf = ""
|
tool_json_buf = ""
|
||||||
@@ -154,6 +136,8 @@ def _parse_claude_sse(resp_lines):
|
|||||||
text = delta.get("text", "")
|
text = delta.get("text", "")
|
||||||
if current_block and current_block.get("type") == "text": current_block["text"] += text
|
if current_block and current_block.get("type") == "text": current_block["text"] += text
|
||||||
if text: yield text
|
if text: yield text
|
||||||
|
elif delta.get("type") == "thinking_delta":
|
||||||
|
if current_block and current_block.get("type") == "thinking": current_block["thinking"] += delta.get("thinking", "")
|
||||||
elif delta.get("type") == "input_json_delta": tool_json_buf += delta.get("partial_json", "")
|
elif delta.get("type") == "input_json_delta": tool_json_buf += delta.get("partial_json", "")
|
||||||
elif evt_type == "content_block_stop":
|
elif evt_type == "content_block_stop":
|
||||||
if current_block:
|
if current_block:
|
||||||
@@ -458,7 +442,8 @@ class BaseSession:
|
|||||||
if effort and not self.reasoning_effort: print(f"[WARN] Invalid reasoning_effort {effort!r}, ignored.")
|
if effort and not self.reasoning_effort: print(f"[WARN] Invalid reasoning_effort {effort!r}, ignored.")
|
||||||
mode = str(cfg.get('api_mode', 'chat_completions')).strip().lower().replace('-', '_')
|
mode = str(cfg.get('api_mode', 'chat_completions')).strip().lower().replace('-', '_')
|
||||||
self.api_mode = 'responses' if mode in ('responses', 'response') else 'chat_completions'
|
self.api_mode = 'responses' if mode in ('responses', 'response') else 'chat_completions'
|
||||||
self.temperature = cfg.get('temperature')
|
self.temperature = cfg.get('temperature', 1.0)
|
||||||
|
self.max_tokens = cfg.get('max_tokens', 8192)
|
||||||
def ask(self, prompt, stream=False):
|
def ask(self, prompt, stream=False):
|
||||||
def _ask_gen():
|
def _ask_gen():
|
||||||
content = ''
|
content = ''
|
||||||
@@ -480,13 +465,11 @@ class BaseSession:
|
|||||||
return _ask_gen() if stream else ''.join(list(_ask_gen()))
|
return _ask_gen() if stream else ''.join(list(_ask_gen()))
|
||||||
|
|
||||||
class ClaudeSession(BaseSession):
|
class ClaudeSession(BaseSession):
|
||||||
def raw_ask(self, messages, temperature=0.5, max_tokens=6144):
|
def raw_ask(self, messages):
|
||||||
model = self.default_model
|
model = self.default_model
|
||||||
ml = model.lower()
|
|
||||||
if 'kimi' in ml or 'moonshot' in ml: temperature = 1.0 # kimi/moonshot only accepts temp 1.0
|
|
||||||
elif 'minimax' in ml: temperature = max(0.01, min(temperature, 1.0)) # MiniMax requires temp in (0, 1]
|
|
||||||
headers = {"x-api-key": self.api_key, "Content-Type": "application/json", "anthropic-version": "2023-06-01", "anthropic-beta": "prompt-caching-2024-07-31"}
|
headers = {"x-api-key": self.api_key, "Content-Type": "application/json", "anthropic-version": "2023-06-01", "anthropic-beta": "prompt-caching-2024-07-31"}
|
||||||
payload = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, "stream": True}
|
payload = {"model": model, "messages": messages, "temperature": self.temperature, "max_tokens": self.max_tokens, "stream": True}
|
||||||
|
if self.reasoning_effort: payload["reasoning_effort"] = self.reasoning_effort
|
||||||
if self.system: payload["system"] = [{"type": "text", "text": self.system, "cache_control": {"type": "persistent"}}]
|
if self.system: payload["system"] = [{"type": "text", "text": self.system, "cache_control": {"type": "persistent"}}]
|
||||||
try:
|
try:
|
||||||
with requests.post(auto_make_url(self.api_base, "messages"), headers=headers, json=payload, stream=True, timeout=(self.connect_timeout, self.read_timeout)) as r:
|
with requests.post(auto_make_url(self.api_base, "messages"), headers=headers, json=payload, stream=True, timeout=(self.connect_timeout, self.read_timeout)) as r:
|
||||||
@@ -503,11 +486,11 @@ class ClaudeSession(BaseSession):
|
|||||||
return msgs
|
return msgs
|
||||||
|
|
||||||
class LLMSession(BaseSession):
|
class LLMSession(BaseSession):
|
||||||
def raw_ask(self, messages, temperature=0.5):
|
def raw_ask(self, messages):
|
||||||
return (yield from _openai_stream(self.api_base, self.api_key, messages, self.default_model, self.api_mode,
|
return (yield from _openai_stream(self.api_base, self.api_key, messages, self.default_model, self.api_mode,
|
||||||
temperature=temperature, reasoning_effort=self.reasoning_effort,
|
temperature=self.temperature, reasoning_effort=self.reasoning_effort,
|
||||||
max_retries=self.max_retries, connect_timeout=self.connect_timeout,
|
max_tokens=self.max_tokens, max_retries=self.max_retries,
|
||||||
read_timeout=self.read_timeout, proxies=self.proxies))
|
connect_timeout=self.connect_timeout, read_timeout=self.read_timeout, proxies=self.proxies))
|
||||||
def make_messages(self, raw_list): return _msgs_claude2oai(raw_list)
|
def make_messages(self, raw_list): return _msgs_claude2oai(raw_list)
|
||||||
|
|
||||||
def _fix_messages(messages):
|
def _fix_messages(messages):
|
||||||
@@ -536,10 +519,9 @@ class NativeClaudeSession(BaseSession):
|
|||||||
self._account_uuid = str(uuid.uuid4())
|
self._account_uuid = str(uuid.uuid4())
|
||||||
self._device_id = uuid.uuid4().hex + uuid.uuid4().hex[:32]
|
self._device_id = uuid.uuid4().hex + uuid.uuid4().hex[:32]
|
||||||
self.tools = None
|
self.tools = None
|
||||||
def raw_ask(self, messages, temperature=0.5, max_tokens=6144):
|
def raw_ask(self, messages):
|
||||||
messages = _fix_messages(messages)
|
messages = _fix_messages(messages)
|
||||||
model = self.default_model
|
model = self.default_model
|
||||||
if self.temperature is not None: temperature = self.temperature
|
|
||||||
beta_parts = ["claude-code-20250219", "interleaved-thinking-2025-05-14", "redact-thinking-2026-02-12", "prompt-caching-scope-2026-01-05"]
|
beta_parts = ["claude-code-20250219", "interleaved-thinking-2025-05-14", "redact-thinking-2026-02-12", "prompt-caching-scope-2026-01-05"]
|
||||||
if "[1m]" in model.lower():
|
if "[1m]" in model.lower():
|
||||||
beta_parts.insert(1, "context-1m-2025-08-07"); model = model.replace("[1m]", "").replace("[1M]", "")
|
beta_parts.insert(1, "context-1m-2025-08-07"); model = model.replace("[1m]", "").replace("[1M]", "")
|
||||||
@@ -548,7 +530,8 @@ class NativeClaudeSession(BaseSession):
|
|||||||
"user-agent": "claude-cli/2.1.90 (external, cli)", "x-app": "cli"}
|
"user-agent": "claude-cli/2.1.90 (external, cli)", "x-app": "cli"}
|
||||||
if self.api_key.startswith("sk-ant-"): headers["x-api-key"] = self.api_key
|
if self.api_key.startswith("sk-ant-"): headers["x-api-key"] = self.api_key
|
||||||
else: headers["authorization"] = f"Bearer {self.api_key}"
|
else: headers["authorization"] = f"Bearer {self.api_key}"
|
||||||
payload = {"model": model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, "stream": True}
|
payload = {"model": model, "messages": messages, "temperature": self.temperature, "max_tokens": self.max_tokens, "stream": True}
|
||||||
|
if self.reasoning_effort: payload["reasoning_effort"] = self.reasoning_effort
|
||||||
payload["metadata"] = {"user_id": json.dumps({"device_id": self._device_id, "account_uuid": self._account_uuid, "session_id": self._session_id}, separators=(',', ':'))}
|
payload["metadata"] = {"user_id": json.dumps({"device_id": self._device_id, "account_uuid": self._account_uuid, "session_id": self._session_id}, separators=(',', ':'))}
|
||||||
if self.tools:
|
if self.tools:
|
||||||
claude_tools = openai_tools_to_claude(self.tools)
|
claude_tools = openai_tools_to_claude(self.tools)
|
||||||
@@ -588,7 +571,10 @@ class NativeClaudeSession(BaseSession):
|
|||||||
content = "\n".join(text_parts).strip()
|
content = "\n".join(text_parts).strip()
|
||||||
tool_calls = [MockToolCall(b["name"], b.get("input", {}), id=b.get("id", "")) for b in content_blocks if b.get("type") == "tool_use"]
|
tool_calls = [MockToolCall(b["name"], b.get("input", {}), id=b.get("id", "")) for b in content_blocks if b.get("type") == "tool_use"]
|
||||||
if not tool_calls: tool_calls, content = _parse_text_tool_calls(content)
|
if not tool_calls: tool_calls, content = _parse_text_tool_calls(content)
|
||||||
think_pattern = r"<think(?:ing)?>(.*?)</think(?:ing)?>"; thinking = ''
|
thinking_parts = [b["thinking"] for b in content_blocks if b.get("type") == "thinking"]
|
||||||
|
thinking = "\n".join(thinking_parts).strip()
|
||||||
|
if not thinking:
|
||||||
|
think_pattern = r"<think(?:ing)?>(.*?)</think(?:ing)?>"
|
||||||
think_match = re.search(think_pattern, content, re.DOTALL)
|
think_match = re.search(think_pattern, content, re.DOTALL)
|
||||||
if think_match:
|
if think_match:
|
||||||
thinking = think_match.group(1).strip()
|
thinking = think_match.group(1).strip()
|
||||||
@@ -598,11 +584,11 @@ class NativeClaudeSession(BaseSession):
|
|||||||
class NativeOAISession(NativeClaudeSession):
|
class NativeOAISession(NativeClaudeSession):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
def raw_ask(self, messages, temperature=0.5, max_tokens=6144, **kw):
|
def raw_ask(self, messages):
|
||||||
"""OpenAI streaming. yields text chunks, generator return = list[content_block]"""
|
"""OpenAI streaming. yields text chunks, generator return = list[content_block]"""
|
||||||
msgs = ([{"role": "system", "content": self.system}] if self.system else []) + _msgs_claude2oai(messages)
|
msgs = ([{"role": "system", "content": self.system}] if self.system else []) + _msgs_claude2oai(messages)
|
||||||
return (yield from _openai_stream(self.api_base, self.api_key, msgs, self.default_model, self.api_mode,
|
return (yield from _openai_stream(self.api_base, self.api_key, msgs, self.default_model, self.api_mode,
|
||||||
temperature=temperature, max_tokens=max_tokens,
|
temperature=self.temperature, max_tokens=self.max_tokens,
|
||||||
tools=self.tools, reasoning_effort=self.reasoning_effort,
|
tools=self.tools, reasoning_effort=self.reasoning_effort,
|
||||||
max_retries=self.max_retries, connect_timeout=self.connect_timeout,
|
max_retries=self.max_retries, connect_timeout=self.connect_timeout,
|
||||||
read_timeout=self.read_timeout, proxies=self.proxies))
|
read_timeout=self.read_timeout, proxies=self.proxies))
|
||||||
@@ -819,8 +805,9 @@ class MixinSession:
|
|||||||
self.default_model = getattr(self._sessions[0], 'default_model', None)
|
self.default_model = getattr(self._sessions[0], 'default_model', None)
|
||||||
self._cur_idx, self._switched_at = 0, 0.0
|
self._cur_idx, self._switched_at = 0, 0.0
|
||||||
def __getattr__(self, name): return getattr(self._sessions[0], name)
|
def __getattr__(self, name): return getattr(self._sessions[0], name)
|
||||||
|
_BROADCAST_ATTRS = frozenset({'system', 'tools', 'temperature', 'max_tokens', 'reasoning_effort'})
|
||||||
def __setattr__(self, name, value):
|
def __setattr__(self, name, value):
|
||||||
if name in ('system', 'tools'):
|
if name in self._BROADCAST_ATTRS:
|
||||||
for s in self._sessions:
|
for s in self._sessions:
|
||||||
v = openai_tools_to_claude(value) if name == 'tools' and type(s) is NativeClaudeSession else value
|
v = openai_tools_to_claude(value) if name == 'tools' and type(s) is NativeClaudeSession else value
|
||||||
setattr(s, name, v)
|
setattr(s, name, v)
|
||||||
@@ -901,13 +888,6 @@ class NativeToolClient:
|
|||||||
while True:
|
while True:
|
||||||
chunk = next(gen); yield chunk
|
chunk = next(gen); yield chunk
|
||||||
except StopIteration as e: resp = e.value
|
except StopIteration as e: resp = e.value
|
||||||
if resp:
|
if resp: _write_llm_log('Response', resp.raw)
|
||||||
_write_llm_log('Response', resp.raw)
|
|
||||||
text = resp.content
|
|
||||||
think_match = re.search(r'<think(?:ing)?>(.*?)</think(?:ing)?>', text, re.DOTALL)
|
|
||||||
if think_match:
|
|
||||||
resp.thinking = think_match.group(1).strip()
|
|
||||||
text = re.sub(r'<think(?:ing)?>.*?</think(?:ing)?>', '', text, flags=re.DOTALL)
|
|
||||||
resp.content = text.strip()
|
|
||||||
if resp and hasattr(resp, 'tool_calls') and resp.tool_calls: self._pending_tool_ids = [tc.id for tc in resp.tool_calls]
|
if resp and hasattr(resp, 'tool_calls') and resp.tool_calls: self._pending_tool_ids = [tc.id for tc in resp.tool_calls]
|
||||||
return resp
|
return resp
|
||||||
Reference in New Issue
Block a user