refactor: unify system prompt injection into _openai_stream; disable gpt done_hooks
This commit is contained in:
@@ -138,7 +138,7 @@ class GeneraticAgent:
|
|||||||
user_input = raw_query
|
user_input = raw_query
|
||||||
if source == 'feishu' and len(self.history) > 1: # 如果有历史记录且来自飞书,注入到首轮 user_input 中(支持/restore恢复上下文)
|
if source == 'feishu' and len(self.history) > 1: # 如果有历史记录且来自飞书,注入到首轮 user_input 中(支持/restore恢复上下文)
|
||||||
user_input = handler._get_anchor_prompt() + f"\n\n### 用户当前消息\n{raw_query}"
|
user_input = handler._get_anchor_prompt() + f"\n\n### 用户当前消息\n{raw_query}"
|
||||||
if 'gpt' in self.get_llm_name(model=True): handler._done_hooks.append('请确定任务是否完成,如果完成请给出信息完整的简报回答,如未完成需要继续工具调用直到完成任务,确实需要问用户应使用ask_user工具')
|
#if 'gpt' in self.get_llm_name(model=True): handler._done_hooks.append('请确定任务是否完成,如果完成请给出信息完整的简报回答,如未完成需要继续工具调用直到完成任务,确实需要问用户应使用ask_user工具')
|
||||||
# although new handler, the **full** history is in llmclient, so it is full history!
|
# although new handler, the **full** history is in llmclient, so it is full history!
|
||||||
gen = agent_runner_loop(self.llmclient, sys_prompt, user_input,
|
gen = agent_runner_loop(self.llmclient, sys_prompt, user_input,
|
||||||
handler, TOOLS_SCHEMA, max_turns=70, verbose=self.verbose)
|
handler, TOOLS_SCHEMA, max_turns=70, verbose=self.verbose)
|
||||||
|
|||||||
12
llmcore.py
12
llmcore.py
@@ -314,7 +314,7 @@ def _stamp_oai_cache_markers(messages, model):
|
|||||||
messages[idx] = {**messages[idx], 'content': c}
|
messages[idx] = {**messages[idx], 'content': c}
|
||||||
|
|
||||||
def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completions', *,
|
def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completions', *,
|
||||||
temperature=0.5, max_tokens=None, tools=None, reasoning_effort=None,
|
system=None, temperature=0.5, max_tokens=None, tools=None, reasoning_effort=None,
|
||||||
max_retries=0, connect_timeout=10, read_timeout=300, proxies=None, stream=True):
|
max_retries=0, connect_timeout=10, read_timeout=300, proxies=None, stream=True):
|
||||||
"""Shared OpenAI-compatible streaming request with retry. Yields text chunks, returns list[content_block]."""
|
"""Shared OpenAI-compatible streaming request with retry. Yields text chunks, returns list[content_block]."""
|
||||||
ml = model.lower()
|
ml = model.lower()
|
||||||
@@ -323,10 +323,12 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion
|
|||||||
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
|
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
|
||||||
if api_mode == "responses":
|
if api_mode == "responses":
|
||||||
url = auto_make_url(api_base, "responses")
|
url = auto_make_url(api_base, "responses")
|
||||||
payload = {"model": model, "input": _to_responses_input(messages), "stream": stream, "prompt_cache_key": _RESP_CACHE_KEY}
|
payload = {"model": model, "input": _to_responses_input(messages), "stream": stream,
|
||||||
|
"prompt_cache_key": _RESP_CACHE_KEY, "instructions": system or "You are an Omnipotent Executor."}
|
||||||
if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort}
|
if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort}
|
||||||
else:
|
else:
|
||||||
url = auto_make_url(api_base, "chat/completions")
|
url = auto_make_url(api_base, "chat/completions")
|
||||||
|
if system: messages = [{"role": "system", "content": system}] + messages
|
||||||
_stamp_oai_cache_markers(messages, model)
|
_stamp_oai_cache_markers(messages, model)
|
||||||
payload = {"model": model, "messages": messages, "stream": stream}
|
payload = {"model": model, "messages": messages, "stream": stream}
|
||||||
if stream: payload["stream_options"] = {"include_usage": True}
|
if stream: payload["stream_options"] = {"include_usage": True}
|
||||||
@@ -643,11 +645,9 @@ class NativeClaudeSession(BaseSession):
|
|||||||
|
|
||||||
class NativeOAISession(NativeClaudeSession):
|
class NativeOAISession(NativeClaudeSession):
|
||||||
def raw_ask(self, messages):
|
def raw_ask(self, messages):
|
||||||
"""OpenAI streaming. yields text chunks, generator return = list[content_block]"""
|
|
||||||
messages = _fix_messages(messages)
|
messages = _fix_messages(messages)
|
||||||
msgs = ([{"role": "system", "content": self.system}] if self.system else []) + _msgs_claude2oai(messages)
|
return (yield from _openai_stream(self.api_base, self.api_key, _msgs_claude2oai(messages), self.model, self.api_mode,
|
||||||
return (yield from _openai_stream(self.api_base, self.api_key, msgs, self.model, self.api_mode,
|
system=self.system, temperature=self.temperature, max_tokens=self.max_tokens,
|
||||||
temperature=self.temperature, max_tokens=self.max_tokens,
|
|
||||||
tools=self.tools, reasoning_effort=self.reasoning_effort,
|
tools=self.tools, reasoning_effort=self.reasoning_effort,
|
||||||
max_retries=self.max_retries, connect_timeout=self.connect_timeout,
|
max_retries=self.max_retries, connect_timeout=self.connect_timeout,
|
||||||
read_timeout=self.read_timeout, proxies=self.proxies, stream=self.stream))
|
read_timeout=self.read_timeout, proxies=self.proxies, stream=self.stream))
|
||||||
|
|||||||
Reference in New Issue
Block a user