feat: stream LLM responses and improve agent UI
This commit is contained in:
@@ -45,14 +45,9 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
|
|||||||
]
|
]
|
||||||
for turn in range(max_turns):
|
for turn in range(max_turns):
|
||||||
yield f"**LLM Running (Turn {turn+1}) ...**\n\n"
|
yield f"**LLM Running (Turn {turn+1}) ...**\n\n"
|
||||||
response = client.chat(messages=messages, tools=tools_schema)
|
response_gen = client.chat(messages=messages, tools=tools_schema)
|
||||||
|
response = yield from response_gen
|
||||||
if response.thinking: yield '<thinking>' + response.thinking + '</thinking>\n\n'
|
yield '\n\n'
|
||||||
showcontent = response.content
|
|
||||||
if '</summary>' in showcontent: showcontent = showcontent.replace('</summary>', '</summary>\n\n')
|
|
||||||
if '</file_content>' in showcontent:
|
|
||||||
showcontent = re.sub(r'<file_content>\s*(.*?)\s*</file_content>', r'\n````\n<file_content>\n\1\n</file_content>\n````', showcontent, flags=re.DOTALL)
|
|
||||||
yield showcontent + '\n\n'
|
|
||||||
|
|
||||||
if not response.tool_calls:
|
if not response.tool_calls:
|
||||||
tool_name, args = 'no_tool', {}
|
tool_name, args = 'no_tool', {}
|
||||||
|
|||||||
11
agentmain.py
11
agentmain.py
@@ -31,7 +31,7 @@ class GeneraticAgent:
|
|||||||
from sidercall import sider_cookie, oai_apikey, oai_apibase
|
from sidercall import sider_cookie, oai_apikey, oai_apibase
|
||||||
llm_sessions = []
|
llm_sessions = []
|
||||||
if sider_cookie: llm_sessions += [SiderLLMSession(default_model=x) for x in \
|
if sider_cookie: llm_sessions += [SiderLLMSession(default_model=x) for x in \
|
||||||
["gemini-3.0-flash", "claude-haiku-4.5", "gpt-5-mini"]]
|
["gemini-3.0-flash", "claude-haiku-4.5", "kimi-k2"]]
|
||||||
if oai_apikey: llm_sessions += [LLMSession(api_key=oai_apikey, api_base=oai_apibase)]
|
if oai_apikey: llm_sessions += [LLMSession(api_key=oai_apikey, api_base=oai_apibase)]
|
||||||
if len(llm_sessions) > 0:
|
if len(llm_sessions) > 0:
|
||||||
llmclient = ToolClient([x.ask for x in llm_sessions], auto_save_tokens=True)
|
llmclient = ToolClient([x.ask for x in llm_sessions], auto_save_tokens=True)
|
||||||
@@ -48,6 +48,10 @@ class GeneraticAgent:
|
|||||||
self.stop_sig = False
|
self.stop_sig = False
|
||||||
self.current_source = 'none'
|
self.current_source = 'none'
|
||||||
|
|
||||||
|
def next_llm(self):
|
||||||
|
self.llm_no = (self.llm_no + 1) % len(self.llmclient.raw_apis)
|
||||||
|
self.llmclient.last_tools = ''
|
||||||
|
|
||||||
def abort(self):
|
def abort(self):
|
||||||
if not self.is_running: return
|
if not self.is_running: return
|
||||||
self.stop_sig = True
|
self.stop_sig = True
|
||||||
@@ -73,17 +77,20 @@ class GeneraticAgent:
|
|||||||
self.llmclient.raw_api = self.llmclient.raw_apis[self.llm_no]
|
self.llmclient.raw_api = self.llmclient.raw_apis[self.llm_no]
|
||||||
gen = agent_runner_loop(self.llmclient, sys_prompt,
|
gen = agent_runner_loop(self.llmclient, sys_prompt,
|
||||||
raw_query, handler, TOOLS_SCHEMA, max_turns=25)
|
raw_query, handler, TOOLS_SCHEMA, max_turns=25)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
full_response = ""
|
full_response = ""
|
||||||
for chunk in gen:
|
for chunk in gen:
|
||||||
if self.stop_sig: break
|
if self.stop_sig: break
|
||||||
full_response += chunk
|
full_response += chunk
|
||||||
self.display_queue.put({'next': full_response, 'source': source})
|
self.display_queue.put({'next': full_response, 'source': source})
|
||||||
|
if '</summary>' in full_response: full_response = full_response.replace('</summary>', '</summary>\n\n')
|
||||||
|
if '</file_content>' in full_response: full_response = re.sub(r'<file_content>\s*(.*?)\s*</file_content>', r'\n````\n<file_content>\n\1\n</file_content>\n````', full_response, flags=re.DOTALL)
|
||||||
self.display_queue.put({'done': full_response, 'source': source})
|
self.display_queue.put({'done': full_response, 'source': source})
|
||||||
self.history = handler.history_info
|
self.history = handler.history_info
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Backend Error: {format_error(e)}")
|
print(f"Backend Error: {format_error(e)}")
|
||||||
self.display_queue.put({'done': '异常停止', 'source': source})
|
self.display_queue.put({'done': full_response + f'\n```\n{format_error(e)}\n```', 'source': source})
|
||||||
finally:
|
finally:
|
||||||
self.is_running = False
|
self.is_running = False
|
||||||
self.stop_sig = False
|
self.stop_sig = False
|
||||||
|
|||||||
@@ -62,7 +62,7 @@
|
|||||||
}},
|
}},
|
||||||
{"type": "function", "function": {
|
{"type": "function", "function": {
|
||||||
"name": "conclude_and_reflect",
|
"name": "conclude_and_reflect",
|
||||||
"description": "当模型认为当前任务执行完美,且有具有长期价值的环境事实或用户偏好需要提炼并存入全局记忆时,调用此工具。注意:此工具无参数,调用即代表触发记忆提炼流程。",
|
"description": "当模型认为当前任务(非处理记忆)执行完美,且有具有长期价值的环境事实或用户偏好需要提炼并存入全局记忆时,调用此工具。注意:此工具调用即代表触发记忆提炼流程,如果已经在记忆提炼流程无需调用。",
|
||||||
"parameters": {"type": "object", "properties": {}}}
|
"parameters": {"type": "object", "properties": {}}}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
46
ga.py
46
ga.py
@@ -267,8 +267,8 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
warning = ""
|
warning = ""
|
||||||
if not matches:
|
if not matches:
|
||||||
code = args.get("code")
|
code = args.get("code")
|
||||||
if not code: return StepOutcome(None, next_prompt=f"【系统错误】:你调用了 code_run,但未在回复中提供 ```{code_type} 代码块。请重新输出代码并附带工具调用。")
|
if not code: return StepOutcome(None, next_prompt=f"【系统错误】:你调用了 code_run,但未在先在回复正文中提供 ```{code_type} 代码块。请重新输出代码并附带工具调用。")
|
||||||
warning = "\n下次要记得在回复中提供代码块,而不是放在参数中"
|
warning = "\n下次要记得先在回复正文中提供代码块,而不是放在参数中"
|
||||||
else: code = matches[-1].strip() # 提取最后一个代码块(通常是模型修正后的最终逻辑)
|
else: code = matches[-1].strip() # 提取最后一个代码块(通常是模型修正后的最终逻辑)
|
||||||
timeout = args.get("timeout", 60)
|
timeout = args.get("timeout", 60)
|
||||||
raw_path = os.path.join(self.cwd, args.get("cwd", './'))
|
raw_path = os.path.join(self.cwd, args.get("cwd", './'))
|
||||||
@@ -395,12 +395,42 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
|
|
||||||
def do_no_tool(self, args, response):
|
def do_no_tool(self, args, response):
|
||||||
'''这是一个特殊工具,由引擎自主调用,不要包含在TOOLS_SCHEMA里。
|
'''这是一个特殊工具,由引擎自主调用,不要包含在TOOLS_SCHEMA里。
|
||||||
|
当模型在一轮中未显式调用任何工具时,由引擎自动触发。
|
||||||
|
二次确认仅在回复几乎只包含<thinking>/<summary>和一段大代码块时触发。
|
||||||
'''
|
'''
|
||||||
if not response or not getattr(response, 'content', '').strip():
|
content = getattr(response, 'content', '') or ""
|
||||||
|
|
||||||
|
# 1. 空回复保护:要求模型重新生成内容或调用工具
|
||||||
|
if not response or not content.strip():
|
||||||
yield "[Warn] LLM returned an empty response. Retrying...\n"
|
yield "[Warn] LLM returned an empty response. Retrying...\n"
|
||||||
next_prompt = "[System] 检测到空回复,请重新生成内容或调用工具。"
|
next_prompt = "[System] 检测到空回复,请重新生成内容或调用工具。"
|
||||||
return StepOutcome({}, next_prompt=next_prompt, should_exit=False)
|
return StepOutcome({}, next_prompt=next_prompt, should_exit=False)
|
||||||
yield "[Info] No tool called. Final response to user.\n"
|
# 2. 检测“包含较大代码块但未调用工具”的情况
|
||||||
|
# 这里通过三引号代码块 + 最少字符数的方式粗略判断“大段代码”
|
||||||
|
code_block_pattern = r"```[a-zA-Z0-9_]*\n[\s\S]{100,}?```"
|
||||||
|
m = re.search(code_block_pattern, content)
|
||||||
|
if m:
|
||||||
|
# 仅当 content 由 <thinking> / <summary> 和该代码块构成时才触发二次确认
|
||||||
|
residual = content
|
||||||
|
# 去掉代码块本身
|
||||||
|
residual = residual.replace(m.group(0), "")
|
||||||
|
# 去掉<thinking>和<summary>块(大小写不敏感)
|
||||||
|
residual = re.sub(r"<thinking>[\s\S]*?</thinking>", "", residual, flags=re.IGNORECASE)
|
||||||
|
residual = re.sub(r"<summary>[\s\S]*?</summary>", "", residual, flags=re.IGNORECASE)
|
||||||
|
# 如果去除上述结构后的非空白字符很少,说明没有额外自然语言说明
|
||||||
|
clean_residual = re.sub(r"\s+", "", residual)
|
||||||
|
if len(clean_residual) <= 50:
|
||||||
|
yield "[Info] Detected large code block without tool call and no extra natural language. Requesting clarification.\n"
|
||||||
|
next_prompt = (
|
||||||
|
"[System] 检测到你在上一轮回复中主要内容是较大代码块(仅配有<thinking>/<summary>),且本轮未调用任何工具。\n"
|
||||||
|
"如果这些代码需要执行、写入文件或进一步分析,请重新组织回复并显式调用相应工具"
|
||||||
|
"(例如:code_run、file_write、file_patch 等);\n"
|
||||||
|
"如果只是向用户展示或讲解代码片段,请在回复中补充自然语言说明,"
|
||||||
|
"并明确是否还需要额外的实际操作。"
|
||||||
|
)
|
||||||
|
return StepOutcome({}, next_prompt=next_prompt, should_exit=False)
|
||||||
|
# 3. 正常情况:直接将回复返回给用户并结束循环
|
||||||
|
yield "[Info] Final response to user.\n"
|
||||||
return StepOutcome(response, next_prompt=None, should_exit=True)
|
return StepOutcome(response, next_prompt=None, should_exit=True)
|
||||||
|
|
||||||
def do_conclude_and_reflect(self, args, response):
|
def do_conclude_and_reflect(self, args, response):
|
||||||
@@ -421,14 +451,16 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
print(prompt)
|
print(prompt)
|
||||||
if self.plan: prompt += f"\n<plan>{self.plan}</plan>"
|
if self.plan: prompt += f"\n<plan>{self.plan}</plan>"
|
||||||
if self.focus: prompt += f"\n<focus>{self.focus}</focus>"
|
if self.focus: prompt += f"\n<focus>{self.focus}</focus>"
|
||||||
return prompt + "\n请继续执行下一步。"
|
return prompt
|
||||||
|
|
||||||
def get_global_memory():
|
def get_global_memory():
|
||||||
prompt = "\n"
|
prompt = "\n"
|
||||||
try:
|
try:
|
||||||
with open('memory/global_mem_insight.txt', 'r', encoding='utf-8') as f: insight = f.read()
|
with open('memory/global_mem_insight.txt', 'r', encoding='utf-8') as f: insight = f.read()
|
||||||
prompt += f"\n\n[Global Memory Insight]\n"
|
prompt += f"\n\n[Memory Insight (../memory/global_mem_insight.txt)]\n"
|
||||||
prompt += 'IMPORTANT PATHS: ../memory/global_mem.txt (Facts), ../memory/global_mem_insight.txt (Logic), ../ (Your Code Root), ../temp (./, Your default cwd) \n'
|
prompt += 'IMPORTANT PATHS: ../memory/global_mem.txt (Facts), ../ (Your Code Root)\n'
|
||||||
|
prompt += f'cwd = {os.path.abspath("./temp")}\n'
|
||||||
|
prompt += f'But prefer use relative paths (./ = cwd) to locate.\n'
|
||||||
prompt += 'MEM_RULE: Insight is the index of Facts. Sync Insight whenever Facts change. For details, read Facts.\n'
|
prompt += 'MEM_RULE: Insight is the index of Facts. Sync Insight whenever Facts change. For details, read Facts.\n'
|
||||||
prompt += "EXT: ../memory/ may contain other task-specific memories.\n"
|
prompt += "EXT: ../memory/ may contain other task-specific memories.\n"
|
||||||
prompt += insight + "\n"
|
prompt += insight + "\n"
|
||||||
|
|||||||
97
sidercall.py
97
sidercall.py
@@ -1,4 +1,4 @@
|
|||||||
import os, json, re, time, requests
|
import os, json, re, time, requests, sys
|
||||||
|
|
||||||
try: from mykey import sider_cookie
|
try: from mykey import sider_cookie
|
||||||
except ImportError: sider_cookie = ""
|
except ImportError: sider_cookie = ""
|
||||||
@@ -10,12 +10,14 @@ class SiderLLMSession:
|
|||||||
from sider_ai_api import Session
|
from sider_ai_api import Session
|
||||||
self._core = Session(cookie=sider_cookie, proxies={'https':'127.0.0.1:2082'})
|
self._core = Session(cookie=sider_cookie, proxies={'https':'127.0.0.1:2082'})
|
||||||
self.default_model = default_model
|
self.default_model = default_model
|
||||||
def ask(self, prompt, model=None):
|
def ask(self, prompt, model=None, stream=False):
|
||||||
if model is None: model = self.default_model
|
if model is None: model = self.default_model
|
||||||
if len(prompt) > 29000:
|
if len(prompt) > 29000:
|
||||||
print(f"[Warn] Prompt too long ({len(prompt)} chars), truncating.")
|
print(f"[Warn] Prompt too long ({len(prompt)} chars), truncating.")
|
||||||
prompt = prompt[-29000:]
|
prompt = prompt[-29000:]
|
||||||
return ''.join(self._core.chat(prompt, model))
|
gen = self._core.chat(prompt, model)
|
||||||
|
if stream: return gen
|
||||||
|
return ''.join(list(gen))
|
||||||
|
|
||||||
class LLMSession:
|
class LLMSession:
|
||||||
def __init__(self, api_key=oai_apikey, api_base=oai_apibase, model=oai_model, context_win=16000):
|
def __init__(self, api_key=oai_apikey, api_base=oai_apibase, model=oai_model, context_win=16000):
|
||||||
@@ -28,16 +30,28 @@ class LLMSession:
|
|||||||
|
|
||||||
def raw_ask(self, messages, model=None, temperature=0.5):
|
def raw_ask(self, messages, model=None, temperature=0.5):
|
||||||
if model is None: model = self.model
|
if model is None: model = self.model
|
||||||
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
|
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
|
||||||
|
payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True}
|
||||||
try:
|
try:
|
||||||
response = requests.post(
|
with requests.post(f"{self.api_base}/chat/completions",
|
||||||
f"{self.api_base}/chat/completions", headers=headers, timeout=60,
|
headers=headers, json=payload, stream=True, timeout=(5, 60)) as r:
|
||||||
json={"model": model, "messages": messages, "temperature": temperature} )
|
r.raise_for_status()
|
||||||
res_json = response.json()
|
buffer = ''
|
||||||
content = res_json["choices"][0]["message"]["content"]
|
for line in r.iter_lines():
|
||||||
return content
|
line = line.decode("utf-8")
|
||||||
|
if not line or not line.startswith("data:"): continue
|
||||||
|
data = line[5:].lstrip()
|
||||||
|
if data == "[DONE]": break
|
||||||
|
obj = json.loads(data)
|
||||||
|
ch = (obj.get("choices") or [{}])[0]
|
||||||
|
if ch.get("finish_reason") is not None: break
|
||||||
|
delta = (ch.get("delta") or {}).get("content")
|
||||||
|
if not delta: continue
|
||||||
|
yield delta
|
||||||
|
buffer += delta
|
||||||
|
if '</tool_use>' in buffer[-30:]: break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error: {str(e)}"
|
yield f"Error: {str(e)}"
|
||||||
|
|
||||||
def make_messages(self, raw_list, omit_images=True):
|
def make_messages(self, raw_list, omit_images=True):
|
||||||
messages = []
|
messages = []
|
||||||
@@ -60,22 +74,28 @@ class LLMSession:
|
|||||||
p = "Summarize prev summary and prev conversations into compact memory (facts/decisions/constraints/open questions). Do NOT restate long schemas. The new summary should less than 1000 tokens.\n"
|
p = "Summarize prev summary and prev conversations into compact memory (facts/decisions/constraints/open questions). Do NOT restate long schemas. The new summary should less than 1000 tokens.\n"
|
||||||
messages = self.make_messages(old, omit_images=True)
|
messages = self.make_messages(old, omit_images=True)
|
||||||
messages += [{"role":"user", "content":p}]
|
messages += [{"role":"user", "content":p}]
|
||||||
summary = self.raw_ask(messages, model, temperature=0.1)
|
summary = ''.join(list(self.raw_ask(messages, model, temperature=0.1)))
|
||||||
if not summary.startswith("Error:"):
|
if not summary.startswith("Error:"):
|
||||||
self.raw_msgs.insert(0, {"role":"system", "prompt":"Prev summary:\n"+summary, "image":None})
|
self.raw_msgs.insert(0, {"role":"system", "prompt":"Prev summary:\n"+summary, "image":None})
|
||||||
else: self.raw_msgs = old + self.raw_msgs # 不做了,下次再做
|
else: self.raw_msgs = old + self.raw_msgs # 不做了,下次再做
|
||||||
|
|
||||||
def ask(self, prompt, model=None, image_base64=None):
|
def ask(self, prompt, model=None, image_base64=None, stream=False):
|
||||||
if model is None: model = self.model
|
if model is None: model = self.model
|
||||||
self.raw_msgs.append({"role": "user", "prompt": prompt, "image": image_base64})
|
self.raw_msgs.append({"role": "user", "prompt": prompt, "image": image_base64})
|
||||||
messages = self.make_messages(self.raw_msgs[:-1], omit_images=True)
|
messages = self.make_messages(self.raw_msgs[:-1], omit_images=True)
|
||||||
messages += self.make_messages([self.raw_msgs[-1]], omit_images=False)
|
messages += self.make_messages([self.raw_msgs[-1]], omit_images=False)
|
||||||
total_len = sum(2000 if isinstance(m["content"], list) else len(str(m["content"]))//4 for m in messages) # estimate token count
|
total_len = sum(2000 if isinstance(m["content"], list) else len(str(m["content"]))//4 for m in messages) # estimate token count
|
||||||
content = self.raw_ask(messages, model)
|
gen = self.raw_ask(messages, model)
|
||||||
if not content.startswith("Error:"):
|
def _ask_gen():
|
||||||
self.raw_msgs.append({"role": "assistant", "prompt": content, "image": None})
|
content = ''
|
||||||
if total_len > self.context_win: self.summary_history()
|
for chunk in gen:
|
||||||
return content
|
content += chunk; yield chunk
|
||||||
|
if not content.startswith("Error:"):
|
||||||
|
self.raw_msgs.append({"role": "assistant", "prompt": content, "image": None})
|
||||||
|
if total_len > 5000: print(f"[Debug] Whole context length {total_len}.")
|
||||||
|
if total_len > self.context_win: self.summary_history()
|
||||||
|
if stream: return _ask_gen()
|
||||||
|
return ''.join(list(_ask_gen()))
|
||||||
|
|
||||||
|
|
||||||
class MockFunction:
|
class MockFunction:
|
||||||
@@ -109,7 +129,10 @@ class ToolClient:
|
|||||||
def chat(self, messages, tools=None):
|
def chat(self, messages, tools=None):
|
||||||
full_prompt = self._build_protocol_prompt(messages, tools)
|
full_prompt = self._build_protocol_prompt(messages, tools)
|
||||||
print("Full prompt length:", len(full_prompt))
|
print("Full prompt length:", len(full_prompt))
|
||||||
raw_text = self.raw_api(full_prompt)
|
gen = self.raw_api(full_prompt, stream=True)
|
||||||
|
raw_text = ''
|
||||||
|
for chunk in gen:
|
||||||
|
raw_text += chunk; yield chunk
|
||||||
with open('model_responses.txt', 'a', encoding='utf-8', errors="replace") as f:
|
with open('model_responses.txt', 'a', encoding='utf-8', errors="replace") as f:
|
||||||
f.write(f"=== Prompt ===\n{full_prompt}\n=== Response ===\n{raw_text}\n\n")
|
f.write(f"=== Prompt ===\n{full_prompt}\n=== Response ===\n{raw_text}\n\n")
|
||||||
return self._parse_mixed_response(raw_text)
|
return self._parse_mixed_response(raw_text)
|
||||||
@@ -127,7 +150,7 @@ class ToolClient:
|
|||||||
请按照以下步骤思考并行动:
|
请按照以下步骤思考并行动:
|
||||||
1. **思考**: 在 `<thinking>` 标签中先进行思考,分析现状和策略。
|
1. **思考**: 在 `<thinking>` 标签中先进行思考,分析现状和策略。
|
||||||
2. **总结**: 在 `<summary>` 中输出*极为简短*的高度概括的单行(<30字)物理快照,包括上次工具调用结果获取的新信息+本次工具调用意图和预期。此内容将进入长期工作记忆,记录关键信息,严禁输出无实际信息增量的描述。
|
2. **总结**: 在 `<summary>` 中输出*极为简短*的高度概括的单行(<30字)物理快照,包括上次工具调用结果获取的新信息+本次工具调用意图和预期。此内容将进入长期工作记忆,记录关键信息,严禁输出无实际信息增量的描述。
|
||||||
3. **行动**: 如果需要调用工具,请紧接着输出一个 **<tool_use>块**,然后结束,我会稍后给你返回<tool_result>块。
|
3. **行动**: 如果需要调用工具,请在回复正文之后输出一个 **<tool_use>块**,然后结束,我会稍后给你返回<tool_result>块。
|
||||||
格式: ```<tool_use>\n{{"function": "工具名", "arguments": {{参数}}}}\n</tool_use>\n```
|
格式: ```<tool_use>\n{{"function": "工具名", "arguments": {{参数}}}}\n</tool_use>\n```
|
||||||
|
|
||||||
### 可用工具库
|
### 可用工具库
|
||||||
@@ -164,7 +187,7 @@ class ToolClient:
|
|||||||
|
|
||||||
tool_calls = None
|
tool_calls = None
|
||||||
tool_pattern = r"<tool_use>(.*?)</tool_use>"
|
tool_pattern = r"<tool_use>(.*?)</tool_use>"
|
||||||
tool_match = re.search(tool_pattern, text, re.DOTALL)
|
tool_match = re.search(tool_pattern, remaining_text, re.DOTALL)
|
||||||
|
|
||||||
json_str = ""
|
json_str = ""
|
||||||
if tool_match:
|
if tool_match:
|
||||||
@@ -173,6 +196,8 @@ class ToolClient:
|
|||||||
elif '<tool_use>' in remaining_text:
|
elif '<tool_use>' in remaining_text:
|
||||||
weaktoolstr = remaining_text.split('<tool_use>')[-1].strip()
|
weaktoolstr = remaining_text.split('<tool_use>')[-1].strip()
|
||||||
json_str = weaktoolstr if weaktoolstr.endswith('}') else ''
|
json_str = weaktoolstr if weaktoolstr.endswith('}') else ''
|
||||||
|
if json_str == '' and '```' in weaktoolstr and weaktoolstr.split('```')[0].strip().endswith('}'):
|
||||||
|
json_str = weaktoolstr.split('```')[0].strip()
|
||||||
remaining_text = remaining_text.replace('<tool_use>'+weaktoolstr, "")
|
remaining_text = remaining_text.replace('<tool_use>'+weaktoolstr, "")
|
||||||
|
|
||||||
if json_str:
|
if json_str:
|
||||||
@@ -184,7 +209,7 @@ class ToolClient:
|
|||||||
if func_name: tool_calls = [MockToolCall(func_name, args)]
|
if func_name: tool_calls = [MockToolCall(func_name, args)]
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
print("[Warn] Failed to parse tool_use JSON:", json_str)
|
print("[Warn] Failed to parse tool_use JSON:", json_str)
|
||||||
thinking += f"[Warn] JSON 解析失败,模型输出了无效的 JSON."
|
remaining_text += f"[Warning] JSON 解析失败,模型输出了无效的 JSON."
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("[Error] Exception during tool_use parsing:", str(e), data)
|
print("[Error] Exception during tool_use parsing:", str(e), data)
|
||||||
|
|
||||||
@@ -198,20 +223,32 @@ def tryparse(json_str):
|
|||||||
return json.loads(json_str[:-1])
|
return json.loads(json_str[:-1])
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
llmclient = ToolClient(LLMSession().ask)
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||||
response = llmclient.chat(
|
try: from mykey import sider_cookie
|
||||||
|
except ImportError: sider_cookie = ""
|
||||||
|
try: from mykey import oai_apikey, oai_apibase, oai_model
|
||||||
|
except ImportError: oai_apikey = oai_apibase = oai_model = ""
|
||||||
|
|
||||||
|
llmclient = ToolClient(LLMSession(api_key=oai_apikey, api_base=oai_apibase, model=oai_model).ask)
|
||||||
|
print(llmclient.raw_api("Hello, world!", stream=False))
|
||||||
|
#llmclient = ToolClient(SiderLLMSession().ask)
|
||||||
|
def get_final(gen):
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
print('mid:', next(gen))
|
||||||
|
except StopIteration as e:
|
||||||
|
return e.value
|
||||||
|
|
||||||
|
response = get_final(llmclient.chat(
|
||||||
messages=[{"role": "user", "content": "我的IP是多少"}],
|
messages=[{"role": "user", "content": "我的IP是多少"}],
|
||||||
tools=[{"name": "get_ip", "parameters": {}}]
|
tools=[{"name": "get_ip", "parameters": {}}]
|
||||||
)
|
))
|
||||||
# 4. 获取结果
|
|
||||||
print(f"思考: {response.thinking}")
|
print(f"思考: {response.thinking}")
|
||||||
# -> 我需要查一下 IP。
|
|
||||||
|
|
||||||
if response.tool_calls:
|
if response.tool_calls:
|
||||||
cmd = response.tool_calls[0]
|
cmd = response.tool_calls[0]
|
||||||
print(f"调用: {cmd.function.name} 参数: {cmd.function.arguments}")
|
print(f"调用: {cmd.function.name} 参数: {cmd.function.arguments}")
|
||||||
|
|
||||||
response = llmclient.chat(
|
response = get_final(llmclient.chat(
|
||||||
messages=[{"role": "user", "content": "<tool_result>10.176.45.12</tool_result>"}]
|
messages=[{"role": "user", "content": "<tool_result>10.176.45.12</tool_result>"}]
|
||||||
)
|
))
|
||||||
print(response.content)
|
print(response.content)
|
||||||
35
stapp.py
35
stapp.py
@@ -35,27 +35,32 @@ def render_llm_switcher():
|
|||||||
current_idx = agent.llm_no
|
current_idx = agent.llm_no
|
||||||
st.caption(f"LLM Core: {current_idx}")
|
st.caption(f"LLM Core: {current_idx}")
|
||||||
if st.button("切换备用链路"):
|
if st.button("切换备用链路"):
|
||||||
agent.llm_no = (current_idx + 1) % len(agent.llmclient.raw_apis)
|
agent.next_llm()
|
||||||
st.rerun(scope="fragment")
|
st.rerun(scope="fragment")
|
||||||
if st.button("强行停止任务"):
|
if st.button("强行停止任务"):
|
||||||
agent.abort()
|
agent.abort()
|
||||||
st.toast("已发送停止信号")
|
st.toast("已发送停止信号")
|
||||||
|
if st.button("重新注入System Prompt"):
|
||||||
|
agent.llmclient.last_tools = ''
|
||||||
|
st.toast("下次将重新注入System Prompt")
|
||||||
with st.sidebar: render_llm_switcher()
|
with st.sidebar: render_llm_switcher()
|
||||||
|
|
||||||
@st.fragment(run_every="1s")
|
@st.fragment(run_every="1s")
|
||||||
def global_queue_listener():
|
def global_queue_listener():
|
||||||
if agent.current_source != 'auto': return
|
if agent.current_source == 'auto':
|
||||||
while not agent.display_queue.empty():
|
while not agent.display_queue.empty():
|
||||||
item = agent.display_queue.get()
|
item = agent.display_queue.get()
|
||||||
if 'next' in item:
|
if item.get('source') == 'auto':
|
||||||
st.session_state.idle_buf = item['next']
|
if 'next' in item: st.session_state.idle_buf = item['next']
|
||||||
if 'done' in item:
|
if 'done' in item:
|
||||||
st.session_state.messages.append({"role": "assistant", "content": f"{item['done']}"})
|
st.session_state.messages.append({"role": "assistant", "content": f"🤖 {item['done']}"})
|
||||||
st.session_state.idle_buf = ""
|
st.session_state.idle_buf = ""; st.rerun()
|
||||||
st.rerun()
|
if st.session_state.get("idle_buf"):
|
||||||
if st.session_state.get("idle_buf"):
|
with st.chat_message("assistant"):
|
||||||
with st.chat_message("assistant"):
|
st.write(st.session_state.idle_buf + "▌")
|
||||||
st.write(st.session_state.idle_buf + "▌")
|
else:
|
||||||
|
st.caption("🟢 Agent Listener Active", help=f"Last sync: {int(time.time())}")
|
||||||
|
st.session_state.idle_buf = ""
|
||||||
|
|
||||||
global_queue_listener()
|
global_queue_listener()
|
||||||
|
|
||||||
@@ -65,10 +70,10 @@ def agent_backend_stream(prompt):
|
|||||||
while True:
|
while True:
|
||||||
item = agent.display_queue.get()
|
item = agent.display_queue.get()
|
||||||
if 'next' in item: yield item['next']
|
if 'next' in item: yield item['next']
|
||||||
if 'done' in item: break
|
if 'done' in item:
|
||||||
|
yield item['done']; break
|
||||||
finally:
|
finally:
|
||||||
agent.abort()
|
agent.abort()
|
||||||
print('User aborted the operation.')
|
|
||||||
|
|
||||||
if prompt := st.chat_input("请输入指令"):
|
if prompt := st.chat_input("请输入指令"):
|
||||||
st.session_state.messages.append({"role": "user", "content": prompt})
|
st.session_state.messages.append({"role": "user", "content": prompt})
|
||||||
|
|||||||
Reference in New Issue
Block a user