From 669095699e4ec6e2c553da243d99433351392721 Mon Sep 17 00:00:00 2001 From: Liang Jiaqing Date: Sat, 31 Jan 2026 15:47:23 +0800 Subject: [PATCH] feat: refine vision processing logic and update SOP --- agent_loop.py | 2 +- agentapp.py | 15 +++++++++++++-- ga.py | 26 ++++++++++++++------------ sidercall.py | 18 ++++++++++++------ 4 files changed, 40 insertions(+), 21 deletions(-) diff --git a/agent_loop.py b/agent_loop.py index 7f53380..5d02ff0 100644 --- a/agent_loop.py +++ b/agent_loop.py @@ -48,8 +48,8 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, response = client.chat(messages=messages, tools=tools_schema) if response.thinking: yield '' + response.thinking + '\n\n' - if '```' in response.content: response.content = response.content.replace('```', '\n```') showcontent = response.content + if '' in showcontent: showcontent = showcontent.replace('', '\n\n') if '' in showcontent: showcontent = re.sub(r'\s*(.*?)\s*', r'\n````\n\n\1\n\n````', showcontent, flags=re.DOTALL) yield showcontent + '\n\n' diff --git a/agentapp.py b/agentapp.py index 27750f4..c2c100a 100644 --- a/agentapp.py +++ b/agentapp.py @@ -18,8 +18,11 @@ from agent_loop import agent_runner_loop, StepOutcome, BaseHandler @st.cache_resource def init(): if not os.path.exists('temp'): os.makedirs('temp') - mainllm = SiderLLMSession(multiturns=6) - llmclient = ToolClient(mainllm.ask, auto_save_tokens=True) + llm_sessions = [SiderLLMSession(default_model="gemini-3.0-flash"), + SiderLLMSession(default_model="gpt-5-mini"), + SiderLLMSession(default_model="claude-4.5-haiku"), + LLMSession()] + llmclient = ToolClient([x.ask for x in llm_sessions], auto_save_tokens=True) return llmclient llmclient = init() @@ -47,6 +50,7 @@ def agent_backend_stream(raw_query): sys_prompt = get_system_prompt() handler = GenericAgentHandler(None, history, './temp') llmclient.last_tools = '' + llmclient.raw_api = llmclient.raw_apis[st.session_state.get("llm_no", 0)] ret = yield from agent_runner_loop(llmclient, sys_prompt, raw_query, handler, TOOLS_SCHEMA, max_turns=25) @@ -62,6 +66,13 @@ for msg in st.session_state.messages: with st.chat_message(msg["role"]): st.markdown(msg["content"]) +with st.sidebar: + current_idx = st.session_state.get("llm_no", 0) + st.caption(f"LLM Core: {current_idx}") + if st.button("切换备用链路"): + st.session_state.llm_no = (st.session_state.get("llm_no", 0) + 1) % len(llmclient.raw_apis) + st.rerun() + if prompt := st.chat_input("请输入指令"): st.session_state.messages.append({"role": "user", "content": prompt}) with st.chat_message("user"): diff --git a/ga.py b/ga.py index a75fa7d..605d605 100644 --- a/ga.py +++ b/ga.py @@ -66,12 +66,12 @@ def code_run(code: str, code_type: str = "python", timeout: int = 60, cwd: str = status = "success" if exit_code == 0 else "error" status_icon = "✅" if exit_code == 0 else "❌" if exit_code is None: status_icon = "⏳" - output_snippet = (stdout_str[:100] + '...' + stdout_str[-100:]) if len(stdout_str) > 300 else stdout_str + output_snippet = smart_format(stdout_str, max_str_len=600, omit_str='\n[omitted long output]\n') yield f"[Status] {status_icon} Exit Code: {exit_code}\n[Stdout]\n{output_snippet}\n" - if process.stdout: process.stdout.close() + if process.stdout: threading.Thread(target=process.stdout.close, daemon=True).start() return { "status": status, - "stdout": stdout_str[-2000:], + "stdout": smart_format(stdout_str, max_str_len=4000, omit_str='\n[omitted long output]\n'), "exit_code": exit_code } except Exception as e: @@ -198,7 +198,7 @@ def file_read(path, start=1, keyword=None, count=100, show_linenos=True): try: with open(path, 'r', encoding='utf-8', errors='replace') as f: stream = ( - (i, (l[:L_MAX].rstrip() + TAG if len(l) > L_MAX else l.rstrip())) + (i, (l[:L_MAX].rstrip() + TAG if len(l) > L_MAX else l.rstrip('\r\n'))) for i, l in enumerate(f, 1) ) stream = itertools.dropwhile(lambda x: x[0] < start, stream) @@ -211,15 +211,15 @@ def file_read(path, start=1, keyword=None, count=100, show_linenos=True): before.append((i, l)) else: return f"Keyword '{keyword}' not found after line {start}." else: res = itertools.islice(stream, count) - return "\n".join(f"{i}| {l}" if show_linenos else l for i, l in res) + return "\n".join(f"{i}|{l}" if show_linenos else l for i, l in res) except Exception as e: return f"Error: {str(e)}" -def smart_format(data, max_depth=2, max_str_len=100): +def smart_format(data, max_depth=2, max_str_len=100, omit_str=' ... '): def truncate(obj, depth): if isinstance(obj, str): - if len(obj) > max_str_len: return f"{obj[:max_str_len//2]} ... {obj[-max_str_len//2:]}" - return obj + if len(obj) < max_str_len+len(omit_str)*2: return obj + return f"{obj[:max_str_len//2]}{omit_str}{obj[-max_str_len//2:]}" if depth >= max_depth: return truncate(str(obj), depth + 1) if isinstance(obj, dict): return {k: truncate(v, depth + 1) for k, v in obj.items()} if isinstance(obj, list): return [truncate(i, depth + 1) for i in obj] @@ -259,15 +259,17 @@ class GenericAgentHandler(BaseHandler): # 从 response.content 中提取代码块, 匹配 ```python ... ``` 或 ```powershell ... ``` pattern = rf"```{code_type}\n(.*?)\n```" matches = re.findall(pattern, response.content, re.DOTALL) + warning = "" if not matches: - return StepOutcome(None, next_prompt=f"【系统错误】:你调用了 code_run,但未在回复中提供 ```{code_type} 代码块。请重新输出代码并附带工具调用。") - # 提取最后一个代码块(通常是模型修正后的最终逻辑) - code = matches[-1].strip() + code = args.get("code") + if not code: return StepOutcome(None, next_prompt=f"【系统错误】:你调用了 code_run,但未在回复中提供 ```{code_type} 代码块。请重新输出代码并附带工具调用。") + warning = "\n下次要记得在回复中提供代码块,而不是放在参数中" + else: code = matches[-1].strip() # 提取最后一个代码块(通常是模型修正后的最终逻辑) timeout = args.get("timeout", 60) raw_path = os.path.join(self.cwd, args.get("cwd", './')) cwd = os.path.normpath(os.path.abspath(raw_path)) result = yield from code_run(code, code_type, timeout, cwd) - next_prompt = self._get_anchor_prompt() + next_prompt = self._get_anchor_prompt() + warning return StepOutcome(result, next_prompt=next_prompt) def do_ask_user(self, args, response): diff --git a/sidercall.py b/sidercall.py index 1418f49..6c78a1d 100644 --- a/sidercall.py +++ b/sidercall.py @@ -8,9 +8,11 @@ except ImportError: capikey = "" class SiderLLMSession: - def __init__(self, multiturns=6): + def __init__(self, multiturns=6, default_model="gemini-3.0-flash"): self._core = Session(cookie=sider_cookie, proxies={'https':'127.0.0.1:2082'}) - def ask(self, prompt, model="gemini-3.0-flash"): + self.default_model = default_model + def ask(self, prompt, model=None): + if model is None: model = self.default_model if len(prompt) > 29000: print(f"[Warn] Prompt too long ({len(prompt)} chars), truncating.") prompt = prompt[-29000:] @@ -44,7 +46,7 @@ class LLMSession: elif not omit_images and msg['image']: messages.append({"role": msg['role'], "content": [ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{msg['image']}"}}, - {"type": "text", "text": msg['prompt']} ]}) + {"type": "text", "text": msg['prompt']} ]}) else: messages.append({"role": msg['role'], "content": msg['prompt']}) return messages @@ -56,8 +58,10 @@ class LLMSession: p = "Summarize prev summary and prev conversations into compact memory (facts/decisions/constraints/open questions). Do NOT restate long schemas. The new summary should less than 1000 tokens.\n" messages = self.make_messages(old, omit_images=True) messages += [{"role":"user", "content":p}] - self.summary = self.raw_ask(messages, model, temperature=0.1) - self.raw_msgs.insert(0, {"role":"system", "prompt":"Prev summary:\n"+self.summary, "image":None}) + summary = self.raw_ask(messages, model, temperature=0.1) + if not summary.startswith("Error:"): + self.raw_msgs.insert(0, {"role":"system", "prompt":"Prev summary:\n"+summary, "image":None}) + else: self.raw_msgs = old + self.raw_msgs # 不做了,下次再做 def ask(self, prompt, model="openai/gpt-5.1", image_base64=None): self.raw_msgs.append({"role": "user", "prompt": prompt, "image": image_base64}) @@ -92,7 +96,9 @@ class MockResponse: class ToolClient: def __init__(self, raw_api_func, auto_save_tokens=False): - self.raw_api = raw_api_func + if isinstance(raw_api_func, list): self.raw_apis = raw_api_func + else: self.raw_apis = [raw_api_func] + self.raw_api = self.raw_apis[0] self.auto_save_tokens = auto_save_tokens self.last_tools = '' self.total_cd_tokens = 0