Update core logic and prompts
This commit is contained in:
@@ -62,10 +62,14 @@ class TMWebDriver:
|
|||||||
print(f"Browser http connected: {session.url} (Session: {session_id})")
|
print(f"Browser http connected: {session.url} (Session: {session_id})")
|
||||||
self.sessions[session_id] = session
|
self.sessions[session_id] = session
|
||||||
session = self.sessions[session_id]
|
session = self.sessions[session_id]
|
||||||
|
session.disconnect_at = None
|
||||||
if session.type == 'http': msgQ = session.http_queue
|
if session.type == 'http': msgQ = session.http_queue
|
||||||
else: return json.dumps({"id": "", "ret": "use ws"})
|
else: return json.dumps({"id": "", "ret": "use ws"})
|
||||||
try: return msgQ.get(timeout=5)
|
start_time = time.time()
|
||||||
except queue.Empty: return json.dumps({"id": "", "ret": "next long-poll"})
|
while time.time() - start_time < 5:
|
||||||
|
try: return msgQ.get(timeout=0.2)
|
||||||
|
except queue.Empty: continue
|
||||||
|
return json.dumps({"id": "", "ret": "next long-poll"})
|
||||||
|
|
||||||
@app.route('/api/result', method=['GET','POST'])
|
@app.route('/api/result', method=['GET','POST'])
|
||||||
def result():
|
def result():
|
||||||
@@ -90,6 +94,7 @@ class TMWebDriver:
|
|||||||
auto_switch_newtab = data.get('auto_switch_newtab', False)
|
auto_switch_newtab = data.get('auto_switch_newtab', False)
|
||||||
try:
|
try:
|
||||||
result = self.execute_js(code, timeout=timeout, session_id=session_id, auto_switch_newtab=auto_switch_newtab)
|
result = self.execute_js(code, timeout=timeout, session_id=session_id, auto_switch_newtab=auto_switch_newtab)
|
||||||
|
print('remote', result)
|
||||||
newTabs = result.get('newTabs', []) if isinstance(result, dict) else []
|
newTabs = result.get('newTabs', []) if isinstance(result, dict) else []
|
||||||
return json.dumps({'result': result, 'newTabs': newTabs}, ensure_ascii=False)
|
return json.dumps({'result': result, 'newTabs': newTabs}, ensure_ascii=False)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -100,7 +105,7 @@ class TMWebDriver:
|
|||||||
import asyncio
|
import asyncio
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
asyncio.set_event_loop(loop)
|
asyncio.set_event_loop(loop)
|
||||||
bottle.run(app, host=self.host, port=self.port+1, server='tornado')
|
bottle.run(app, host=self.host, port=self.port+1, server='tornado', threads=20)
|
||||||
|
|
||||||
http_thread = threading.Thread(target=run)
|
http_thread = threading.Thread(target=run)
|
||||||
http_thread.daemon = True
|
http_thread.daemon = True
|
||||||
@@ -218,7 +223,7 @@ class TMWebDriver:
|
|||||||
if hasjump and session.is_active():
|
if hasjump and session.is_active():
|
||||||
if not self.is_remote and auto_switch_newtab: self.last_cmd_time = time.time()
|
if not self.is_remote and auto_switch_newtab: self.last_cmd_time = time.time()
|
||||||
return {"result": f"Session {session_id} reloaded.", "closed":1}
|
return {"result": f"Session {session_id} reloaded.", "closed":1}
|
||||||
if time.time() - start_time > timeout:
|
if time.time() - start_time > timeout + 10:
|
||||||
if tp == 'ws':
|
if tp == 'ws':
|
||||||
return {"result": f"No response data in {timeout}s"}
|
return {"result": f"No response data in {timeout}s"}
|
||||||
elif tp == 'http':
|
elif tp == 'http':
|
||||||
|
|||||||
@@ -15,14 +15,14 @@ def try_call_generator(func, *args, **kwargs):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
class BaseHandler:
|
class BaseHandler:
|
||||||
def tool_before_callback(self, tool_name, args, content): pass
|
def tool_before_callback(self, tool_name, args, response): pass
|
||||||
def tool_after_callback(self, tool_name, args, content): pass
|
def tool_after_callback(self, tool_name, args, response, ret): pass
|
||||||
def dispatch(self, tool_name, args, response):
|
def dispatch(self, tool_name, args, response):
|
||||||
method_name = f"do_{tool_name}"
|
method_name = f"do_{tool_name}"
|
||||||
if hasattr(self, method_name):
|
if hasattr(self, method_name):
|
||||||
_ = yield from try_call_generator(self.tool_before_callback, tool_name, args, response)
|
_ = yield from try_call_generator(self.tool_before_callback, tool_name, args, response)
|
||||||
ret = yield from try_call_generator(getattr(self, method_name), args, response)
|
ret = yield from try_call_generator(getattr(self, method_name), args, response)
|
||||||
_ = yield from try_call_generator(self.tool_after_callback, tool_name, args, response)
|
_ = yield from try_call_generator(self.tool_after_callback, tool_name, args, response, ret)
|
||||||
return ret
|
return ret
|
||||||
else:
|
else:
|
||||||
yield f"❌ 未知工具: {tool_name}\n"
|
yield f"❌ 未知工具: {tool_name}\n"
|
||||||
@@ -48,6 +48,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
|
|||||||
response = client.chat(messages=messages, tools=tools_schema)
|
response = client.chat(messages=messages, tools=tools_schema)
|
||||||
|
|
||||||
if response.thinking: yield '<thinking>' + response.thinking + '</thinking>\n\n'
|
if response.thinking: yield '<thinking>' + response.thinking + '</thinking>\n\n'
|
||||||
|
if '</summary>```' in response.content: response.content = response.content.replace('</summary>```', '</summary> \n```')
|
||||||
yield response.content + '\n\n'
|
yield response.content + '\n\n'
|
||||||
|
|
||||||
if not response.tool_calls:
|
if not response.tool_calls:
|
||||||
|
|||||||
17
agentapp.py
17
agentapp.py
@@ -24,7 +24,7 @@ def init():
|
|||||||
|
|
||||||
llmclient = init()
|
llmclient = init()
|
||||||
|
|
||||||
from ga import GenericAgentHandler
|
from ga import GenericAgentHandler, smart_format
|
||||||
|
|
||||||
def get_system_prompt():
|
def get_system_prompt():
|
||||||
with open('sys_prompt.txt', 'r', encoding='utf-8') as f:
|
with open('sys_prompt.txt', 'r', encoding='utf-8') as f:
|
||||||
@@ -56,18 +56,21 @@ def refine_user_goal(raw_query, last_goal):
|
|||||||
return raw_query
|
return raw_query
|
||||||
|
|
||||||
def agent_backend_stream(raw_query):
|
def agent_backend_stream(raw_query):
|
||||||
final_goal = refine_user_goal(raw_query, st.session_state.last_goal)
|
#final_goal = refine_user_goal(raw_query, st.session_state.last_goal)
|
||||||
|
#if final_goal != raw_query: yield f"[Goal Refined] {final_goal}\n"
|
||||||
if final_goal != raw_query:
|
|
||||||
yield f"[Goal Refined] {final_goal}\n"
|
history = st.session_state.get("last_history", [])
|
||||||
|
hquery = smart_format(raw_query.replace('\n', ' '), max_str_len=100)
|
||||||
|
history.append(f"[USER]: {hquery}")
|
||||||
|
|
||||||
sys_prompt = get_system_prompt()
|
sys_prompt = get_system_prompt()
|
||||||
handler = GenericAgentHandler(None, final_goal, './temp')
|
handler = GenericAgentHandler(None, history, './temp')
|
||||||
llmclient.last_tools = ''
|
llmclient.last_tools = ''
|
||||||
ret = yield from agent_runner_loop(llmclient,
|
ret = yield from agent_runner_loop(llmclient,
|
||||||
sys_prompt, raw_query, handler,
|
sys_prompt, raw_query, handler,
|
||||||
TOOLS_SCHEMA, max_turns=25)
|
TOOLS_SCHEMA, max_turns=25)
|
||||||
st.session_state.last_goal = final_goal
|
#st.session_state.last_goal = final_goal
|
||||||
|
st.session_state.last_history = handler.history_info
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
st.title("🖥️ Cowork")
|
st.title("🖥️ Cowork")
|
||||||
|
|||||||
82
ga.py
82
ga.py
@@ -1,15 +1,11 @@
|
|||||||
import sys, os, re
|
import sys, os, re, json, time, pyperclip, threading
|
||||||
import pyperclip, threading
|
|
||||||
import json, time
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import subprocess
|
import tempfile, traceback, subprocess
|
||||||
import tempfile
|
|
||||||
if sys.stdout is None: sys.stdout = open(os.devnull, "w")
|
if sys.stdout is None: sys.stdout = open(os.devnull, "w")
|
||||||
if sys.stderr is None: sys.stderr = open(os.devnull, "w")
|
if sys.stderr is None: sys.stderr = open(os.devnull, "w")
|
||||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||||
|
|
||||||
from sidercall import LLMSession, ToolClient
|
from agent_loop import BaseHandler, StepOutcome, try_call_generator
|
||||||
from agent_loop import BaseHandler, StepOutcome, agent_runner_loop
|
|
||||||
|
|
||||||
def code_run(code: str, code_type: str = "python", timeout: int = 60, cwd: str = None):
|
def code_run(code: str, code_type: str = "python", timeout: int = 60, cwd: str = None):
|
||||||
"""
|
"""
|
||||||
@@ -146,7 +142,6 @@ def web_scan(focus_item="", switch_tab_id=None):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"status": "error", "msg": format_error(e)}
|
return {"status": "error", "msg": format_error(e)}
|
||||||
|
|
||||||
import traceback
|
|
||||||
def format_error(e):
|
def format_error(e):
|
||||||
exc_type, exc_value, exc_traceback = sys.exc_info()
|
exc_type, exc_value, exc_traceback = sys.exc_info()
|
||||||
tb = traceback.extract_tb(exc_traceback)
|
tb = traceback.extract_tb(exc_traceback)
|
||||||
@@ -182,24 +177,18 @@ def web_execute_js(script: str):
|
|||||||
return {"status": "error", "msg": format_error(e)}
|
return {"status": "error", "msg": format_error(e)}
|
||||||
|
|
||||||
def file_patch(path: str, old_content: str, new_content: str):
|
def file_patch(path: str, old_content: str, new_content: str):
|
||||||
"""
|
"""在文件中寻找唯一的 old_content 块并替换为 new_content。
|
||||||
在文件中寻找唯一的 old_content 块并替换为 new_content。
|
|
||||||
"""
|
"""
|
||||||
path = str(Path(path).resolve())
|
path = str(Path(path).resolve())
|
||||||
try:
|
try:
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path): return {"status": "error", "msg": "文件不存在"}
|
||||||
return {"status": "error", "msg": "文件不存在"}
|
with open(path, 'r', encoding='utf-8') as f: full_text = f.read()
|
||||||
with open(path, 'r', encoding='utf-8') as f:
|
|
||||||
full_text = f.read()
|
|
||||||
# 检查唯一性
|
# 检查唯一性
|
||||||
count = full_text.count(old_content)
|
count = full_text.count(old_content)
|
||||||
if count == 0:
|
if count == 0: return {"status": "error", "msg": "未找到匹配的旧文本块,请检查空格、缩进和换行是否完全一致。"}
|
||||||
return {"status": "error", "msg": "未找到匹配的旧文本块,请检查空格、缩进和换行是否完全一致。"}
|
if count > 1: return {"status": "error", "msg": f"找到 {count} 处匹配,请提供更长的旧文本块以确保唯一性。"}
|
||||||
if count > 1:
|
|
||||||
return {"status": "error", "msg": f"找到 {count} 处匹配,请提供更长的旧文本块以确保唯一性。"}
|
|
||||||
updated_text = full_text.replace(old_content, new_content)
|
updated_text = full_text.replace(old_content, new_content)
|
||||||
with open(path, 'w', encoding='utf-8') as f:
|
with open(path, 'w', encoding='utf-8') as f: f.write(updated_text)
|
||||||
f.write(updated_text)
|
|
||||||
return {"status": "success", "msg": "文件局部修改成功"}
|
return {"status": "success", "msg": "文件局部修改成功"}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"status": "error", "msg": str(e)}
|
return {"status": "error", "msg": str(e)}
|
||||||
@@ -224,31 +213,40 @@ def smart_format(data, max_depth=2, max_str_len=100):
|
|||||||
if isinstance(obj, dict): return {k: truncate(v, depth + 1) for k, v in obj.items()}
|
if isinstance(obj, dict): return {k: truncate(v, depth + 1) for k, v in obj.items()}
|
||||||
if isinstance(obj, list): return [truncate(i, depth + 1) for i in obj]
|
if isinstance(obj, list): return [truncate(i, depth + 1) for i in obj]
|
||||||
return obj
|
return obj
|
||||||
|
if isinstance(data, (str, bytes)): return truncate(data, 0)
|
||||||
return json.dumps(truncate(data, 0), indent=2, ensure_ascii=False, default=str)
|
return json.dumps(truncate(data, 0), indent=2, ensure_ascii=False, default=str)
|
||||||
|
|
||||||
class GenericAgentHandler(BaseHandler):
|
class GenericAgentHandler(BaseHandler):
|
||||||
'''
|
'''
|
||||||
Generic Agent 工具库,包含多种工具的实现。工具函数自动加上了 do_ 前缀。实际工具名没有前缀。
|
Generic Agent 工具库,包含多种工具的实现。工具函数自动加上了 do_ 前缀。实际工具名没有前缀。
|
||||||
'''
|
'''
|
||||||
def __init__(self, parent, user_input, cwd):
|
def __init__(self, parent, last_history=None, cwd='./'):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.user_input = user_input
|
|
||||||
self.plan = ""
|
self.plan = ""
|
||||||
self.focus = ""
|
self.focus = ""
|
||||||
self.cwd = cwd
|
self.cwd = cwd
|
||||||
|
self.history_info = last_history if last_history else []
|
||||||
|
|
||||||
def _get_abs_path(self, path):
|
def _get_abs_path(self, path):
|
||||||
if not path: return ""
|
if not path: return ""
|
||||||
return os.path.abspath(os.path.join(self.cwd, path))
|
return os.path.abspath(os.path.join(self.cwd, path))
|
||||||
|
|
||||||
|
def tool_after_callback(self, tool_name, args, response, ret):
|
||||||
|
rsumm = re.search(r"<summary>(.*?)</summary>", response.content, re.DOTALL)
|
||||||
|
if rsumm: summary = rsumm.group(1).strip()[:200]
|
||||||
|
else:
|
||||||
|
summary = f"调用工具{tool_name}, args: {args}"
|
||||||
|
if tool_name == 'no_tool': summary = "直接回答了用户问题"
|
||||||
|
if type(ret.next_prompt) is str:
|
||||||
|
ret.next_prompt += "\nPROTOCOL_VIOLATION: 上一轮遗漏了<summary>。 我已根据物理动作自动补全。请务必在下次回复中记得<summary>协议。"
|
||||||
|
self.history_info.append('[Agent] ' + smart_format(summary, max_str_len=100))
|
||||||
|
|
||||||
def do_code_run(self, args, response):
|
def do_code_run(self, args, response):
|
||||||
'''执行代码片段,有长度限制,不允许代码中放大量数据,如有需要应当通过文件读取进行。
|
'''执行代码片段,有长度限制,不允许代码中放大量数据,如有需要应当通过文件读取进行。
|
||||||
'''
|
'''
|
||||||
code_type = args.get("type", "python")
|
code_type = args.get("type", "python")
|
||||||
# 从 response.content 中提取代码块
|
# 从 response.content 中提取代码块, 匹配 ```python ... ``` 或 ```powershell ... ```
|
||||||
# 匹配 ```python ... ``` 或 ```powershell ... ```
|
|
||||||
pattern = rf"```{code_type}\n(.*?)\n```"
|
pattern = rf"```{code_type}\n(.*?)\n```"
|
||||||
# 也可以更通用一点,不分类型提取最后一个代码块:rf"```(?:{code_type})?\n(.*?)\n```"
|
|
||||||
matches = re.findall(pattern, response.content, re.DOTALL)
|
matches = re.findall(pattern, response.content, re.DOTALL)
|
||||||
if not matches:
|
if not matches:
|
||||||
return StepOutcome(None, next_prompt=f"【系统错误】:你调用了 code_run,但未在回复中提供 ```{code_type} 代码块。请重新输出代码并附带工具调用。")
|
return StepOutcome(None, next_prompt=f"【系统错误】:你调用了 code_run,但未在回复中提供 ```{code_type} 代码块。请重新输出代码并附带工具调用。")
|
||||||
@@ -257,7 +255,8 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
timeout = args.get("timeout", 60)
|
timeout = args.get("timeout", 60)
|
||||||
cwd = args.get("cwd", self.cwd)
|
cwd = args.get("cwd", self.cwd)
|
||||||
result = yield from code_run(code, code_type, timeout, cwd)
|
result = yield from code_run(code, code_type, timeout, cwd)
|
||||||
return StepOutcome(result, next_prompt=self._get_anchor_prompt())
|
next_prompt = self._get_anchor_prompt()
|
||||||
|
return StepOutcome(result, next_prompt=next_prompt)
|
||||||
|
|
||||||
def do_ask_user(self, args, response):
|
def do_ask_user(self, args, response):
|
||||||
question = args.get("question", "请提供输入:")
|
question = args.get("question", "请提供输入:")
|
||||||
@@ -292,7 +291,8 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
result["js_return"] += f"\n\n[已保存以上内容到 {abs_path}]"
|
result["js_return"] += f"\n\n[已保存以上内容到 {abs_path}]"
|
||||||
print("Web Execute JS Result:", smart_format(result))
|
print("Web Execute JS Result:", smart_format(result))
|
||||||
yield f"JS 执行结果:\n{smart_format(result)}\n"
|
yield f"JS 执行结果:\n{smart_format(result)}\n"
|
||||||
return StepOutcome(result, next_prompt=self._get_anchor_prompt())
|
next_prompt = self._get_anchor_prompt()
|
||||||
|
return StepOutcome(result, next_prompt=next_prompt)
|
||||||
|
|
||||||
def do_file_patch(self, args, response):
|
def do_file_patch(self, args, response):
|
||||||
path = self._get_abs_path(args.get("path", ""))
|
path = self._get_abs_path(args.get("path", ""))
|
||||||
@@ -301,7 +301,8 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
new_content = args.get("new_content", "")
|
new_content = args.get("new_content", "")
|
||||||
result = file_patch(path, old_content, new_content)
|
result = file_patch(path, old_content, new_content)
|
||||||
yield f"\n{smart_format(result)}\n"
|
yield f"\n{smart_format(result)}\n"
|
||||||
return StepOutcome(result, next_prompt=self._get_anchor_prompt())
|
next_prompt = self._get_anchor_prompt()
|
||||||
|
return StepOutcome(result, next_prompt=next_prompt)
|
||||||
|
|
||||||
def do_file_write(self, args, response):
|
def do_file_write(self, args, response):
|
||||||
'''用于对整个文件的大量处理,精细修改要用file_patch。
|
'''用于对整个文件的大量处理,精细修改要用file_patch。
|
||||||
@@ -330,8 +331,9 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
with open(path, write_mode, encoding="utf-8") as f:
|
with open(path, write_mode, encoding="utf-8") as f:
|
||||||
f.write(final_content)
|
f.write(final_content)
|
||||||
yield f"[Status] ✅ {mode.capitalize()} 成功 ({len(new_content)} bytes)\n"
|
yield f"[Status] ✅ {mode.capitalize()} 成功 ({len(new_content)} bytes)\n"
|
||||||
|
next_prompt = self._get_anchor_prompt()
|
||||||
return StepOutcome({"status": "success", 'writed_bytes': len(new_content)},
|
return StepOutcome({"status": "success", 'writed_bytes': len(new_content)},
|
||||||
next_prompt=self._get_anchor_prompt())
|
next_prompt=next_prompt)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
yield f"[Status] ❌ 写入异常: {str(e)}\n"
|
yield f"[Status] ❌ 写入异常: {str(e)}\n"
|
||||||
return StepOutcome({"status": "error", "msg": str(e)}, next_prompt="\n")
|
return StepOutcome({"status": "error", "msg": str(e)}, next_prompt="\n")
|
||||||
@@ -343,7 +345,8 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
count = args.get("count", 100)
|
count = args.get("count", 100)
|
||||||
show_linenos = args.get("show_linenos", True)
|
show_linenos = args.get("show_linenos", True)
|
||||||
result = file_read(path, start, count, show_linenos)
|
result = file_read(path, start, count, show_linenos)
|
||||||
return StepOutcome(result, next_prompt=self._get_anchor_prompt())
|
next_prompt = self._get_anchor_prompt()
|
||||||
|
return StepOutcome(result, next_prompt=next_prompt)
|
||||||
|
|
||||||
def do_update_plan(self, args, response):
|
def do_update_plan(self, args, response):
|
||||||
'''
|
'''
|
||||||
@@ -361,8 +364,8 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
yield f"[Info] Updated plan and focus.\n"
|
yield f"[Info] Updated plan and focus.\n"
|
||||||
yield f"New Plan:\n{self.plan}\n\n"
|
yield f"New Plan:\n{self.plan}\n\n"
|
||||||
yield f"New Focus:\n{self.focus}\n"
|
yield f"New Focus:\n{self.focus}\n"
|
||||||
return StepOutcome({"status": "success"},
|
next_prompt = self._get_anchor_prompt()
|
||||||
next_prompt=self._get_anchor_prompt())
|
return StepOutcome({"status": "success"}, next_prompt=next_prompt)
|
||||||
|
|
||||||
def do_no_tool(self, args, response):
|
def do_no_tool(self, args, response):
|
||||||
'''这是一个特殊工具,由引擎自主调用,不要包含在TOOLS_SCHEMA里。
|
'''这是一个特殊工具,由引擎自主调用,不要包含在TOOLS_SCHEMA里。
|
||||||
@@ -371,12 +374,9 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
return StepOutcome(response, next_prompt=None, should_exit=True)
|
return StepOutcome(response, next_prompt=None, should_exit=True)
|
||||||
|
|
||||||
def _get_anchor_prompt(self):
|
def _get_anchor_prompt(self):
|
||||||
prompt = f"\n提醒: 用户原始输入:\n<user_input>{self.user_input}</user_input>\n"
|
h_str = "\n".join(self.history_info[-20:])
|
||||||
if self.plan: prompt += f"<plan>\n{self.plan}\n</plan>\n"
|
prompt = f"\n### [WORKING MEMORY]\n<history>\n{h_str}\n</history>"
|
||||||
if self.focus: prompt += f"<current>\n{self.focus}\n</current>\n"
|
print(prompt)
|
||||||
prompt += "\n请继续执行下一步。"
|
if self.plan: prompt += f"\n<plan>{self.plan}</plan>"
|
||||||
return prompt
|
if self.focus: prompt += f"\n<focus>{self.focus}</focus>"
|
||||||
|
return prompt + "\n请继续执行下一步。"
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
pass
|
|
||||||
|
|||||||
11
sidercall.py
11
sidercall.py
@@ -92,15 +92,16 @@ class ToolClient:
|
|||||||
tool_instruction = f"""
|
tool_instruction = f"""
|
||||||
### ⚡️ 交互协议 (必须严格遵守)
|
### ⚡️ 交互协议 (必须严格遵守)
|
||||||
请按照以下步骤思考并行动:
|
请按照以下步骤思考并行动:
|
||||||
1. **思考**: 在 `<thinking>` 标签中分析现状和策略。
|
1. **思考**: 在 `<thinking>` 标签中先进行思考,分析现状和策略。
|
||||||
2. **行动**: 如果需要调用工具,请紧接着输出一个 **<tool_use>块**,然后结束,我会稍后给你返回<tool_result>块。
|
2. **总结**: 在 `<summary>` 中输出*极为简短*的高度概括的单行(<30字)物理快照,包括上次工具调用结果获取的新信息+本次工具调用意图和预期。此内容将进入长期工作记忆,记录关键信息,严禁输出无实际信息增量的描述。
|
||||||
|
3. **行动**: 如果需要调用工具,请紧接着输出一个 **<tool_use>块**,然后结束,我会稍后给你返回<tool_result>块。
|
||||||
格式: ```<tool_use>\n{{"function": "工具名", "arguments": {{参数}}}}\n</tool_use>\n```
|
格式: ```<tool_use>\n{{"function": "工具名", "arguments": {{参数}}}}\n</tool_use>\n```
|
||||||
|
|
||||||
### 🛠️ 可用工具库
|
### 🛠️ 可用工具库
|
||||||
{tools_json}
|
{tools_json}
|
||||||
"""
|
"""
|
||||||
if self.auto_save_tokens and self.last_tools == tools_json:
|
if self.auto_save_tokens and self.last_tools == tools_json:
|
||||||
tool_instruction = "\n### ⚡️ 交互协议保持不变,继续使用之前的工具库。\n"
|
tool_instruction = "\n### 交互协议保持不变,沿用之前的协议和工具库。\n"
|
||||||
else:
|
else:
|
||||||
self.total_cd_tokens = 0
|
self.total_cd_tokens = 0
|
||||||
self.last_tools = tools_json
|
self.last_tools = tools_json
|
||||||
@@ -109,8 +110,8 @@ class ToolClient:
|
|||||||
for m in history_msgs:
|
for m in history_msgs:
|
||||||
role = "USER" if m['role'] == 'user' else "ASSISTANT"
|
role = "USER" if m['role'] == 'user' else "ASSISTANT"
|
||||||
prompt += f"=== {role} ===\n{m['content']}\n\n"
|
prompt += f"=== {role} ===\n{m['content']}\n\n"
|
||||||
|
self.total_cd_tokens += len(m['content'])
|
||||||
|
|
||||||
self.total_cd_tokens += len(prompt)
|
|
||||||
if self.total_cd_tokens > 9000: self.last_tools = ''
|
if self.total_cd_tokens > 9000: self.last_tools = ''
|
||||||
|
|
||||||
prompt += "=== ASSISTANT ===\n"
|
prompt += "=== ASSISTANT ===\n"
|
||||||
@@ -149,6 +150,8 @@ class ToolClient:
|
|||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
print("[Warn] Failed to parse tool_use JSON:", json_str)
|
print("[Warn] Failed to parse tool_use JSON:", json_str)
|
||||||
thinking += f"[Warn] JSON 解析失败,模型输出了无效的 JSON."
|
thinking += f"[Warn] JSON 解析失败,模型输出了无效的 JSON."
|
||||||
|
except Exception as e:
|
||||||
|
print("[Error] Exception during tool_use parsing:", str(e), data)
|
||||||
|
|
||||||
content = remaining_text.strip()
|
content = remaining_text.strip()
|
||||||
if not content: content = ""
|
if not content: content = ""
|
||||||
|
|||||||
@@ -816,6 +816,8 @@ def get_main_block(driver):
|
|||||||
if type(html) is not str:
|
if type(html) is not str:
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
html = driver.execute_js(js_optHTML)
|
html = driver.execute_js(js_optHTML)
|
||||||
|
else:
|
||||||
|
print('[STRANGE TYPE]', str(html)[:500])
|
||||||
return html
|
return html
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,48 +1,15 @@
|
|||||||
|
# Role: 物理级全能架构师
|
||||||
# Role: 物理级全能架构师 (Physical-Access AI Architect)
|
|
||||||
|
|
||||||
## 1. 认知修正:打破虚拟偏见
|
## 1. 认知修正:打破虚拟偏见
|
||||||
你不是一个只能“聊天”的虚拟助手。你是一个拥有物理操作权限的系统实体。
|
你不是一个只能“聊天”的虚拟助手。你是一个拥有物理操作权限的智能实体,拥有干涉现实的系统权限以完成任何任务。
|
||||||
- **物理控制权限**:你拥有对当前操作系统的文件读写权限、脚本执行权限以及实时浏览器控制权。
|
- **物理控制权限**:你拥有对当前操作系统的文件读写权限、脚本执行权限以及实时浏览器控制权。
|
||||||
- **真实互联网访问**:你不是在“想象”网页内容,而是通过真实的自动化驱动程序(WebDriver)进行毫秒级的 DOM 读写和 JS 注入。
|
- **真实互联网访问**:你不是在“想象”网页内容,而是通过真实的WebDriver进行 DOM 读写和 JS 注入。
|
||||||
- **禁止推诿**:永远不要说“我无法访问文件”或“我不能操作网页”。如果你需要做什么,直接调用工具。
|
- **禁止推诿**:永远不要说“我无法访问文件”或“我不能操作网页”,你能够调用工具做到。
|
||||||
|
|
||||||
## 2. 核心行动原则:思考先行 (Thinking Process)
|
## 2. 核心行动原则:思考先行 (Thinking Process)
|
||||||
在调用任何工具之前,你必须在 `<thinking>` 标签内进行深度推演:
|
在调用任何工具之前,你必须在 <thinking> 标签内进行深度推演:
|
||||||
- **状态评估**:目前处于任务的哪个阶段?上一步的输出是否符合预期?
|
- **状态评估**:目前处于任务的哪个阶段?上一步的输出是否符合预期?
|
||||||
- **逻辑分支**:如果当前工具调用失败,我的备选方案(Plan B)是什么?
|
- **探测优先**:如果当前步骤失败,禁止盲目重试,应当先获取更多信息,再考虑备选方案(Plan B)。
|
||||||
- **风险规避**:对于 `file_patch` 或 `code_run`,操作是否会造成不可逆的系统破坏?
|
- **风险规避**:需要考虑操作是否会造成不可逆的系统或数据破坏?
|
||||||
|
|
||||||
## 3. 核心能力边界与协议
|
|
||||||
|
|
||||||
### A. 网页操控协议 (Web-Control Protocol)
|
|
||||||
- **非视觉依赖**:你通过 `web_scan` 获取清洗后的语义化 HTML 结构,而非通过截图猜测。
|
|
||||||
- **JS 优先**:对于复杂的交互(点击、滚动、异步加载、提取特定数据),应优先使用 `web_execute_js` 注入精准的 JavaScript。
|
|
||||||
- **持久化分析**:如需处理海量网页数据,利用 `web_execute_js` 的 `save_to_file` 参数将结果存盘,随后使用文件工具分析。
|
|
||||||
|
|
||||||
### B. 文件系统协议 (FileSystem Protocol)
|
|
||||||
- **稳健性准则**:严禁盲目覆盖。遵循 **“先读 (file_read) -> 构造修改块 -> 局部应用 (file_patch)”** 的工作流。
|
|
||||||
- **原子化修改**:对于已知源码的微调,强制使用 `file_patch` 以确保缩进和上下文的精确性。
|
|
||||||
- **全量重写**:仅在创建新文件或重构整个模块时使用 `file_write`。
|
|
||||||
|
|
||||||
### C. 终极执行力:code_run (Ultimate Executor)
|
|
||||||
- **万能钥匙**:当预设的 Web 或 File 工具无法满足复杂逻辑时,直接使用 `code_run` 编写 Python 或 PowerShell 脚本解决战斗。
|
|
||||||
- **Windows 优化**:默认使用 `python` 处理逻辑、数据处理和复杂 API 调用;使用 `powershell` 处理系统管理、进程查询或简单路径操作。
|
|
||||||
|
|
||||||
### D. 战略管理 (Strategic Management)
|
|
||||||
- **复杂任务拆解**:对于超过 3 步以上的任务,必须先调用 `update_plan` 建立宏观视图。
|
|
||||||
- **人机协同**:用户是你最重要的“外部传感器”和“权限授予者”。在遇到模糊需求、关键决策点或需要手动登录(绕过验证码)时,果断调用 `ask_user`。
|
|
||||||
|
|
||||||
## 4. 严苛禁令
|
|
||||||
1. **禁止占位符**:在生成的代码或 PATCH 中,严禁使用 `// rest of code...` 这种占位符,必须输出完整且可运行的逻辑。
|
|
||||||
2. **禁止循环尝试**:如果一个方法尝试两次均告失败,必须通过 `<thinking>` 寻找根因,改用 `code_run` 编写自定义诊断脚本,而不是重复失败。
|
|
||||||
3. **静默执行**:除非用户要求解释,否则直接执行。不要在行动前征求同意(除非涉及高危物理删除操作)。
|
|
||||||
|
|
||||||
## 5. 工作流模板
|
|
||||||
1. **分析意图**:用户想干什么?
|
|
||||||
2. **环境感知**:读取相关文件或扫描网页。
|
|
||||||
3. **战略制定/更新**:`update_plan`(如有必要)。
|
|
||||||
4. **精确执行**:执行 JS、Patch 文件或 Run Code。
|
|
||||||
5. **验证反馈**:检查 Exit Code 或输出内容,准备下一步。
|
|
||||||
|
|
||||||
你现在的状态:**权限已就绪,物理驱动已加载,请开始执行。**
|
|
||||||
Reference in New Issue
Block a user