From 66ae64c98e39f6fa6e79b9f0dd75e6684850b063 Mon Sep 17 00:00:00 2001 From: Liang Jiaqing Date: Sun, 22 Feb 2026 09:28:57 +0800 Subject: [PATCH] refine: tool descriptions, js diff top_change, compact find_changed_elements --- assets/tools_schema.json | 4 +-- ga.py | 2 +- simphtml.py | 76 +++++++++++++++++----------------------- 3 files changed, 35 insertions(+), 47 deletions(-) diff --git a/assets/tools_schema.json b/assets/tools_schema.json index f6b25d6..8ee00df 100644 --- a/assets/tools_schema.json +++ b/assets/tools_schema.json @@ -48,7 +48,7 @@ }}, {"type": "function", "function": { "name": "update_working_mem", - "description": "短期工作便签,内容每轮自动注入,防止长任务中关键信息丢失。何时调用:(1)即将切换子任务、上下文将被大量新信息冲刷前,存入当前路径/参数/进度;(2)获得后续步骤必需的关键发现后;(3)SOP多步执行时,完成一步后更新为本步结果+下一步要求。原则:只存N轮后可能忘记但后面还要用的信息,刚发生的、上下文里还热乎的不用存。宁可多更新不可丢关键上下文。", + "description": "短期工作便签,内容每轮自动注入,防止长任务中关键信息丢失。要在任务前中期而非结束时调用,新任务切换时应当及时使用清除之前影响。何时调用:(1)即将切换子任务、上下文将被大量新信息冲刷前,存入当前路径/参数/进度;(2)获得后续步骤必需的关键发现后;(3)SOP多步执行时,完成一步后更新为本步结果+下一步要求。原则:只存N轮后可能忘记但后面还要用的信息,刚发生的不用存。宁可多更新不可丢关键上下文。", "parameters": {"type": "object", "properties": { "key_info": {"type": "string", "description": "替换当前便签(<200 tokens)。只写后续必须记住的:文件路径、关键参数/发现、当前进度、下一步计划、要避的坑。刚完成的和上下文中显而易见的不写,省空间给真正容易丢的信息。"}, "related_sop": {"type": "string", "description": "相关sop名称,可以多个,必要时需要再读"}}} @@ -62,7 +62,7 @@ }}, {"type": "function", "function": { "name": "trigger_memory_update", - "description": "若发现值得长期记忆的信息(环境事实/用户偏好/避坑经验),调用此工具触发记忆提炼。一次用户对话只允许调用一次,严禁在记忆更新或自主流程内调用。", + "description": "准备开始提炼记忆。发现值得长期记忆的信息(环境事实/用户偏好/避坑经验)时调用此工具。一次用户对话只允许调用一次,已记忆更新或在自主流程内时无需调用。", "parameters": {"type": "object", "properties": {}}} } ] \ No newline at end of file diff --git a/ga.py b/ga.py index fb8c41f..b57e331 100644 --- a/ga.py +++ b/ga.py @@ -305,7 +305,7 @@ class GenericAgentHandler(BaseHandler): if save_to_file and "js_return" in result: content = str(result["js_return"] or '') abs_path = self._get_abs_path(save_to_file) - result["js_return"] = smart_format(content, max_str_len=200) + result["js_return"] = smart_format(content, max_str_len=170) try: with open(abs_path, 'w', encoding='utf-8') as f: f.write(str(content)) result["js_return"] += f"\n\n[已保存完整内容到 {abs_path}]" diff --git a/simphtml.py b/simphtml.py index 71c796f..0449446 100644 --- a/simphtml.py +++ b/simphtml.py @@ -825,47 +825,34 @@ def get_main_block(driver): return html -def find_changed_elements(before_html, after_html): - before_soup = BeautifulSoup(before_html, 'html.parser') - after_soup = BeautifulSoup(after_html, 'html.parser') - def get_element_signature(element): - attrs = {k:v for k,v in element.attrs.items() if k != 'data-track-id'} - children = len(list(element.find_all(recursive=False))) - text = element.get_text(strip=True) - return f"{element.name}:{str(attrs)}:{children}:{text}" - def build_element_tree(soup): - result = {} - for element in soup.find_all(True): - signature = get_element_signature(element) - if signature in result: - result[signature].append(element) - else: - result[signature] = [element] - return result - before_tree = build_element_tree(before_soup) - after_tree = build_element_tree(after_soup) - changed_elements = [] - for signature, elements in after_tree.items(): - if signature not in before_tree: - # 完全新的元素 - for element in elements: - changed_elements.append(str(element)) - elif len(after_tree[signature]) > len(before_tree[signature]): - # 数量增加的元素 - diff_count = len(after_tree[signature]) - len(before_tree[signature]) - for element in elements[:diff_count]: - changed_elements.append(str(element)) - before_elements = sum(len(elements) for elements in before_tree.values()) - after_elements = sum(len(elements) for elements in after_tree.values()) - common_elements = sum(min(len(before_tree.get(sig, [])), len(after_tree.get(sig, []))) - for sig in set(before_tree.keys()) | set(after_tree.keys())) - - similarity = common_elements / max(before_elements, after_elements) if max(before_elements, after_elements) > 0 else 1.0 - return { - "changed": len(changed_elements), - "similarity": similarity - #"changed_elements": changed_elements[:3] - } +def find_changed_elements(before_html, after_html): + before_soup = BeautifulSoup(before_html, 'html.parser') + after_soup = BeautifulSoup(after_html, 'html.parser') + def direct_text(el): + return ''.join(t.strip() for t in el.find_all(string=True, recursive=False)).strip() + def get_sig(el): + attrs = {k:v for k,v in el.attrs.items() if k != 'data-track-id'} + return f"{el.name}:{attrs}:{direct_text(el)}" + def build_sigs(soup): + result = {} + for el in soup.find_all(True): + sig = get_sig(el) + result.setdefault(sig, []).append(el) + return result + before_sigs, after_sigs = build_sigs(before_soup), build_sigs(after_soup) + changed = [] + for sig, els in after_sigs.items(): + if sig not in before_sigs: changed.extend(els) + elif len(els) > len(before_sigs[sig]): changed.extend(els[:len(els) - len(before_sigs[sig])]) + # 变化边界: parent不在changed中的元素 + cids = set(id(el) for el in changed) + boundaries = [el for el in changed if el.parent is None or id(el.parent) not in cids] + top = max(boundaries, key=lambda el: len(str(el))) if boundaries else None + result = {"changed": len(changed)} + if top: + h = str(top) + result["top_change"] = h if len(h) <= 2000 else h[:2000] + '...[TRUNCATED]' + return result def get_html(driver, cutlist=False, maxchars=28000, instruction=""): page = get_main_block(driver) @@ -925,15 +912,16 @@ def execute_js_rich(script, driver): if last_html is None: raise Exception("no baseline") diff_data = find_changed_elements(last_html, current_html) change_count = diff_data.get('changed', 0) + top_change = diff_data.get('top_change', '') diff_summary = f"DOM变化量: {change_count}" + if top_change: diff_summary += f"\n最显著变化:\n{top_change}" transients = rr.get('transients', []) - if change_count < 5 and not transients and not new_tab: - diff_summary += " (页面几乎无静默变化)" + if change_count == 0 and not transients and not new_tab: + diff_summary += " (页面无变化)" rr['suggestion'] = "页面无明显变化" else: rr['suggestion'] = "" except: diff_summary = "页面变化监控不可用" - rr['suggestion'] = "" rr['diff'] = diff_summary return rr