From 8d537e954bdaadddbdba3d7b248f2fd2b67e618d Mon Sep 17 00:00:00 2001 From: Liang Jiaqing Date: Fri, 3 Apr 2026 09:40:09 +0800 Subject: [PATCH] fix: simphtml visibility/dialog/SVG improvements & minor agent fixes - simphtml: rewrite visibility inheritance (area-based maxC), add isVisible to childrenInfo, fix className SVG compat (getAttribute), expand isDialog detection, hoist deep fixed dialogs, clear SVG attrs, fix warning message concat - ga: return error messages instead of None for code_run/web_execute_js failures - stapp: disable unsafe_allow_html - code_run_header: add excepthook hint for ImportError/AttributeError - mykey_template: remove stale prompt_cache comment --- assets/code_run_header.py | 2 +- frontends/stapp.py | 2 +- ga.py | 6 +++--- mykey_template.py | 1 - simphtml.py | 43 ++++++++++++++++++++++++++------------- 5 files changed, 34 insertions(+), 20 deletions(-) diff --git a/assets/code_run_header.py b/assets/code_run_header.py index 7640934..52a5e5f 100644 --- a/assets/code_run_header.py +++ b/assets/code_run_header.py @@ -19,4 +19,4 @@ def _run(*a, **k): if r.stderr is not None: r.stderr = _d(r.stderr) return r subprocess.run = _run - +sys.excepthook = lambda t, v, tb: (sys.__excepthook__(t, v, tb), print(f"\n[Agent Hint]: NO GUESSING! You MUST probe first. If missing common package, pip.")) if issubclass(t, (ImportError, AttributeError)) else sys.__excepthook__(t, v, tb) diff --git a/frontends/stapp.py b/frontends/stapp.py index 7ccc179..02bddf9 100644 --- a/frontends/stapp.py +++ b/frontends/stapp.py @@ -84,7 +84,7 @@ def agent_backend_stream(prompt): if "messages" not in st.session_state: st.session_state.messages = [] for msg in st.session_state.messages: - with st.chat_message(msg["role"]): st.markdown(msg["content"], unsafe_allow_html=True) + with st.chat_message(msg["role"]): st.markdown(msg["content"], unsafe_allow_html=False) # IME composition fix (macOS only) - prevents Enter from submitting during CJK input if os.name != 'nt': diff --git a/ga.py b/ga.py index fcbacad..4e6f30a 100644 --- a/ga.py +++ b/ga.py @@ -289,12 +289,12 @@ class GenericAgentHandler(BaseHandler): def do_code_run(self, args, response): '''执行代码片段,有长度限制,不允许代码中放大量数据,如有需要应当通过文件读取进行。''' if response.tool_calls and sum(1 for tc in response.tool_calls[:args.get('_index', 0)] if tc.function.name == 'code_run') > 0: - return StepOutcome("[BLANK]", next_prompt="no multi code_run in one round!") + return StepOutcome("[ERROR] no multi code_run in one round!", next_prompt="\n") code_type = args.get("type", "python") code = args.get("code") or args.get("script") if not code: code = self._extract_code_block(response, code_type) - if not code: return StepOutcome(None, next_prompt=f"[Error] Code missing. Use ```{code_type} block or 'script' arg.") + if not code: return StepOutcome("[Error] Code missing. Use ```{code_type} block or 'script' arg.", next_prompt="\n") timeout = args.get("timeout", 60) raw_path = os.path.join(self.cwd, args.get("cwd", './')) cwd = os.path.normpath(os.path.abspath(raw_path)) @@ -333,7 +333,7 @@ class GenericAgentHandler(BaseHandler): def do_web_execute_js(self, args, response): '''web情况下的优先使用工具,执行任何js达成对浏览器的*完全*控制。支持将结果保存到文件供后续读取分析。''' script = args.get("script", "") or self._extract_code_block(response, "javascript") - if not script: return StepOutcome(None, next_prompt="[Error] Script missing. Use ```javascript block or 'script' arg.") + if not script: return StepOutcome("[Error] Script missing. Use ```javascript block or 'script' arg.", next_prompt="\n") abs_path = self._get_abs_path(script.strip()) if os.path.isfile(abs_path): with open(abs_path, 'r', encoding='utf-8') as f: script = f.read() diff --git a/mykey_template.py b/mykey_template.py index 0f8162c..ae8ecb3 100644 --- a/mykey_template.py +++ b/mykey_template.py @@ -20,7 +20,6 @@ oai_config = { 'model': 'openai/gpt-5.1', 'api_mode': 'chat_completions', # 'chat_completions' | 'responses' # 'reasoning_effort': 'low', # none|low|medium|high|xhigh (OpenAI o系列) - # 'prompt_cache': False, 'max_retries': 2, # 429/timeout/5xx 重试次数 'connect_timeout': 10, # 秒 'read_timeout': 120, # 秒(流式读取) diff --git a/simphtml.py b/simphtml.py index 219e70d..c509383 100644 --- a/simphtml.py +++ b/simphtml.py @@ -72,10 +72,9 @@ function createEnhancedDOMCopy() { const nonTextChildren = childNodes.filter(child => child.nodeType !== 3); const hasValidChildren = nonTextChildren.length > 0; - if (!isVisible && nonTextChildren.length > 0) { - const visChild = nonTextChildren.find(child => - nodeInfo.has(child) && nodeInfo.get(child).isVisible); - if (visChild) info = nodeInfo.get(visChild); + if (hasValidChildren) { + const maxC = nonTextChildren.map(c => nodeInfo.get(c)).filter(i => i?.isVisible).sort((a, b) => b.area - a.area)[0]; + if (maxC && maxC.area > 10000 && (!isVisible || maxC.area > info.area * 5)) info = maxC; } nodeInfo.set(clone, info); @@ -136,7 +135,7 @@ function analyzeNode(node, pPathType='main') { const childrenInfo = children.map(child => { const info = getNodeInfo(child) || { rect: {}, style: {} }; return { node: child, rect: info.rect, style: info.style, - area: info.area, zIndex: info.zIndex }; + area: info.area, zIndex: info.zIndex, isVisible: info.isVisible }; }).sort((a, b) => b.area - a.area); // 检测是划分还是覆盖 @@ -168,10 +167,11 @@ function analyzeNode(node, pPathType='main') { childrenInfo[0].node.dataset.mark = 'K:main'; for (let i = 1; i < childrenInfo.length; i++) { const child = childrenInfo[i]; + let className = (child.node.getAttribute('class') || '').toLowerCase(); let isSecondary = containsButton(child.node); - if (child.node.className.toLowerCase().includes('nav')) isSecondary = true; - if (child.node.className.toLowerCase().includes('breadcrumbs')) isSecondary = true; - if (child.node.className.toLowerCase().includes('header') && child.node.className.toLowerCase().includes('table')) isSecondary = true; + if (className.includes('nav')) isSecondary = true; + if (className.includes('breadcrumbs')) isSecondary = true; + if (className.includes('header') && className.includes('table')) isSecondary = true; if (child.node.innerHTML.trim().replace(/\s+/g, '').length < 500) isSecondary = true; if (child.node.textContent.trim().length > 200) isSecondary = true; // P3: 有实质文本内容则保留 if (child.style.visibility === 'hidden') isSecondary = false; @@ -179,7 +179,7 @@ function analyzeNode(node, pPathType='main') { else child.node.dataset.mark = 'R:nonEssential'; } } else { - const uniqueClassNames = new Set(childrenInfo.map(item => item.node.className)).size; + const uniqueClassNames = new Set(childrenInfo.map(item => item.node.getAttribute('class') || '')).size; const highClassNameVariety = uniqueClassNames >= childrenInfo.length * 0.8; if (pathType !== 'main' && highClassNameVariety && childrenInfo.length > 5) { childrenInfo.forEach(child => child.node.dataset.mark = 'R:equalmany'); @@ -215,7 +215,7 @@ function analyzeNode(node, pPathType='main') { const minDimensionRatio = Math.min(rect.width / window.innerWidth, rect.height / window.innerHeight); const maxDimensionRatio = Math.max(rect.width / window.innerWidth, rect.height / window.innerHeight); const isNearTop = rect.top < 50; - const isDialog = top.node.querySelector('iframe') && centerDiff < 0.3; + const isDialog = (top.node.querySelector('iframe') || top.node.querySelector('button') || top.node.querySelector('input')) && centerDiff < 0.3; if (isComplex && centerDiff < 0.2 && ((minDimensionRatio > 0.2 && rect.width/window.innerWidth < 0.98) || minDimensionRatio > 0.95)) { @@ -253,6 +253,18 @@ function analyzeNode(node, pPathType='main') { ); } +// Hoist top 1-2 deep fixed dialogs to body level for overlay detection +const _fc = [...domCopy.querySelectorAll('*')].filter(el => { + if (el.parentNode === domCopy) return false; + const info = getNodeInfo(el); + if (!info?.rect || (info.style.position !== 'fixed' && info.style.position !== 'sticky')) return false; + const r = info.rect, cover = (r.width * r.height) / viewportArea; + const cd = Math.abs((r.left + r.width/2) - window.innerWidth/2) / window.innerWidth; + return cover > 0.15 && cd < 0.3 && el.querySelector('button, input, a, [role="button"], iframe'); +}).filter((el, _, arr) => !arr.some(o => o !== el && o.contains(el))) + .sort((a, b) => (getNodeInfo(b).rect.width * getNodeInfo(b).rect.height) - (getNodeInfo(a).rect.width * getNodeInfo(a).rect.height)) + .slice(0, 2); +_fc.forEach(el => { const r = getNodeInfo(el).rect; console.log('[simphtml] Hoisted fixed dialog:', el.tagName + (el.id ? '#'+el.id : '') + (el.className ? '.'+String(el.className).split(' ')[0] : ''), Math.round(r.width)+'x'+Math.round(r.height), Math.round(100*r.width*r.height/viewportArea)+'%'); el.parentNode.removeChild(el); domCopy.appendChild(el); }); const result = analyzeNode(domCopy); domCopy.querySelectorAll('[data-mark^="R:"]').forEach(el=>el.parentNode?.removeChild(el)); let root = domCopy; @@ -718,7 +730,8 @@ js_findMainContent = ''' def optimize_html_for_tokens(html): if type(html) is str: soup = BeautifulSoup(html, 'html.parser') else: soup = html - for svg in soup.find_all('svg'): svg.clear() + for svg in soup.find_all('svg'): + svg.clear(); svg.attrs = {} [tag.attrs.pop('style', None) for tag in soup.find_all(True)] for tag in soup.find_all(True): if tag.has_attr('src'): @@ -780,7 +793,7 @@ def get_temp_texts(driver): print(e) return [] -import time, re +import time, re, os def get_main_block(driver, extra_js="", text_only=False): page = driver.execute_js(f"{extra_js}\n{js_optHTML}\nreturn optHTML({str(text_only).lower()});").get('data', '') if text_only: @@ -826,6 +839,7 @@ def find_changed_elements(before_html, after_html): def get_html(driver, cutlist=False, maxchars=38000, instruction="", extra_js="", text_only=False): page = get_main_block(driver, extra_js=extra_js, text_only=text_only) if text_only: return page + warning = '' soup = optimize_html_for_tokens(page) html = str(soup) if not cutlist or len(html) <= maxchars: return html @@ -838,10 +852,11 @@ def get_html(driver, cutlist=False, maxchars=38000, instruction="", extra_js="", keep = hit[:6] if hit else items[:3] for it in items: if it not in keep: it.decompose() - ss = '[SYSTEM] Found item list, only show some items ...\n' + str(optimize_html_for_tokens(s)) + ss = str(optimize_html_for_tokens(s)) + warning = '[SYSTEM] Found item list, only show some items ...\n' else: ss = html if len(ss) > maxchars: ss = ss[:maxchars] + ' ... [TRUNCATED]' - return ss + return warning + str(ss) def execute_js_rich(script, driver, no_monitor=False): last_html = None