fix: simphtml visibility/dialog/SVG improvements & minor agent fixes

- simphtml: rewrite visibility inheritance (area-based maxC), add isVisible to childrenInfo,
  fix className SVG compat (getAttribute), expand isDialog detection, hoist deep fixed dialogs,
  clear SVG attrs, fix warning message concat
- ga: return error messages instead of None for code_run/web_execute_js failures
- stapp: disable unsafe_allow_html
- code_run_header: add excepthook hint for ImportError/AttributeError
- mykey_template: remove stale prompt_cache comment
This commit is contained in:
Liang Jiaqing
2026-04-03 09:40:09 +08:00
parent b0d4563ae8
commit 8d537e954b
5 changed files with 34 additions and 20 deletions

View File

@@ -72,10 +72,9 @@ function createEnhancedDOMCopy() {
const nonTextChildren = childNodes.filter(child => child.nodeType !== 3);
const hasValidChildren = nonTextChildren.length > 0;
if (!isVisible && nonTextChildren.length > 0) {
const visChild = nonTextChildren.find(child =>
nodeInfo.has(child) && nodeInfo.get(child).isVisible);
if (visChild) info = nodeInfo.get(visChild);
if (hasValidChildren) {
const maxC = nonTextChildren.map(c => nodeInfo.get(c)).filter(i => i?.isVisible).sort((a, b) => b.area - a.area)[0];
if (maxC && maxC.area > 10000 && (!isVisible || maxC.area > info.area * 5)) info = maxC;
}
nodeInfo.set(clone, info);
@@ -136,7 +135,7 @@ function analyzeNode(node, pPathType='main') {
const childrenInfo = children.map(child => {
const info = getNodeInfo(child) || { rect: {}, style: {} };
return { node: child, rect: info.rect, style: info.style,
area: info.area, zIndex: info.zIndex };
area: info.area, zIndex: info.zIndex, isVisible: info.isVisible };
}).sort((a, b) => b.area - a.area);
// 检测是划分还是覆盖
@@ -168,10 +167,11 @@ function analyzeNode(node, pPathType='main') {
childrenInfo[0].node.dataset.mark = 'K:main';
for (let i = 1; i < childrenInfo.length; i++) {
const child = childrenInfo[i];
let className = (child.node.getAttribute('class') || '').toLowerCase();
let isSecondary = containsButton(child.node);
if (child.node.className.toLowerCase().includes('nav')) isSecondary = true;
if (child.node.className.toLowerCase().includes('breadcrumbs')) isSecondary = true;
if (child.node.className.toLowerCase().includes('header') && child.node.className.toLowerCase().includes('table')) isSecondary = true;
if (className.includes('nav')) isSecondary = true;
if (className.includes('breadcrumbs')) isSecondary = true;
if (className.includes('header') && className.includes('table')) isSecondary = true;
if (child.node.innerHTML.trim().replace(/\s+/g, '').length < 500) isSecondary = true;
if (child.node.textContent.trim().length > 200) isSecondary = true; // P3: 有实质文本内容则保留
if (child.style.visibility === 'hidden') isSecondary = false;
@@ -179,7 +179,7 @@ function analyzeNode(node, pPathType='main') {
else child.node.dataset.mark = 'R:nonEssential';
}
} else {
const uniqueClassNames = new Set(childrenInfo.map(item => item.node.className)).size;
const uniqueClassNames = new Set(childrenInfo.map(item => item.node.getAttribute('class') || '')).size;
const highClassNameVariety = uniqueClassNames >= childrenInfo.length * 0.8;
if (pathType !== 'main' && highClassNameVariety && childrenInfo.length > 5) {
childrenInfo.forEach(child => child.node.dataset.mark = 'R:equalmany');
@@ -215,7 +215,7 @@ function analyzeNode(node, pPathType='main') {
const minDimensionRatio = Math.min(rect.width / window.innerWidth, rect.height / window.innerHeight);
const maxDimensionRatio = Math.max(rect.width / window.innerWidth, rect.height / window.innerHeight);
const isNearTop = rect.top < 50;
const isDialog = top.node.querySelector('iframe') && centerDiff < 0.3;
const isDialog = (top.node.querySelector('iframe') || top.node.querySelector('button') || top.node.querySelector('input')) && centerDiff < 0.3;
if (isComplex && centerDiff < 0.2 &&
((minDimensionRatio > 0.2 && rect.width/window.innerWidth < 0.98) || minDimensionRatio > 0.95)) {
@@ -253,6 +253,18 @@ function analyzeNode(node, pPathType='main') {
);
}
// Hoist top 1-2 deep fixed dialogs to body level for overlay detection
const _fc = [...domCopy.querySelectorAll('*')].filter(el => {
if (el.parentNode === domCopy) return false;
const info = getNodeInfo(el);
if (!info?.rect || (info.style.position !== 'fixed' && info.style.position !== 'sticky')) return false;
const r = info.rect, cover = (r.width * r.height) / viewportArea;
const cd = Math.abs((r.left + r.width/2) - window.innerWidth/2) / window.innerWidth;
return cover > 0.15 && cd < 0.3 && el.querySelector('button, input, a, [role="button"], iframe');
}).filter((el, _, arr) => !arr.some(o => o !== el && o.contains(el)))
.sort((a, b) => (getNodeInfo(b).rect.width * getNodeInfo(b).rect.height) - (getNodeInfo(a).rect.width * getNodeInfo(a).rect.height))
.slice(0, 2);
_fc.forEach(el => { const r = getNodeInfo(el).rect; console.log('[simphtml] Hoisted fixed dialog:', el.tagName + (el.id ? '#'+el.id : '') + (el.className ? '.'+String(el.className).split(' ')[0] : ''), Math.round(r.width)+'x'+Math.round(r.height), Math.round(100*r.width*r.height/viewportArea)+'%'); el.parentNode.removeChild(el); domCopy.appendChild(el); });
const result = analyzeNode(domCopy);
domCopy.querySelectorAll('[data-mark^="R:"]').forEach(el=>el.parentNode?.removeChild(el));
let root = domCopy;
@@ -718,7 +730,8 @@ js_findMainContent = '''
def optimize_html_for_tokens(html):
if type(html) is str: soup = BeautifulSoup(html, 'html.parser')
else: soup = html
for svg in soup.find_all('svg'): svg.clear()
for svg in soup.find_all('svg'):
svg.clear(); svg.attrs = {}
[tag.attrs.pop('style', None) for tag in soup.find_all(True)]
for tag in soup.find_all(True):
if tag.has_attr('src'):
@@ -780,7 +793,7 @@ def get_temp_texts(driver):
print(e)
return []
import time, re
import time, re, os
def get_main_block(driver, extra_js="", text_only=False):
page = driver.execute_js(f"{extra_js}\n{js_optHTML}\nreturn optHTML({str(text_only).lower()});").get('data', '')
if text_only:
@@ -826,6 +839,7 @@ def find_changed_elements(before_html, after_html):
def get_html(driver, cutlist=False, maxchars=38000, instruction="", extra_js="", text_only=False):
page = get_main_block(driver, extra_js=extra_js, text_only=text_only)
if text_only: return page
warning = ''
soup = optimize_html_for_tokens(page)
html = str(soup)
if not cutlist or len(html) <= maxchars: return html
@@ -838,10 +852,11 @@ def get_html(driver, cutlist=False, maxchars=38000, instruction="", extra_js="",
keep = hit[:6] if hit else items[:3]
for it in items:
if it not in keep: it.decompose()
ss = '[SYSTEM] Found item list, only show some items ...\n' + str(optimize_html_for_tokens(s))
ss = str(optimize_html_for_tokens(s))
warning = '[SYSTEM] Found item list, only show some items ...\n'
else: ss = html
if len(ss) > maxchars: ss = ss[:maxchars] + ' ... [TRUNCATED]'
return ss
return warning + str(ss)
def execute_js_rich(script, driver, no_monitor=False):
last_html = None