simphtml: refine pathType detection, simplify secondary marking, init response, reduce sleep

This commit is contained in:
Liang Jiaqing
2026-03-30 20:37:27 +08:00
parent f320d141a0
commit 117028b5d9
3 changed files with 9 additions and 8 deletions

View File

@@ -119,7 +119,7 @@ function analyzeNode(node, pPathType='main') {
node.nodeType === 1 && (node.dataset.mark = 'K:leaf'); node.nodeType === 1 && (node.dataset.mark = 'K:leaf');
return; return;
} }
const pathType = (node.dataset.mark && !node.dataset.mark.includes(':main')) ? 'second' : pPathType; const pathType = (node.dataset.mark === 'K:secondary') ? 'second' : pPathType;
const nodeInfoData = getNodeInfo(node); const nodeInfoData = getNodeInfo(node);
if (!nodeInfoData || !nodeInfoData.rect) return; if (!nodeInfoData || !nodeInfoData.rect) return;
const rectn = nodeInfoData.rect; const rectn = nodeInfoData.rect;
@@ -166,13 +166,14 @@ function analyzeNode(node, pPathType='main') {
(childrenInfo.length === 1 || childrenInfo[0].area > childrenInfo[1].area * 2); (childrenInfo.length === 1 || childrenInfo[0].area > childrenInfo[1].area * 2);
if (hasMainElement) { if (hasMainElement) {
childrenInfo[0].node.dataset.mark = 'K:main'; childrenInfo[0].node.dataset.mark = 'K:main';
for (let i = pathType==='main'?1:0; i < childrenInfo.length; i++) { for (let i = 1; i < childrenInfo.length; i++) {
const child = childrenInfo[i]; const child = childrenInfo[i];
let isSecondary = containsButton(child.node); let isSecondary = containsButton(child.node);
if (pathType === "main" && child.node.className.toLowerCase().includes('nav')) isSecondary = true; if (child.node.className.toLowerCase().includes('nav')) isSecondary = true;
if (pathType === "main" && child.node.className.toLowerCase().includes('breadcrumbs')) isSecondary = true; if (child.node.className.toLowerCase().includes('breadcrumbs')) isSecondary = true;
if (pathType === "main" && child.node.className.toLowerCase().includes('header') && child.node.className.toLowerCase().includes('table')) isSecondary = true; if (child.node.className.toLowerCase().includes('header') && child.node.className.toLowerCase().includes('table')) isSecondary = true;
if (pathType === "main" && child.node.innerHTML.trim().replace(/\s+/g, '').length < 500) isSecondary = true; if (child.node.innerHTML.trim().replace(/\s+/g, '').length < 500) isSecondary = true;
if (child.node.textContent.trim().length > 200) isSecondary = true; // P3: keep the node when it has substantial text content
if (child.style.visibility === 'hidden') isSecondary = false; if (child.style.visibility === 'hidden') isSecondary = false;
if (isSecondary) child.node.dataset.mark = 'K:secondary'; if (isSecondary) child.node.dataset.mark = 'K:secondary';
else child.node.dataset.mark = 'R:nonEssential'; else child.node.dataset.mark = 'R:nonEssential';
@@ -848,13 +849,13 @@ def execute_js_rich(script, driver, no_monitor=False):
try: last_html = get_html(driver, cutlist=False, extra_js=temp_monitor_js, maxchars=9999999) try: last_html = get_html(driver, cutlist=False, extra_js=temp_monitor_js, maxchars=9999999)
except: pass except: pass
result = None; error_msg = None; reloaded = False; newTabs = [] result = None; error_msg = None; reloaded = False; newTabs = []
before_sids = set(driver.get_session_dict().keys()) before_sids = set(driver.get_session_dict().keys()); response = {}
try: try:
print(f"Executing: {script[:250]} ...") print(f"Executing: {script[:250]} ...")
response = driver.execute_js(script) response = driver.execute_js(script)
result = response.get('data') or response.get('result') result = response.get('data') or response.get('result')
if response.get('closed', 0) == 1: reloaded = True if response.get('closed', 0) == 1: reloaded = True
time.sleep(2) time.sleep(1)
except Exception as e: except Exception as e:
error = e.args[0] if e.args else str(e) error = e.args[0] if e.args else str(e)
if isinstance(error, dict): error.pop('stack', None) if isinstance(error, dict): error.pop('stack', None)