From 1520a85a9e291eca6921e0a50faddceb1e116046 Mon Sep 17 00:00:00 2001 From: Jiaqing Liang Date: Wed, 11 Mar 2026 15:18:23 +0800 Subject: [PATCH] simphtml: remove dead code (js_cleanDOM, isValidInteractiveElement, redundant return) --- simphtml.py | 66 +---------------------------------------------------- 1 file changed, 1 insertion(+), 65 deletions(-) diff --git a/simphtml.py b/simphtml.py index 76e9534..c439dd4 100644 --- a/simphtml.py +++ b/simphtml.py @@ -179,7 +179,7 @@ function analyzeNode(node, pPathType='main') { const hasStandardButton = container.querySelector('button, input[type="button"], input[type="submit"], [role="button"]') !== null; if (hasStandardButton) return true; const hasClassButton = container.querySelector('[class*="-btn"], [class*="-button"], .button, .btn, [class*="btn-"]') !== null; - return hasStandardButton || hasClassButton; + return hasClassButton; } function handleOverlayContainer(childrenInfo, pathType) { @@ -225,15 +225,6 @@ function analyzeNode(node, pPathType='main') { rest.length && (!hasOverlap(rest) ? handlePartitionContainer(rest, pathType) : handleOverlayContainer(rest, pathType)); } } - - function isValidInteractiveElement(info) { - const { node, rect, style } = info; - const isCentered = Math.abs((rect.left + rect.width/2) - window.innerWidth/2) < window.innerWidth*0.3; - const isVisible = parseFloat(style.opacity) > 0.1; - const isProminent = (parseInt(info.zIndex) > 30 || style.boxShadow !== 'none'); - const hasInteractiveElements = node.querySelector('button, a, input') !== null; - return isCentered && isVisible && isProminent && hasInteractiveElements; - } function hasOverlap(items) { return items.some((a, i) => @@ -262,8 +253,6 @@ return root.outerHTML; } optHTML()''' - - js_findMainList = r'''function findMainList(startElement = null) { const containerElement = startElement || document.body; const rect = containerElement.getBoundingClientRect(); @@ -712,59 +701,6 @@ js_findMainContent = ''' } } ''' -js_cleanDOM = '''function cleanDOM(element) { - const clone = element.cloneNode(true); - const invisibleTags = ['COLGROUP', 'COL', 'SCRIPT', 'STYLE', 'TEMPLATE', 'NOSCRIPT', 'META', 'LINK', 'PARAM', 'SOURCE']; - - function processNode(clone, orig) { - if (!clone || !orig) return; - - // 处理所有子节点类型 - for (let i = clone.childNodes.length - 1; i >= 0; i--) { - const cloneNode = clone.childNodes[i]; - - // 移除注释节点 - if (cloneNode.nodeType === 8) { - cloneNode.remove(); - continue; - } - - // 只处理元素节点 - if (cloneNode.nodeType !== 1) continue; - - const origChild = orig.children[Array.from(clone.children).indexOf(cloneNode)]; - if (!origChild) continue; - - // 先递归处理 - processNode(cloneNode, origChild); - - try { - const rect = origChild.getBoundingClientRect(); - const style = window.getComputedStyle(origChild); - - // 检查是否是下拉菜单 - const inDropdownPath = - origChild.classList?.contains('dropdown-menu') || - /dropdown|menu/i.test(origChild.className) || - // 检查祖先节点是否为下拉菜单 - (orig.classList?.contains('dropdown-menu') || /dropdown|menu/i.test(orig.className)); - - // 如果是不可见且不在下拉菜单路径上,则移除 - if (invisibleTags.includes(origChild.tagName) || origChild.id === 'ljq-ind' || - (!inDropdownPath && (rect.width <= 1 || rect.height <= 1 || - style.display === 'none' || style.visibility === 'hidden' || - style.opacity === '0'))) { - cloneNode.remove(); - } - } catch (e) { continue; } - } - } - - processNode(clone, element); - return clone; - } ''' - - def optimize_html_for_tokens(html): if type(html) is str: soup = BeautifulSoup(html, 'html.parser') else: soup = html