simphtml: remove dead code (js_cleanDOM, isValidInteractiveElement, redundant return)

This commit is contained in:
Jiaqing Liang
2026-03-11 15:18:23 +08:00
parent 33f2df4270
commit 1520a85a9e

View File

@@ -179,7 +179,7 @@ function analyzeNode(node, pPathType='main') {
const hasStandardButton = container.querySelector('button, input[type="button"], input[type="submit"], [role="button"]') !== null; const hasStandardButton = container.querySelector('button, input[type="button"], input[type="submit"], [role="button"]') !== null;
if (hasStandardButton) return true; if (hasStandardButton) return true;
const hasClassButton = container.querySelector('[class*="-btn"], [class*="-button"], .button, .btn, [class*="btn-"]') !== null; const hasClassButton = container.querySelector('[class*="-btn"], [class*="-button"], .button, .btn, [class*="btn-"]') !== null;
return hasStandardButton || hasClassButton; return hasClassButton;
} }
function handleOverlayContainer(childrenInfo, pathType) { function handleOverlayContainer(childrenInfo, pathType) {
@@ -226,15 +226,6 @@ function analyzeNode(node, pPathType='main') {
} }
} }
/**
 * Check whether an analyzed element is a centered, visible, prominent
 * container that holds at least one interactive control.
 *
 * All four criteria are evaluated up front and then combined:
 *  - centered:    the horizontal midpoint of `info.rect` lies within 30% of
 *                 the viewport width from the viewport's horizontal center;
 *  - visible:     `info.style.opacity` parses to a number above 0.1;
 *  - prominent:   `info.zIndex` parses to an integer above 30, or
 *                 `info.style.boxShadow` is anything other than 'none';
 *  - interactive: `info.node` contains a `button`, `a` or `input` descendant.
 *
 * @param {{node: Element, rect: {left: number, width: number}, style: {opacity: string, boxShadow: string}, zIndex: (string|number)}} info
 *   Per-element record; only the fields listed here are read.
 * @returns {boolean} true only when every criterion holds.
 */
function isValidInteractiveElement(info) {
  const { node: element, rect: box, style: computed } = info;

  // Horizontal distance between the element's midpoint and the viewport's midpoint.
  const midpointX = box.left + box.width / 2;
  const offsetFromCenter = Math.abs(midpointX - window.innerWidth / 2);
  const centered = offsetFromCenter < window.innerWidth * 0.3;

  const visible = parseFloat(computed.opacity) > 0.1;

  // A non-numeric z-index (e.g. 'auto') parses to NaN and fails the comparison,
  // in which case the box-shadow check decides.
  const prominent = parseInt(info.zIndex) > 30 ? true : computed.boxShadow !== 'none';

  const interactive = element.querySelector('button, a, input') !== null;

  return [centered, visible, prominent, interactive].every(Boolean);
}
function hasOverlap(items) { function hasOverlap(items) {
return items.some((a, i) => return items.some((a, i) =>
items.slice(i+1).some(b => { items.slice(i+1).some(b => {
@@ -262,8 +253,6 @@ return root.outerHTML;
} }
optHTML()''' optHTML()'''
js_findMainList = r'''function findMainList(startElement = null) { js_findMainList = r'''function findMainList(startElement = null) {
const containerElement = startElement || document.body; const containerElement = startElement || document.body;
const rect = containerElement.getBoundingClientRect(); const rect = containerElement.getBoundingClientRect();
@@ -712,59 +701,6 @@ js_findMainContent = '''
} }
} ''' } '''
# JavaScript source (kept as a Python string) defining cleanDOM(element).
# The script deep-clones `element` and prunes the clone while reading layout and
# computed style from the ORIGINAL nodes:
#   * child nodes are visited last-to-first, so removing a node from the clone
#     does not shift the positions of siblings that are still to be visited;
#   * comment nodes (nodeType 8) are removed; non-element nodes are otherwise kept;
#   * each cloned element is paired with the original child at the same index
#     among element children, and its subtree is processed before it is judged;
#   * an element is removed when its tag is in `invisibleTags`, its id is
#     'ljq-ind', or -- unless it or its direct parent looks like a dropdown/menu
#     (class 'dropdown-menu' or className matching /dropdown|menu/i) -- it is at
#     most 1px wide or high, or has display 'none', visibility 'hidden', or
#     opacity '0';
#   * any exception raised while inspecting a child skips that child.
# cleanDOM returns the pruned clone; the nodes of `element` are only read.
# NOTE(review): the in-string comment mentions checking ancestor nodes for the
# dropdown test, but only the immediate parent (`orig`) is inspected.
js_cleanDOM = '''function cleanDOM(element) {
const clone = element.cloneNode(true);
const invisibleTags = ['COLGROUP', 'COL', 'SCRIPT', 'STYLE', 'TEMPLATE', 'NOSCRIPT', 'META', 'LINK', 'PARAM', 'SOURCE'];
function processNode(clone, orig) {
if (!clone || !orig) return;
// 处理所有子节点类型
for (let i = clone.childNodes.length - 1; i >= 0; i--) {
const cloneNode = clone.childNodes[i];
// 移除注释节点
if (cloneNode.nodeType === 8) {
cloneNode.remove();
continue;
}
// 只处理元素节点
if (cloneNode.nodeType !== 1) continue;
const origChild = orig.children[Array.from(clone.children).indexOf(cloneNode)];
if (!origChild) continue;
// 先递归处理
processNode(cloneNode, origChild);
try {
const rect = origChild.getBoundingClientRect();
const style = window.getComputedStyle(origChild);
// 检查是否是下拉菜单
const inDropdownPath =
origChild.classList?.contains('dropdown-menu') ||
/dropdown|menu/i.test(origChild.className) ||
// 检查祖先节点是否为下拉菜单
(orig.classList?.contains('dropdown-menu') || /dropdown|menu/i.test(orig.className));
// 如果是不可见且不在下拉菜单路径上,则移除
if (invisibleTags.includes(origChild.tagName) || origChild.id === 'ljq-ind' ||
(!inDropdownPath && (rect.width <= 1 || rect.height <= 1 ||
style.display === 'none' || style.visibility === 'hidden' ||
style.opacity === '0'))) {
cloneNode.remove();
}
} catch (e) { continue; }
}
}
processNode(clone, element);
return clone;
} '''
def optimize_html_for_tokens(html): def optimize_html_for_tokens(html):
if type(html) is str: soup = BeautifulSoup(html, 'html.parser') if type(html) is str: soup = BeautifulSoup(html, 'html.parser')
else: soup = html else: soup = html