feat: enhance text_only mode - add form element annotations, block-level line breaks, and whitespace cleanup
This commit is contained in:
@@ -2,8 +2,8 @@ Facts(L2): ../memory/global_mem.txt | Code: ../ | SOPs(L3): ../memory/*.md or *.
|
|||||||
Insight是极简索引,L2/L3变更时同步Insight,索引必须极简。写记忆前先读META-SOP(L0)。
|
Insight是极简索引,L2/L3变更时同步Insight,索引必须极简。写记忆前先读META-SOP(L0)。
|
||||||
|
|
||||||
[CONSTITUTION]
|
[CONSTITUTION]
|
||||||
1. 改自身源码先请示;./内可自主实验,允许装包和portable工具。
|
1. 改自身源码先请示;./内可自主实验,允许装包和portable工具
|
||||||
2. 决策前查记忆库;未查证不断言。
|
2. 决策前查记忆库;未查证不断言
|
||||||
3. 分步执行,控制粒度,限制失败半径;3次失败请求干预。
|
3. 分步执行,控制粒度,限制失败半径;3次失败请求干预
|
||||||
4. 密钥文件仅引用,不读取/移动。
|
4. 密钥文件仅引用,不读取/移动
|
||||||
5. 写任何记忆前读META-SOP核验,memory下文件只能patch修改(除非新建)。
|
5. 写任何记忆前读META-SOP核验,memory下文件只能patch修改(除非新建)
|
||||||
|
|||||||
23
simphtml.py
23
simphtml.py
@@ -99,7 +99,18 @@ function createEnhancedDOMCopy() {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
const { domCopy, getNodeInfo, isVisible } = createEnhancedDOMCopy();
|
const { domCopy, getNodeInfo, isVisible } = createEnhancedDOMCopy();
|
||||||
if (text_only) return domCopy.innerText;
|
if (text_only) {
|
||||||
|
const blocks = new Set(['DIV','P','H1','H2','H3','H4','H5','H6','LI','TR','SECTION','ARTICLE','HEADER','FOOTER','NAV','BLOCKQUOTE','PRE','HR','BR','DT','DD','FIGCAPTION','DETAILS','SUMMARY']);
|
||||||
|
domCopy.querySelectorAll('*').forEach(el => {
|
||||||
|
if (blocks.has(el.tagName)) el.insertAdjacentText('beforebegin', '\n');
|
||||||
|
});
|
||||||
|
domCopy.querySelectorAll('input:not([type=hidden]),textarea,select').forEach(el=>{
|
||||||
|
const p=[el.tagName,el.id&&'#'+el.id,el.getAttribute('name')&&'name='+el.getAttribute('name'),el.tagName==='INPUT'&&'type='+(el.getAttribute('type')||'text'),el.getAttribute('placeholder')&&'"'+el.getAttribute('placeholder')+'"',el.getAttribute('data-autofilled')&&'autofilled',el.disabled&&'disabled',el.tagName==='SELECT'&&el.getAttribute('data-selected')&&'="'+el.getAttribute('data-selected')+'"'].filter(Boolean).join(' ');
|
||||||
|
el.insertAdjacentText('beforebegin','\n['+p+']\n');
|
||||||
|
});
|
||||||
|
domCopy.querySelectorAll('button[disabled]').forEach(el=>el.insertAdjacentText('beforebegin','[DISABLED] '));
|
||||||
|
return domCopy.textContent;
|
||||||
|
}
|
||||||
const viewportArea = window.innerWidth * window.innerHeight;
|
const viewportArea = window.innerWidth * window.innerHeight;
|
||||||
|
|
||||||
function analyzeNode(node, pPathType='main') {
|
function analyzeNode(node, pPathType='main') {
|
||||||
@@ -768,9 +779,15 @@ def get_temp_texts(driver):
|
|||||||
print(e)
|
print(e)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
import time
|
import time, re
|
||||||
def get_main_block(driver, extra_js="", text_only=False):
|
def get_main_block(driver, extra_js="", text_only=False):
|
||||||
return driver.execute_js(f"{extra_js}\n{js_optHTML}\nreturn optHTML({str(text_only).lower()});").get('data', '')
|
page = driver.execute_js(f"{extra_js}\n{js_optHTML}\nreturn optHTML({str(text_only).lower()});").get('data', '')
|
||||||
|
if text_only:
|
||||||
|
page = re.sub(r' {2,}', ' ', page) # collapse runs of two or more spaces into a single space
|
||||||
|
page = re.sub(r'^ +', '', page, flags=re.M) # strip leading spaces at the start of every line
|
||||||
|
page = re.sub(r'(\n\s*){3,}', '\n\n', page) # collapse 3+ consecutive newlines (with any whitespace between them) into one blank line
|
||||||
|
return page.strip()
|
||||||
|
return page
|
||||||
|
|
||||||
def find_changed_elements(before_html, after_html):
|
def find_changed_elements(before_html, after_html):
|
||||||
before_soup = BeautifulSoup(before_html, 'html.parser')
|
before_soup = BeautifulSoup(before_html, 'html.parser')
|
||||||
|
|||||||
Reference in New Issue
Block a user