feat: simphtml保留表单value属性+列表裁剪提示; web_execute_js支持switch_tab_id
This commit is contained in:
6
ga.py
6
ga.py
@@ -148,7 +148,7 @@ def format_error(e):
|
||||
return f"{exc_type.__name__}: {str(e)} @ {fname}:{f.lineno}, {f.name} -> `{f.line}`"
|
||||
return f"{exc_type.__name__}: {str(e)}"
|
||||
|
||||
def web_execute_js(script: str):
|
||||
def web_execute_js(script, switch_tab_id=None):
|
||||
"""
|
||||
执行 JS 脚本来控制浏览器,并捕获结果和页面变化。
|
||||
script: 要执行的 JavaScript 代码字符串。
|
||||
@@ -170,6 +170,7 @@ def web_execute_js(script: str):
|
||||
if driver is None: first_init_driver()
|
||||
if len(driver.get_all_sessions()) == 0:
|
||||
return {"status": "error", "msg": "没有可用的浏览器标签页,请先打开一个浏览器标签页,且确认TMWebDriver浏览器tempermonkey插件已安装并启用。"}
|
||||
if switch_tab_id: driver.default_session_id = switch_tab_id
|
||||
result = execute_js_rich(script, driver)
|
||||
return result
|
||||
except Exception as e:
|
||||
@@ -301,7 +302,8 @@ class GenericAgentHandler(BaseHandler):
|
||||
script = args.get("script", "")
|
||||
if not script: return StepOutcome(None, next_prompt="[Error] Empty script param. Check your tool call arguments.")
|
||||
save_to_file = args.get("save_to_file", "")
|
||||
result = web_execute_js(script)
|
||||
switch_tab_id = args.get("switch_tab_id")
|
||||
result = web_execute_js(script, switch_tab_id=switch_tab_id)
|
||||
if save_to_file and "js_return" in result:
|
||||
content = str(result["js_return"] or '')
|
||||
abs_path = self._get_abs_path(save_to_file)
|
||||
|
||||
44
simphtml.py
44
simphtml.py
@@ -16,6 +16,8 @@ function createEnhancedDOMCopy() {
|
||||
}
|
||||
if (sourceNode.nodeType === 3) return sourceNode.cloneNode(false);
|
||||
const clone = sourceNode.cloneNode(false);
|
||||
if ((sourceNode.tagName === 'INPUT' || sourceNode.tagName === 'TEXTAREA') && sourceNode.value) clone.setAttribute('value', sourceNode.value);
|
||||
else if (sourceNode.tagName === 'SELECT' && sourceNode.value) clone.setAttribute('data-selected', sourceNode.value);
|
||||
|
||||
const isDropdown = sourceNode.classList?.contains('dropdown-menu') ||
|
||||
/dropdown|menu/i.test(sourceNode.className) || sourceNode.getAttribute('role') === 'menu';
|
||||
@@ -744,32 +746,24 @@ js_cleanDOM = '''function cleanDOM(element) {
|
||||
def optimize_html_for_tokens(html):
|
||||
if type(html) is str: soup = BeautifulSoup(html, 'html.parser')
|
||||
else: soup = html
|
||||
# 1. 删除所有style属性
|
||||
[tag.attrs.pop('style', None) for tag in soup.find_all(True)]
|
||||
|
||||
# 2. 极简处理src和href (不保留原始映射)
|
||||
for tag in soup.find_all(True):
|
||||
# 2.1 处理src属性 - 常见于img, script等标签
|
||||
if tag.has_attr('src'):
|
||||
# Base64图片直接替换为超短占位符
|
||||
if tag['src'].startswith('data:'):
|
||||
tag['src'] = '__img__'
|
||||
# 长URL替换为短占位符
|
||||
elif len(tag['src']) > 30:
|
||||
tag['src'] = '__url__'
|
||||
|
||||
# 2.2 处理href属性 - 常见于a标签
|
||||
if tag.has_attr('href') and len(tag['href']) > 30:
|
||||
tag['href'] = '__link__'
|
||||
|
||||
# 2.3 删除其他不必要的长属性值
|
||||
if tag['src'].startswith('data:'): tag['src'] = '__img__'
|
||||
elif len(tag['src']) > 30: tag['src'] = '__url__'
|
||||
if tag.has_attr('href') and len(tag['href']) > 30: tag['href'] = '__link__'
|
||||
if tag.has_attr('action') and len(tag['action']) > 30: tag['action'] = '__url__'
|
||||
for a in ('value', 'title', 'alt'):
|
||||
if tag.has_attr(a) and isinstance(tag[a], str) and len(tag[a]) > 100: tag[a] = tag[a][:50] + ' ...'
|
||||
for attr in list(tag.attrs.keys()):
|
||||
if attr not in ['id', 'class', 'name', 'src', 'href', 'alt']:
|
||||
# 保留data-*属性名但简化其值
|
||||
if attr.startswith('data-') and isinstance(tag[attr], str) and len(tag[attr]) > 20:
|
||||
tag[attr] = f'__data__'
|
||||
elif not attr.startswith('data-'):
|
||||
tag.attrs.pop(attr, None)
|
||||
if attr not in ['id', 'class', 'name', 'src', 'href', 'alt', 'value', 'type', 'placeholder',
|
||||
'disabled', 'checked', 'selected', 'readonly', 'required', 'multiple',
|
||||
'role', 'aria-label', 'aria-expanded', 'aria-hidden', 'contenteditable',
|
||||
'title', 'for', 'action', 'method', 'target', 'colspan', 'rowspan']:
|
||||
if attr.startswith('data-v'): tag.attrs.pop(attr, None)
|
||||
elif attr.startswith('data-') and isinstance(tag[attr], str) and len(tag[attr]) > 20:
|
||||
tag[attr] = '__data__'
|
||||
elif not attr.startswith('data-'): tag.attrs.pop(attr, None)
|
||||
return soup
|
||||
|
||||
|
||||
@@ -868,7 +862,7 @@ def get_html(driver, cutlist=False, maxchars=28000, instruction=""):
|
||||
keep = hit[:6] if hit else items[:3]
|
||||
for it in items:
|
||||
if it not in keep: it.decompose()
|
||||
ss = str(optimize_html_for_tokens(s))
|
||||
ss = '[SYSTEM] Found item list, only show some items ...\n' + str(optimize_html_for_tokens(s))
|
||||
else: ss = html
|
||||
if len(ss) > maxchars: ss = ss[:maxchars] + ' ... [TRUNCATED]'
|
||||
return ss
|
||||
@@ -877,7 +871,7 @@ def execute_js_rich(script, driver):
|
||||
try: start_temp_monitor(driver)
|
||||
except: pass
|
||||
curr_session = driver.default_session_id
|
||||
try: last_html = get_html(driver)
|
||||
try: last_html = get_html(driver, cutlist=False)
|
||||
except: last_html = None
|
||||
result = None; error_msg = None
|
||||
new_tab = False; reloaded = False
|
||||
@@ -908,7 +902,7 @@ def execute_js_rich(script, driver):
|
||||
except: rr['transients'] = []
|
||||
if not reloaded and not new_tab:
|
||||
try:
|
||||
current_html = get_html(driver)
|
||||
current_html = get_html(driver, cutlist=False)
|
||||
if last_html is None: raise Exception("no baseline")
|
||||
diff_data = find_changed_elements(last_html, current_html)
|
||||
change_count = diff_data.get('changed', 0)
|
||||
|
||||
Reference in New Issue
Block a user