feat: simphtml保留表单value属性+列表裁剪提示; web_execute_js支持switch_tab_id
This commit is contained in:
6
ga.py
6
ga.py
@@ -148,7 +148,7 @@ def format_error(e):
|
|||||||
return f"{exc_type.__name__}: {str(e)} @ {fname}:{f.lineno}, {f.name} -> `{f.line}`"
|
return f"{exc_type.__name__}: {str(e)} @ {fname}:{f.lineno}, {f.name} -> `{f.line}`"
|
||||||
return f"{exc_type.__name__}: {str(e)}"
|
return f"{exc_type.__name__}: {str(e)}"
|
||||||
|
|
||||||
def web_execute_js(script: str):
|
def web_execute_js(script, switch_tab_id=None):
|
||||||
"""
|
"""
|
||||||
执行 JS 脚本来控制浏览器,并捕获结果和页面变化。
|
执行 JS 脚本来控制浏览器,并捕获结果和页面变化。
|
||||||
script: 要执行的 JavaScript 代码字符串。
|
script: 要执行的 JavaScript 代码字符串。
|
||||||
@@ -170,6 +170,7 @@ def web_execute_js(script: str):
|
|||||||
if driver is None: first_init_driver()
|
if driver is None: first_init_driver()
|
||||||
if len(driver.get_all_sessions()) == 0:
|
if len(driver.get_all_sessions()) == 0:
|
||||||
return {"status": "error", "msg": "没有可用的浏览器标签页,请先打开一个浏览器标签页,且确认TMWebDriver浏览器tempermonkey插件已安装并启用。"}
|
return {"status": "error", "msg": "没有可用的浏览器标签页,请先打开一个浏览器标签页,且确认TMWebDriver浏览器tempermonkey插件已安装并启用。"}
|
||||||
|
if switch_tab_id: driver.default_session_id = switch_tab_id
|
||||||
result = execute_js_rich(script, driver)
|
result = execute_js_rich(script, driver)
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -301,7 +302,8 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
script = args.get("script", "")
|
script = args.get("script", "")
|
||||||
if not script: return StepOutcome(None, next_prompt="[Error] Empty script param. Check your tool call arguments.")
|
if not script: return StepOutcome(None, next_prompt="[Error] Empty script param. Check your tool call arguments.")
|
||||||
save_to_file = args.get("save_to_file", "")
|
save_to_file = args.get("save_to_file", "")
|
||||||
result = web_execute_js(script)
|
switch_tab_id = args.get("switch_tab_id")
|
||||||
|
result = web_execute_js(script, switch_tab_id=switch_tab_id)
|
||||||
if save_to_file and "js_return" in result:
|
if save_to_file and "js_return" in result:
|
||||||
content = str(result["js_return"] or '')
|
content = str(result["js_return"] or '')
|
||||||
abs_path = self._get_abs_path(save_to_file)
|
abs_path = self._get_abs_path(save_to_file)
|
||||||
|
|||||||
46
simphtml.py
46
simphtml.py
@@ -15,7 +15,9 @@ function createEnhancedDOMCopy() {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (sourceNode.nodeType === 3) return sourceNode.cloneNode(false);
|
if (sourceNode.nodeType === 3) return sourceNode.cloneNode(false);
|
||||||
const clone = sourceNode.cloneNode(false);
|
const clone = sourceNode.cloneNode(false);
|
||||||
|
if ((sourceNode.tagName === 'INPUT' || sourceNode.tagName === 'TEXTAREA') && sourceNode.value) clone.setAttribute('value', sourceNode.value);
|
||||||
|
else if (sourceNode.tagName === 'SELECT' && sourceNode.value) clone.setAttribute('data-selected', sourceNode.value);
|
||||||
|
|
||||||
const isDropdown = sourceNode.classList?.contains('dropdown-menu') ||
|
const isDropdown = sourceNode.classList?.contains('dropdown-menu') ||
|
||||||
/dropdown|menu/i.test(sourceNode.className) || sourceNode.getAttribute('role') === 'menu';
|
/dropdown|menu/i.test(sourceNode.className) || sourceNode.getAttribute('role') === 'menu';
|
||||||
@@ -744,32 +746,24 @@ js_cleanDOM = '''function cleanDOM(element) {
|
|||||||
def optimize_html_for_tokens(html):
|
def optimize_html_for_tokens(html):
|
||||||
if type(html) is str: soup = BeautifulSoup(html, 'html.parser')
|
if type(html) is str: soup = BeautifulSoup(html, 'html.parser')
|
||||||
else: soup = html
|
else: soup = html
|
||||||
# 1. 删除所有style属性
|
|
||||||
[tag.attrs.pop('style', None) for tag in soup.find_all(True)]
|
[tag.attrs.pop('style', None) for tag in soup.find_all(True)]
|
||||||
|
|
||||||
# 2. 极简处理src和href (不保留原始映射)
|
|
||||||
for tag in soup.find_all(True):
|
for tag in soup.find_all(True):
|
||||||
# 2.1 处理src属性 - 常见于img, script等标签
|
|
||||||
if tag.has_attr('src'):
|
if tag.has_attr('src'):
|
||||||
# Base64图片直接替换为超短占位符
|
if tag['src'].startswith('data:'): tag['src'] = '__img__'
|
||||||
if tag['src'].startswith('data:'):
|
elif len(tag['src']) > 30: tag['src'] = '__url__'
|
||||||
tag['src'] = '__img__'
|
if tag.has_attr('href') and len(tag['href']) > 30: tag['href'] = '__link__'
|
||||||
# 长URL替换为短占位符
|
if tag.has_attr('action') and len(tag['action']) > 30: tag['action'] = '__url__'
|
||||||
elif len(tag['src']) > 30:
|
for a in ('value', 'title', 'alt'):
|
||||||
tag['src'] = '__url__'
|
if tag.has_attr(a) and isinstance(tag[a], str) and len(tag[a]) > 100: tag[a] = tag[a][:50] + ' ...'
|
||||||
|
|
||||||
# 2.2 处理href属性 - 常见于a标签
|
|
||||||
if tag.has_attr('href') and len(tag['href']) > 30:
|
|
||||||
tag['href'] = '__link__'
|
|
||||||
|
|
||||||
# 2.3 删除其他不必要的长属性值
|
|
||||||
for attr in list(tag.attrs.keys()):
|
for attr in list(tag.attrs.keys()):
|
||||||
if attr not in ['id', 'class', 'name', 'src', 'href', 'alt']:
|
if attr not in ['id', 'class', 'name', 'src', 'href', 'alt', 'value', 'type', 'placeholder',
|
||||||
# 保留data-*属性名但简化其值
|
'disabled', 'checked', 'selected', 'readonly', 'required', 'multiple',
|
||||||
if attr.startswith('data-') and isinstance(tag[attr], str) and len(tag[attr]) > 20:
|
'role', 'aria-label', 'aria-expanded', 'aria-hidden', 'contenteditable',
|
||||||
tag[attr] = f'__data__'
|
'title', 'for', 'action', 'method', 'target', 'colspan', 'rowspan']:
|
||||||
elif not attr.startswith('data-'):
|
if attr.startswith('data-v'): tag.attrs.pop(attr, None)
|
||||||
tag.attrs.pop(attr, None)
|
elif attr.startswith('data-') and isinstance(tag[attr], str) and len(tag[attr]) > 20:
|
||||||
|
tag[attr] = '__data__'
|
||||||
|
elif not attr.startswith('data-'): tag.attrs.pop(attr, None)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
|
||||||
@@ -868,7 +862,7 @@ def get_html(driver, cutlist=False, maxchars=28000, instruction=""):
|
|||||||
keep = hit[:6] if hit else items[:3]
|
keep = hit[:6] if hit else items[:3]
|
||||||
for it in items:
|
for it in items:
|
||||||
if it not in keep: it.decompose()
|
if it not in keep: it.decompose()
|
||||||
ss = str(optimize_html_for_tokens(s))
|
ss = '[SYSTEM] Found item list, only show some items ...\n' + str(optimize_html_for_tokens(s))
|
||||||
else: ss = html
|
else: ss = html
|
||||||
if len(ss) > maxchars: ss = ss[:maxchars] + ' ... [TRUNCATED]'
|
if len(ss) > maxchars: ss = ss[:maxchars] + ' ... [TRUNCATED]'
|
||||||
return ss
|
return ss
|
||||||
@@ -877,7 +871,7 @@ def execute_js_rich(script, driver):
|
|||||||
try: start_temp_monitor(driver)
|
try: start_temp_monitor(driver)
|
||||||
except: pass
|
except: pass
|
||||||
curr_session = driver.default_session_id
|
curr_session = driver.default_session_id
|
||||||
try: last_html = get_html(driver)
|
try: last_html = get_html(driver, cutlist=False)
|
||||||
except: last_html = None
|
except: last_html = None
|
||||||
result = None; error_msg = None
|
result = None; error_msg = None
|
||||||
new_tab = False; reloaded = False
|
new_tab = False; reloaded = False
|
||||||
@@ -908,7 +902,7 @@ def execute_js_rich(script, driver):
|
|||||||
except: rr['transients'] = []
|
except: rr['transients'] = []
|
||||||
if not reloaded and not new_tab:
|
if not reloaded and not new_tab:
|
||||||
try:
|
try:
|
||||||
current_html = get_html(driver)
|
current_html = get_html(driver, cutlist=False)
|
||||||
if last_html is None: raise Exception("no baseline")
|
if last_html is None: raise Exception("no baseline")
|
||||||
diff_data = find_changed_elements(last_html, current_html)
|
diff_data = find_changed_elements(last_html, current_html)
|
||||||
change_count = diff_data.get('changed', 0)
|
change_count = diff_data.get('changed', 0)
|
||||||
|
|||||||
Reference in New Issue
Block a user