fix: ljqCtrl Press list bug, update SOPs (must call working_mem), agentmain round-end marker
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
授权你进行自主行动,只要不对环境造成副作用都可进行。
|
||||
请先选择核心目标,再选择一个小目标进行。最终探测结果形成报告(含操作申请),待用户回来确认后再进行可能的写入或修改操作。
|
||||
|
||||
> **启动时写入工作记忆**:`自主探索|≤15回合|只有cwd内可写|用户不在(问题存报告)|收尾:重读本SOP确认报告目录+更新报告目录内history|产出=报告+记忆提案`
|
||||
> **must call update_working_memory**:`自主探索|≤15回合|只有cwd内可写|用户不在(问题存报告)|收尾:重读本SOP确认报告目录+更新报告目录内history|产出=报告+记忆提案`
|
||||
|
||||
## 📋 大纲
|
||||
- 报告目录与规则
|
||||
|
||||
@@ -51,17 +51,20 @@ def Click(x, y=None):
|
||||
if type(x) is type(tuple()): x, y = int(x[0]), int(x[1])
|
||||
SetCursorPos( (x, y) )
|
||||
MouseClick()
|
||||
click = Click
|
||||
|
||||
def Press(cmd, staytime=0):
    """Simulate a key chord: press the keys in order, release them in reverse.

    Args:
        cmd: Either a '+'-separated string such as 'ctrl+v', or a list/tuple
            of key names such as ['ctrl', 'v']. Names are lower-cased before
            being looked up in VK_CODE. Because the string form is split on
            '+', the '+' key itself can only be sent via the list form.
        staytime: Seconds to sleep after each key-down and before each key-up.

    Raises:
        KeyError: If a key name is not present in VK_CODE. The lookup happens
            before any key event is sent, so no key is left held down.
    """
    if isinstance(cmd, (list, tuple)):
        cmds = [x.lower() for x in cmd]
    else:
        cmds = cmd.lower().split('+')
    # Resolve every name first: failing half-way through the key-down loop
    # would leave modifier keys (e.g. ctrl) stuck in the pressed state.
    codes = [VK_CODE[z] for z in cmds]
    for code in codes:
        win32api.keybd_event(code, 0, 0, 0)
        time.sleep(staytime)
    for code in reversed(codes):
        time.sleep(staytime)
        win32api.keybd_event(code, 0, win32con.KEYEVENTF_KEYUP, 0)
|
||||
press = Press
|
||||
|
||||
# Key name -> virtual-key code table. Press() looks names up here and passes
# the code as the first argument of win32api.keybd_event.
VK_CODE = {
    'backspace': 0x08, 'tab': 0x09, 'clear': 0x0C, 'enter': 0x0D,
    'shift': 0x10, 'ctrl': 0x11, 'alt': 0x12, 'pause': 0x13,
    'caps_lock': 0x14, 'esc': 0x1B, 'escape': 0x1B, 'space': 0x20,
    'page_up': 0x21, 'page_down': 0x22, 'end': 0x23, 'home': 0x24,
    'left_arrow': 0x25, 'up_arrow': 0x26, 'right_arrow': 0x27,
    'down_arrow': 0x28, 'select': 0x29, 'print': 0x2A, 'execute': 0x2B,
    'print_screen': 0x2C, 'ins': 0x2D, 'del': 0x2E, 'help': 0x2F,
    '0': 0x30, '1': 0x31, '2': 0x32, '3': 0x33, '4': 0x34,
    '5': 0x35, '6': 0x36, '7': 0x37, '8': 0x38, '9': 0x39,
    'a': 0x41, 'b': 0x42, 'c': 0x43, 'd': 0x44, 'e': 0x45, 'f': 0x46,
    'g': 0x47, 'h': 0x48, 'i': 0x49, 'j': 0x4A, 'k': 0x4B, 'l': 0x4C,
    'm': 0x4D, 'n': 0x4E, 'o': 0x4F, 'p': 0x50, 'q': 0x51, 'r': 0x52,
    's': 0x53, 't': 0x54, 'u': 0x55, 'v': 0x56, 'w': 0x57, 'x': 0x58,
    'y': 0x59, 'z': 0x5A,
    'numpad_0': 0x60, 'numpad_1': 0x61, 'numpad_2': 0x62, 'numpad_3': 0x63,
    'numpad_4': 0x64, 'numpad_5': 0x65, 'numpad_6': 0x66, 'numpad_7': 0x67,
    'numpad_8': 0x68, 'numpad_9': 0x69,
    'multiply_key': 0x6A, 'add_key': 0x6B, 'separator_key': 0x6C,
    'subtract_key': 0x6D, 'decimal_key': 0x6E, 'divide_key': 0x6F,
    'F1': 0x70, 'F2': 0x71, 'F3': 0x72, 'F4': 0x73, 'F5': 0x74, 'F6': 0x75,
    'F7': 0x76, 'F8': 0x77, 'F9': 0x78, 'F10': 0x79, 'F11': 0x7A,
    'F12': 0x7B, 'F13': 0x7C, 'F14': 0x7D, 'F15': 0x7E, 'F16': 0x7F,
    'F17': 0x80, 'F18': 0x81, 'F19': 0x82, 'F20': 0x83, 'F21': 0x84,
    'F22': 0x85, 'F23': 0x86, 'F24': 0x87,
    'num_lock': 0x90, 'scroll_lock': 0x91,
    'left_shift': 0xA0, 'right_shift': 0xA1,
    # Legacy spelling with a trailing space; kept so existing lookups that
    # relied on the old (misspelled) key keep working.
    'right_shift ': 0xA1,
    'left_control': 0xA2, 'right_control': 0xA3,
    'left_menu': 0xA4, 'right_menu': 0xA5,
    'browser_back': 0xA6, 'browser_forward': 0xA7, 'browser_refresh': 0xA8,
    'browser_stop': 0xA9, 'browser_search': 0xAA, 'browser_favorites': 0xAB,
    'browser_start_and_home': 0xAC,
    'volume_mute': 0xAD, 'volume_Down': 0xAE, 'volume_up': 0xAF,
    'next_track': 0xB0, 'previous_track': 0xB1, 'stop_media': 0xB2,
    'play/pause_media': 0xB3, 'start_mail': 0xB4, 'select_media': 0xB5,
    'start_application_1': 0xB6, 'start_application_2': 0xB7,
    'attn_key': 0xF6, 'crsel_key': 0xF7, 'exsel_key': 0xF8,
    'play_key': 0xFA, 'zoom_key': 0xFB, 'clear_key': 0xFE,
    '+': 0xBB, ',': 0xBC, '-': 0xBD, '.': 0xBE, '/': 0xBF, '`': 0xC0,
    ';': 0xBA, '[': 0xDB, '\\': 0xDC, ']': 0xDD, "'": 0xDE,
}
# Press() lower-cases key names before lookup, so normalise mixed-case
# entries such as 'F1' and 'volume_Down' to lower case here.
VK_CODE = {k.lower(): v for k, v in VK_CODE.items()}
|
||||
|
||||
def imshow(mt, sec=0):
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# ljqCtrl 使用与坐标转换 SOP
|
||||
|
||||
> **must call update_working_memory**:`ljqCtrl一律使用物理坐标|禁pyautogui|操作前先gw激活窗口`
|
||||
|
||||
## 0. API 快速参考 (Signatures)
|
||||
- `ljqCtrl.dpi_scale`: float (缩放系数 = 逻辑宽度 / 物理宽度)
|
||||
- `ljqCtrl.SetCursorPos(z)`: 移动鼠标到物理坐标 z=(x, y)(即截图像素坐标;`Click` 会把传入的坐标原样交给它,二者坐标系一致)
|
||||
@@ -35,6 +37,7 @@ ljqCtrl.Click(px, py)
|
||||
```
|
||||
|
||||
## 4. 避坑指南
|
||||
- **⚠️ 一律使用物理坐标**:传给 ljqCtrl.Click/SetCursorPos 的坐标必须是物理坐标(=截图像素坐标)。从 pygetwindow 获取的逻辑坐标需先 `/ dpi_scale` 转换。禁止传入逻辑坐标。
|
||||
- **物理验证**:模拟操作前必须确保窗口已通过 `activate()` 置于前台。
|
||||
- **偏移量**:所有的相对偏移像素值(如“向右移动 10 像素”)同样需要除以 `dpi_scale`。
|
||||
- **坐标对齐**: 物理坐标 = 截图坐标;ljqCtrl 自动处理 DPI 换算,禁止手动重复计算。
|
||||
|
||||
@@ -32,19 +32,47 @@ python ../memory/mem_scanner.py <PID> "pattern" --llm
|
||||
- **权限**: 并非强制要求管理员权限,但需具备对目标进程的 `PROCESS_QUERY_INFORMATION` 和 `PROCESS_VM_READ` 权限。
|
||||
- **效率**: 搜索大块内存时,尽量提供更唯一的特征码以减少误报。
|
||||
|
||||
## 4. 典型场景:CE式差集扫描定位动态字段(已验证)
|
||||
用于定位微信等自绘UI中「当前会话标题」等随操作变化的内存字段。
|
||||
## 4. CE式差集扫描定位动态字段
|
||||
定位微信等自绘UI中随操作变化的内存字段(如当前会话标题)。核心:一次全量scan + 多次ReadProcessMemory筛选。
|
||||
|
||||
**方法(类似Cheat Engine找游戏数值):**
|
||||
1. 找到主窗口PID(Weixin.exe有多个进程,用win32gui.GetWindowThreadProcessId取有窗口的那个)
|
||||
2. 切到会话A → `scan_memory(pid, "人名A", mode="string")` → 得地址集S_A
|
||||
3. 切到会话B → `scan_memory(pid, "人名B", mode="string")` → 得地址集S_B
|
||||
4. 差集:S_A独有地址(A时有、B时无)= 候选地址
|
||||
5. 切回A → 用ReadProcessMemory逐个读候选地址,确认内容变回"人名A"的即为目标
|
||||
6. 再切第3、4个人交叉验证
|
||||
**流程(3个联系人A/B/C即可收敛):**
|
||||
1. 取PID:Weixin.exe有多进程,用`win32gui.GetWindowThreadProcessId`取有窗口的
|
||||
2. 当前会话=A → `scan_memory(pid, "人名A", mode="string")` → 地址集S
|
||||
3. 切到B → 读S全部地址 → 保留内容≠"人名A"的 → 候选集K1
|
||||
4. 切到A → 读K1全部地址 → 保留内容=="人名A"的 → 候选集K2(通常1-3个)
|
||||
5. 若K2中地址数>1 → 再切到联系人B/C重复步骤3-4 → 直到唯一
|
||||
|
||||
**切换会话+读地址 完整代码:**
|
||||
```python
|
||||
import sys; sys.path.append('../memory')
|
||||
import ljqCtrl, pygetwindow as gw, pyperclip, time, ctypes
|
||||
|
||||
def switch_chat(name):
    """Switch the WeChat window to the chat called *name* via its search UI.

    Brings the first window whose title matches '微信' to the foreground,
    clicks at window offset (150, 40), pastes *name* from the clipboard,
    waits, then clicks at window offset (150, 130).
    NOTE(review): the two offsets presumably hit the search box and the first
    search result - confirm against the current WeChat layout.

    Args:
        name: Chat/contact name to paste into the search field.

    Raises:
        IndexError: If no window with the title '微信' is found.
    """
    wechat = gw.getWindowsWithTitle('微信')[0]
    if wechat.isMinimized:
        wechat.restore()
    wechat.activate()
    time.sleep(0.3)

    # pygetwindow offsets are multiplied by 1/dpi_scale before being handed
    # to ljqCtrl.Click.
    to_physical = 1 / ljqCtrl.dpi_scale

    def click_at(dx, dy):
        # Window position is re-read on every click, as in the inline form.
        px = int((wechat.left + dx) * to_physical)
        py = int((wechat.top + dy) * to_physical)
        ljqCtrl.Click(px, py)

    click_at(150, 40)
    time.sleep(0.5)

    pyperclip.copy(name)
    ljqCtrl.Press('ctrl+v')
    time.sleep(1.5)

    click_at(150, 130)
    time.sleep(0.8)
|
||||
|
||||
def read_addrs(pid, addrs):
    """Read a short NUL-terminated UTF-8 string at each address of a process.

    Args:
        pid: Id of the target process.
        addrs: Iterable of addresses, each either an int or a hex string
            (converted with ``int(addr, 16)``).

    Returns:
        dict mapping each address (as int) to the decoded, stripped text found
        there. An address that could not be read maps to ''.

    Raises:
        OSError: If the process cannot be opened for reading.
        ValueError: If an address string is not valid hexadecimal.
    """
    PROCESS_VM_READ = 0x10
    chunk = 256  # bytes fetched per address

    k32 = ctypes.windll.kernel32
    hp = k32.OpenProcess(PROCESS_VM_READ, False, pid)
    if not hp:
        raise OSError(f'OpenProcess failed for pid {pid}')

    buf = ctypes.create_string_buffer(chunk)
    rd = ctypes.c_size_t()
    result = {}
    try:
        for a in addrs:
            a = int(a, 16) if isinstance(a, str) else a
            # Reset the counter so a failed call cannot inherit the previous
            # iteration's byte count.
            rd.value = 0
            k32.ReadProcessMemory(hp, ctypes.c_void_p(a), buf, chunk, ctypes.byref(rd))
            # Decode only the bytes reported as read: the buffer is reused, so
            # anything past rd.value is stale data from an earlier address.
            fresh = buf.raw[:rd.value]
            result[a] = fresh.split(b'\x00')[0].decode('utf-8', errors='ignore').strip()
    finally:
        # Always release the process handle, even if an address is malformed.
        k32.CloseHandle(hp)
    return result  # {addr: text}
|
||||
```
|
||||
|
||||
**坑点:**
|
||||
- 搜索切换会污染结果(搜索框缓存也含人名),最终验证应用列表点击而非搜索
|
||||
- 地址是绝对虚拟地址,进程重启后失效,需重新校准(约10秒)
|
||||
- ReadProcessMemory读UTF-8,用`raw.split(b'\x00')[0].decode('utf-8')`提取干净文本
|
||||
- 微信主进程名为Weixin.exe(非WeChat.exe)
|
||||
- 进程名Weixin.exe(非WeChat.exe);地址字符串先`int(addr,16)`
|
||||
- 步骤3筛≠A(排除空/乱码),步骤4筛==A(正向确认),交替最快
|
||||
- 搜索框粘贴后≥1.5s再点结果;"文件传输助手"等常见词首条可能是广告,建议用不常见的联系人名做差集(如真实好友昵称),或搜索后点第2条结果
|
||||
- 切换后用read_addrs验证确实切成功了再继续
|
||||
- 最终候选>1时的消歧:不要用搜索切换,直接在联系人列表中点一个靠后的人(不经过搜索框),然后read_addrs看哪个地址变了——变的才是标题栏,没变的是搜索框残留
|
||||
@@ -10,6 +10,8 @@
|
||||
- 不满足map模式的任务 → 主agent顺序执行即可,别用subagent
|
||||
|
||||
**额外场景:SOP dry-run验证**——启动单个subagent执行目标SOP,通过output日志发现SOP缺陷(缺参数/选择器不准/步骤模糊),主agent据此patch优化SOP。单subagent不存在资源冲突。
|
||||
- 测SOP质量:input指定SOP名(如"用ezgmail_sop查看最近3封未读邮件"),排除导航干扰,失败即SOP问题
|
||||
- 测导航能力:input只写目标,验证subagent能自主从insight找到正确SOP。禁止内联SOP内容
|
||||
|
||||
**标准流程(map-reduce)**:
|
||||
1. 主agent准备阶段:爬取/dump数据,存为多个独立输入文件
|
||||
@@ -17,16 +19,23 @@
|
||||
3. 收集:等所有subagent完成,主agent读取各输出文件,汇总结果
|
||||
|
||||
## Task Mode 文件IO协议
|
||||
- 目录:`./{task_name}/`,启动:`python agentmain.py --task {task_name} [--llm_no N]`(cwd=GenericAgent根)
|
||||
- 目录:`temp/{task_name}/`(相对代码根GenericAgent/),主agent cwd在temp/时即 `./{task_name}/`
|
||||
- 启动:`python agentmain.py --task {task_name} [--llm_no N]`(cwd=代码根)
|
||||
- 流程:写 input.txt → 启动 → 轮询 output.txt → 读回复 → 写 reply.txt 继续 → 不写则5min自动退出
|
||||
- input.txt原则:写目标+约束,可指定SOP名。禁写具体实现步骤——除非主agent已读过该SOP确认正确。凭印象猜的步骤会误导subagent
|
||||
- reply后subagent的后续输出在 output1.txt, output2.txt ... 中(递增编号),主agent需轮询这些文件
|
||||
- output.txt 每轮覆盖写,用 mtime/size 判断新轮次
|
||||
|
||||
## 后台调用要点
|
||||
```python
|
||||
task_dir = os.path.join(agent_root, 'temp', task_name)
# The log files live in the task directory; make sure it exists first.
os.makedirs(task_dir, exist_ok=True)
# The child process gets its own handles to the log files, so the parent's
# copies can be closed as soon as Popen returns.
with open(os.path.join(task_dir, 'stdout.log'), 'w', encoding='utf-8') as out_log, \
        open(os.path.join(task_dir, 'stderr.log'), 'w', encoding='utf-8') as err_log:
    proc = subprocess.Popen(
        [sys.executable, 'agentmain.py', '--task', task_name],
        cwd=agent_root,
        creationflags=0x08000000,  # CREATE_NO_WINDOW
        stdout=out_log,
        stderr=err_log)
|
||||
```
|
||||
- 必须 Popen,禁止 subprocess.run(会阻塞)
|
||||
- stdout.log/stderr.log 用于调试subagent卡死、LLM调用失败等问题
|
||||
- `--llm_no` 默认=sonnet 4.5,`--llm_no 1`=opus 4.6
|
||||
- 文件统一 UTF-8,subagent 无 reply 5min 自动退出无需清理
|
||||
Reference in New Issue
Block a user