fix: ljqCtrl Press list bug, update SOPs (must call working_mem), agentmain round-end marker

This commit is contained in:
Jiaqing Liang
2026-02-24 16:43:05 +08:00
parent 860d305c44
commit 07a53889fb
6 changed files with 63 additions and 20 deletions

View File

@@ -3,7 +3,7 @@
授权你进行自主行动,只要不对环境造成副作用都可进行。
请先选择核心目标,再选择一个小目标进行。最终探测结果形成报告(含操作申请),待用户回来确认后再进行可能的写入或修改操作。
> **启动时写入工作记忆**`自主探索≤15回合只有cwd内可写用户不在(问题存报告)|收尾:重读本SOP确认报告目录+更新报告目录内history产出=报告+记忆提案`
> **must call update_working_memory**:`自主探索≤15回合,只有cwd内可写,用户不在(问题存报告)|收尾:重读本SOP确认报告目录+更新报告目录内history,产出=报告+记忆提案`
## 📋 大纲
- 报告目录与规则

View File

@@ -51,17 +51,20 @@ def Click(x, y=None):
if type(x) is type(tuple()): x, y = int(x[0]), int(x[1])
SetCursorPos( (x, y) )
MouseClick()
click = Click  # lowercase alias for Click
def Press(cmd, staytime=0):
    """Simulate a key press or key combination with win32api.keybd_event.

    Keys are pressed in the given order and released in reverse order.

    Args:
        cmd: Either a '+'-separated string such as 'ctrl+v', or a list/tuple
            of key names such as ['ctrl', 'v']. The sequence form is the only
            way to send the '+' key itself, because the string form splits
            on '+'. Names are lower-cased and looked up in VK_CODE.
        staytime: Seconds to sleep after each key-down and before each key-up.

    Raises:
        KeyError: If a key name is not present in VK_CODE. This is raised
            before any key event is sent, so no key is left held down.
    """
    if isinstance(cmd, (list, tuple)):
        keys = [k.lower() for k in cmd]
    else:
        keys = cmd.lower().split('+')
    # Resolve every name first: a typo must not leave modifiers pressed.
    codes = [VK_CODE[k] for k in keys]
    pressed = []
    try:
        for code in codes:
            win32api.keybd_event(code, 0, 0, 0)
            pressed.append(code)
            time.sleep(staytime)
    finally:
        # Always release whatever was pressed, even if interrupted mid-way.
        for code in reversed(pressed):
            time.sleep(staytime)
            win32api.keybd_event(code, 0, win32con.KEYEVENTF_KEYUP, 0)
press = Press  # lowercase alias for Press
# Key name -> Windows virtual-key code, used by Press().
# Names are normalised to lower case at the end, so look-ups must use
# lower-case names (e.g. 'f1', 'volume_down').
VK_CODE = {
    'backspace': 0x08, 'tab': 0x09, 'clear': 0x0C, 'enter': 0x0D,
    'shift': 0x10, 'ctrl': 0x11, 'alt': 0x12, 'pause': 0x13,
    'caps_lock': 0x14, 'esc': 0x1B, 'escape': 0x1B, 'space': 0x20,
    'page_up': 0x21, 'page_down': 0x22, 'end': 0x23, 'home': 0x24,
    'left_arrow': 0x25, 'up_arrow': 0x26, 'right_arrow': 0x27, 'down_arrow': 0x28,
    'select': 0x29, 'print': 0x2A, 'execute': 0x2B, 'print_screen': 0x2C,
    'ins': 0x2D, 'del': 0x2E, 'help': 0x2F,
    '0': 0x30, '1': 0x31, '2': 0x32, '3': 0x33, '4': 0x34,
    '5': 0x35, '6': 0x36, '7': 0x37, '8': 0x38, '9': 0x39,
    'a': 0x41, 'b': 0x42, 'c': 0x43, 'd': 0x44, 'e': 0x45, 'f': 0x46, 'g': 0x47,
    'h': 0x48, 'i': 0x49, 'j': 0x4A, 'k': 0x4B, 'l': 0x4C, 'm': 0x4D, 'n': 0x4E,
    'o': 0x4F, 'p': 0x50, 'q': 0x51, 'r': 0x52, 's': 0x53, 't': 0x54, 'u': 0x55,
    'v': 0x56, 'w': 0x57, 'x': 0x58, 'y': 0x59, 'z': 0x5A,
    'numpad_0': 0x60, 'numpad_1': 0x61, 'numpad_2': 0x62, 'numpad_3': 0x63,
    'numpad_4': 0x64, 'numpad_5': 0x65, 'numpad_6': 0x66, 'numpad_7': 0x67,
    'numpad_8': 0x68, 'numpad_9': 0x69,
    'multiply_key': 0x6A, 'add_key': 0x6B, 'separator_key': 0x6C,
    'subtract_key': 0x6D, 'decimal_key': 0x6E, 'divide_key': 0x6F,
    'F1': 0x70, 'F2': 0x71, 'F3': 0x72, 'F4': 0x73, 'F5': 0x74, 'F6': 0x75,
    'F7': 0x76, 'F8': 0x77, 'F9': 0x78, 'F10': 0x79, 'F11': 0x7A, 'F12': 0x7B,
    'F13': 0x7C, 'F14': 0x7D, 'F15': 0x7E, 'F16': 0x7F, 'F17': 0x80, 'F18': 0x81,
    'F19': 0x82, 'F20': 0x83, 'F21': 0x84, 'F22': 0x85, 'F23': 0x86, 'F24': 0x87,
    'num_lock': 0x90, 'scroll_lock': 0x91,
    # 'right_shift' previously carried a trailing space and was unreachable.
    'left_shift': 0xA0, 'right_shift': 0xA1,
    'left_control': 0xA2, 'right_control': 0xA3,
    'left_menu': 0xA4, 'right_menu': 0xA5,
    'browser_back': 0xA6, 'browser_forward': 0xA7, 'browser_refresh': 0xA8,
    'browser_stop': 0xA9, 'browser_search': 0xAA, 'browser_favorites': 0xAB,
    'browser_start_and_home': 0xAC,
    'volume_mute': 0xAD, 'volume_Down': 0xAE, 'volume_up': 0xAF,
    'next_track': 0xB0, 'previous_track': 0xB1, 'stop_media': 0xB2,
    'play/pause_media': 0xB3, 'start_mail': 0xB4, 'select_media': 0xB5,
    'start_application_1': 0xB6, 'start_application_2': 0xB7,
    'attn_key': 0xF6, 'crsel_key': 0xF7, 'exsel_key': 0xF8,
    'play_key': 0xFA, 'zoom_key': 0xFB, 'clear_key': 0xFE,
    '+': 0xBB, ',': 0xBC, '-': 0xBD, '.': 0xBE, '/': 0xBF, '`': 0xC0,
    ';': 0xBA, '[': 0xDB, '\\': 0xDC, ']': 0xDD, "'": 0xDE,
}
# Press() lower-cases the requested names, so the table keys must be lower-case too.
VK_CODE = {k.lower(): v for k, v in VK_CODE.items()}
def imshow(mt, sec=0):

View File

@@ -1,5 +1,7 @@
# ljqCtrl 使用与坐标转换 SOP
> **must call update_working_memory**:`ljqCtrl一律使用物理坐标,禁pyautogui,操作前先gw激活窗口`
## 0. API 快速参考 (Signatures)
- `ljqCtrl.dpi_scale`: float (缩放系数 = 逻辑宽度 / 物理宽度)
- `ljqCtrl.SetCursorPos(z)`: 移动鼠标到逻辑坐标 z=(x, y)
@@ -35,6 +37,7 @@ ljqCtrl.Click(px, py)
```
## 4. 避坑指南
- **⚠️ 一律使用物理坐标**:传给 ljqCtrl.Click/SetCursorPos 的坐标必须是物理坐标(=截图像素坐标)。从 pygetwindow 获取的逻辑坐标需先 `/ dpi_scale` 转换。禁止传入逻辑坐标。
- **物理验证**:模拟操作前必须确保窗口已通过 `activate()` 置于前台。
- **偏移量**:所有的相对偏移像素值(如“向右移动 10 像素”)同样需要除以 `dpi_scale`
- **坐标对齐**: 物理坐标 = 截图坐标;ljqCtrl 自动处理 DPI 换算,禁止手动重复计算。

View File

@@ -32,19 +32,47 @@ python ../memory/mem_scanner.py <PID> "pattern" --llm
- **权限**: 并非强制要求管理员权限,但需具备对目标进程的 `PROCESS_QUERY_INFORMATION` 和 `PROCESS_VM_READ` 权限。
- **效率**: 搜索大块内存时,尽量提供更唯一的特征码以减少误报。
## 4. 典型场景:CE式差集扫描定位动态字段(已验证)
用于定位微信等自绘UI中「当前会话标题」等随操作变化的内存字段。
## 4. CE式差集扫描定位动态字段
定位微信等自绘UI中随操作变化的内存字段(如当前会话标题)。核心:一次全量scan + 多次ReadProcessMemory筛选。
**方法类似Cheat Engine找游戏数值**
1. 找到主窗口PIDWeixin.exe有多进程用win32gui.GetWindowThreadProcessId取有窗口的那个)
2. 切到会话A → `scan_memory(pid, "人名A", mode="string")`地址集S_A
3. 切到会话B → `scan_memory(pid, "人名B", mode="string")` → 得地址集S_B
4. 差集S_A独有地址A时有、B时无= 候选地址
5. 切回A → 用ReadProcessMemory逐个读候选地址确认内容变回"人名A"的即为目标
6. 再切第3、4个人交叉验证
**流程(3个联系人A/B/C即可收敛):**
1. PID:Weixin.exe有多进程,用`win32gui.GetWindowThreadProcessId`取有窗口的那个
2. 当前会话=A → `scan_memory(pid, "人名A", mode="string")` → 地址集S
3. 切到B → 读S全部地址 → 保留内容≠"人名A"的 → 候选C
4. 切到A → 读C全部地址 → 保留内容=="人名A"的 → 候选C'(通常1-3个)
5. 若C'>1 → 再切B/C重复 → 直到唯一
**切换会话+读地址 完整代码:**
```python
import sys; sys.path.append('../memory')
import ljqCtrl, pygetwindow as gw, pyperclip, time, ctypes
def switch_chat(name):
    """Activate the WeChat window and switch to the chat found by searching *name*.

    The name is pasted through the clipboard, then the entry below the
    search field is clicked. Window coordinates from pygetwindow are
    converted with 1 / ljqCtrl.dpi_scale before being passed to ljqCtrl.Click.
    """
    wechat = gw.getWindowsWithTitle('微信')[0]
    if wechat.isMinimized:
        wechat.restore()
    wechat.activate()
    time.sleep(0.3)
    to_physical = 1 / ljqCtrl.dpi_scale

    def click_at(dx, dy):
        # Click at an offset from the window's top-left corner.
        px = int((wechat.left + dx) * to_physical)
        py = int((wechat.top + dy) * to_physical)
        ljqCtrl.Click(px, py)

    click_at(150, 40)  # presumably the search box - offsets are layout-specific
    time.sleep(0.5)
    pyperclip.copy(name)
    ljqCtrl.Press('ctrl+v')
    time.sleep(1.5)  # wait for the search results before clicking one
    click_at(150, 130)  # presumably the first search result
    time.sleep(0.8)
def read_addrs(pid, addrs):
    """Read a NUL-terminated UTF-8 string (up to 256 bytes) at each address of a process.

    Args:
        pid: Id of the target process.
        addrs: Iterable of addresses, each an int or a hex string
            (hex strings are parsed with int(a, 16)).

    Returns:
        dict mapping each address (as int) to the decoded, stripped text.
        An address that cannot be read maps to ''.

    Raises:
        OSError: If the target process cannot be opened for reading.
        ValueError: If an address string is not valid hexadecimal.
    """
    k32 = ctypes.windll.kernel32
    hp = k32.OpenProcess(0x10, False, pid)  # 0x10 = PROCESS_VM_READ
    if not hp:
        raise ctypes.WinError()
    buf = ctypes.create_string_buffer(256)
    rd = ctypes.c_size_t()
    result = {}
    try:
        for a in addrs:
            a = int(a, 16) if isinstance(a, str) else a
            ok = k32.ReadProcessMemory(hp, ctypes.c_void_p(a), buf, 256, ctypes.byref(rd))
            # buf is shared between iterations: on failure it still holds the
            # previous address's bytes, so only trust what this call wrote.
            data = buf.raw[:rd.value] if ok else b''
            result[a] = data.split(b'\x00')[0].decode('utf-8', errors='ignore').strip()
    finally:
        k32.CloseHandle(hp)
    return result  # {addr: text}
```
**坑点:**
- 搜索切换会污染结果(搜索框缓存也含人名),最终验证应用列表点击而非搜索
- 地址是绝对虚拟地址进程重启后失效需重新校准约10秒
- ReadProcessMemory读UTF-8`raw.split(b'\x00')[0].decode('utf-8')`提取干净文本
- 微信主进程名为Weixin.exe非WeChat.exe
- 进程名为Weixin.exe(非WeChat.exe);地址字符串先用`int(addr,16)`转成整数
- 步骤3筛≠A(排除空/乱码),步骤4筛==A(正向确认),两者交替收敛最快
- 搜索框粘贴后≥1.5s再点结果;"文件传输助手"等常见词首条可能是广告,建议用不常见的联系人名做差集(如真实好友昵称),或搜索后点第2条结果
- 切换后用read_addrs验证确实切成功了再继续
- 最终候选>1时的消歧:不要用搜索切换,直接在联系人列表中点一个靠后的人(不经过搜索框),然后用read_addrs看哪个地址变了——变的才是标题栏,没变的是搜索框残留

View File

@@ -10,6 +10,8 @@
- 不满足map模式的任务 → 主agent顺序执行即可别用subagent
**额外场景:SOP dry-run验证**——启动单个subagent执行目标SOP,通过output日志发现SOP缺陷(缺参数/选择器不准/步骤模糊),主agent据此patch优化SOP。单subagent不存在资源冲突。
- 测SOP质量:input指定SOP名(如"用ezgmail_sop查看最近3封未读邮件"),排除导航干扰,失败即SOP问题
- 测导航能力:input只写目标,验证subagent能自主从insight找到正确SOP。禁止内联SOP内容
**标准流程map-reduce**
1. 主agent准备阶段爬取/dump数据存为多个独立输入文件
@@ -17,16 +19,23 @@
3. 收集等所有subagent完成主agent读取各输出文件汇总结果
## Task Mode 文件IO协议
- 目录:`./{task_name}/`,启动:`python agentmain.py --task {task_name} [--llm_no N]`cwd=GenericAgent根
- 目录:`temp/{task_name}/`(相对代码根GenericAgent/;主agent cwd在temp/时即 `./{task_name}/`)
- 启动:`python agentmain.py --task {task_name} [--llm_no N]`cwd=代码根)
- 流程:写 input.txt → 启动 → 轮询 output.txt → 读回复 → 写 reply.txt 继续 → 不写则5min自动退出
- input.txt原则:写目标+约束,可指定SOP名。禁写具体实现步骤——除非主agent已读过该SOP确认正确。凭印象猜的步骤会误导subagent
- reply后subagent的后续输出在 output1.txt, output2.txt ... 中(递增编号),主agent需轮询这些文件
- output.txt 每轮覆盖写,用 mtime/size 判断新轮次
## 后台调用要点
```python
task_dir = os.path.join(agent_root, 'temp', task_name)
# Redirect the subagent's stdout/stderr into the task directory for debugging.
# The child receives its own copies of the handles when it is spawned, so the
# parent closes its copies as soon as Popen returns instead of leaking them.
with open(os.path.join(task_dir, 'stdout.log'), 'w', encoding='utf-8') as out_log, \
        open(os.path.join(task_dir, 'stderr.log'), 'w', encoding='utf-8') as err_log:
    proc = subprocess.Popen(
        [sys.executable, 'agentmain.py', '--task', task_name],
        cwd=agent_root, creationflags=0x08000000,  # 0x08000000 = CREATE_NO_WINDOW
        stdout=out_log, stderr=err_log)
```
- 必须用 Popen,禁止 subprocess.run(会阻塞)
- stdout.log/stderr.log 用于调试subagent卡死、LLM调用失败等问题
- `--llm_no` 默认=sonnet 4.5,`--llm_no 1`=opus 4.6
- 文件统一 UTF-8;subagent 无 reply 5min 自动退出,无需清理