fix: ljqCtrl Press list bug, update SOPs (must call working_mem), agentmain round-end marker
This commit is contained in:
@@ -130,9 +130,9 @@ if __name__ == '__main__':
|
|||||||
while True:
|
while True:
|
||||||
dq = agent.put_task(raw, source='task')
|
dq = agent.put_task(raw, source='task')
|
||||||
while 'done' not in (item := dq.get(timeout=120)):
|
while 'done' not in (item := dq.get(timeout=120)):
|
||||||
if 'next' in item and random.random() < 0.01: # 1/100的概率写一次中间结果
|
if 'next' in item and random.random() < 0.05: # 1/20的概率写一次中间结果
|
||||||
with open(f'{d}/output{nround}.txt', 'w', encoding='utf-8') as f: f.write(item.get('next', ''))
|
with open(f'{d}/output{nround}.txt', 'w', encoding='utf-8') as f: f.write(item.get('next', ''))
|
||||||
with open(f'{d}/output{nround}.txt', 'w', encoding='utf-8') as f: f.write(item['done'])
|
with open(f'{d}/output{nround}.txt', 'w', encoding='utf-8') as f: f.write(item['done'] + '\n[ROUND END]\n')
|
||||||
for _ in range(150): # 等reply.txt,5分钟超时
|
for _ in range(150): # 等reply.txt,5分钟超时
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
if os.path.exists(rp):
|
if os.path.exists(rp):
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
授权你进行自主行动,只要不对环境造成副作用都可进行。
|
授权你进行自主行动,只要不对环境造成副作用都可进行。
|
||||||
请先选择核心目标,再选择一个小目标进行。最终探测结果形成报告(含操作申请),待用户回来确认后再进行可能的写入或修改操作。
|
请先选择核心目标,再选择一个小目标进行。最终探测结果形成报告(含操作申请),待用户回来确认后再进行可能的写入或修改操作。
|
||||||
|
|
||||||
> **启动时写入工作记忆**:`自主探索|≤15回合|只有cwd内可写|用户不在(问题存报告)|收尾:重读本SOP确认报告目录+更新报告目录内history|产出=报告+记忆提案`
|
> **must call update_working_memory**:`自主探索|≤15回合|只有cwd内可写|用户不在(问题存报告)|收尾:重读本SOP确认报告目录+更新报告目录内history|产出=报告+记忆提案`
|
||||||
|
|
||||||
## 📋 大纲
|
## 📋 大纲
|
||||||
- 报告目录与规则
|
- 报告目录与规则
|
||||||
|
|||||||
@@ -51,17 +51,20 @@ def Click(x, y=None):
|
|||||||
if type(x) is type(tuple()): x, y = int(x[0]), int(x[1])
|
if type(x) is type(tuple()): x, y = int(x[0]), int(x[1])
|
||||||
SetCursorPos( (x, y) )
|
SetCursorPos( (x, y) )
|
||||||
MouseClick()
|
MouseClick()
|
||||||
|
click = Click
|
||||||
|
|
||||||
def Press(cmd, staytime=0):
|
def Press(cmd, staytime=0):
|
||||||
cmds = cmd.lower().split('+')
|
if type(cmd) is list: cmds = [x.lower() for x in cmd]
|
||||||
|
else: cmds = cmd.lower().split('+')
|
||||||
for z in cmds:
|
for z in cmds:
|
||||||
win32api.keybd_event(VK_CODE[z], 0, 0, 0)
|
win32api.keybd_event(VK_CODE[z], 0, 0, 0)
|
||||||
time.sleep(staytime)
|
time.sleep(staytime)
|
||||||
for z in reversed(cmds):
|
for z in reversed(cmds):
|
||||||
time.sleep(staytime)
|
time.sleep(staytime)
|
||||||
win32api.keybd_event(VK_CODE[z], 0, win32con.KEYEVENTF_KEYUP, 0)
|
win32api.keybd_event(VK_CODE[z], 0, win32con.KEYEVENTF_KEYUP, 0)
|
||||||
|
press = Press
|
||||||
|
|
||||||
VK_CODE = {'backspace':0x08, 'tab':0x09, 'clear':0x0C, 'enter':0x0D, 'shift':0x10, 'ctrl':0x11, 'alt':0x12, 'pause':0x13, 'caps_lock':0x14, 'esc':0x1B, 'space':0x20, 'page_up':0x21, 'page_down':0x22, 'end':0x23, 'home':0x24, 'left_arrow':0x25, 'up_arrow':0x26, 'right_arrow':0x27, 'down_arrow':0x28, 'select':0x29, 'print':0x2A, 'execute':0x2B, 'print_screen':0x2C, 'ins':0x2D, 'del':0x2E, 'help':0x2F, '0':0x30, '1':0x31, '2':0x32, '3':0x33, '4':0x34, '5':0x35, '6':0x36, '7':0x37, '8':0x38, '9':0x39, 'a':0x41, 'b':0x42, 'c':0x43, 'd':0x44, 'e':0x45, 'f':0x46, 'g':0x47, 'h':0x48, 'i':0x49, 'j':0x4A, 'k':0x4B, 'l':0x4C, 'm':0x4D, 'n':0x4E, 'o':0x4F, 'p':0x50, 'q':0x51, 'r':0x52, 's':0x53, 't':0x54, 'u':0x55, 'v':0x56, 'w':0x57, 'x':0x58, 'y':0x59, 'z':0x5A, 'numpad_0':0x60, 'numpad_1':0x61, 'numpad_2':0x62, 'numpad_3':0x63, 'numpad_4':0x64, 'numpad_5':0x65, 'numpad_6':0x66, 'numpad_7':0x67, 'numpad_8':0x68, 'numpad_9':0x69, 'multiply_key':0x6A, 'add_key':0x6B, 'separator_key':0x6C, 'subtract_key':0x6D, 'decimal_key':0x6E, 'divide_key':0x6F, 'F1':0x70, 'F2':0x71, 'F3':0x72, 'F4':0x73, 'F5':0x74, 'F6':0x75, 'F7':0x76, 'F8':0x77, 'F9':0x78, 'F10':0x79, 'F11':0x7A, 'F12':0x7B, 'F13':0x7C, 'F14':0x7D, 'F15':0x7E, 'F16':0x7F, 'F17':0x80, 'F18':0x81, 'F19':0x82, 'F20':0x83, 'F21':0x84, 'F22':0x85, 'F23':0x86, 'F24':0x87, 'num_lock':0x90, 'scroll_lock':0x91, 'left_shift':0xA0, 'right_shift ':0xA1, 'left_control':0xA2, 'right_control':0xA3, 'left_menu':0xA4, 'right_menu':0xA5, 'browser_back':0xA6, 'browser_forward':0xA7, 'browser_refresh':0xA8, 'browser_stop':0xA9, 'browser_search':0xAA, 'browser_favorites':0xAB, 'browser_start_and_home':0xAC, 'volume_mute':0xAD, 'volume_Down':0xAE, 'volume_up':0xAF, 'next_track':0xB0, 'previous_track':0xB1, 'stop_media':0xB2, 'play/pause_media':0xB3, 'start_mail':0xB4, 'select_media':0xB5, 'start_application_1':0xB6, 'start_application_2':0xB7, 'attn_key':0xF6, 'crsel_key':0xF7, 'exsel_key':0xF8, 'play_key':0xFA, 'zoom_key':0xFB, 
'clear_key':0xFE, '+':0xBB, ',':0xBC, '-':0xBD, '.':0xBE, '/':0xBF, '`':0xC0, ';':0xBA, '[':0xDB, '\\':0xDC, ']':0xDD, "'":0xDE, '`':0xC0}
|
VK_CODE = {'backspace':0x08, 'tab':0x09, 'clear':0x0C, 'enter':0x0D, 'shift':0x10, 'ctrl':0x11, 'alt':0x12, 'pause':0x13, 'caps_lock':0x14, 'esc':0x1B, 'escape':0x1B, 'space':0x20, 'page_up':0x21, 'page_down':0x22, 'end':0x23, 'home':0x24, 'left_arrow':0x25, 'up_arrow':0x26, 'right_arrow':0x27, 'down_arrow':0x28, 'select':0x29, 'print':0x2A, 'execute':0x2B, 'print_screen':0x2C, 'ins':0x2D, 'del':0x2E, 'help':0x2F, '0':0x30, '1':0x31, '2':0x32, '3':0x33, '4':0x34, '5':0x35, '6':0x36, '7':0x37, '8':0x38, '9':0x39, 'a':0x41, 'b':0x42, 'c':0x43, 'd':0x44, 'e':0x45, 'f':0x46, 'g':0x47, 'h':0x48, 'i':0x49, 'j':0x4A, 'k':0x4B, 'l':0x4C, 'm':0x4D, 'n':0x4E, 'o':0x4F, 'p':0x50, 'q':0x51, 'r':0x52, 's':0x53, 't':0x54, 'u':0x55, 'v':0x56, 'w':0x57, 'x':0x58, 'y':0x59, 'z':0x5A, 'numpad_0':0x60, 'numpad_1':0x61, 'numpad_2':0x62, 'numpad_3':0x63, 'numpad_4':0x64, 'numpad_5':0x65, 'numpad_6':0x66, 'numpad_7':0x67, 'numpad_8':0x68, 'numpad_9':0x69, 'multiply_key':0x6A, 'add_key':0x6B, 'separator_key':0x6C, 'subtract_key':0x6D, 'decimal_key':0x6E, 'divide_key':0x6F, 'F1':0x70, 'F2':0x71, 'F3':0x72, 'F4':0x73, 'F5':0x74, 'F6':0x75, 'F7':0x76, 'F8':0x77, 'F9':0x78, 'F10':0x79, 'F11':0x7A, 'F12':0x7B, 'F13':0x7C, 'F14':0x7D, 'F15':0x7E, 'F16':0x7F, 'F17':0x80, 'F18':0x81, 'F19':0x82, 'F20':0x83, 'F21':0x84, 'F22':0x85, 'F23':0x86, 'F24':0x87, 'num_lock':0x90, 'scroll_lock':0x91, 'left_shift':0xA0, 'right_shift ':0xA1, 'left_control':0xA2, 'right_control':0xA3, 'left_menu':0xA4, 'right_menu':0xA5, 'browser_back':0xA6, 'browser_forward':0xA7, 'browser_refresh':0xA8, 'browser_stop':0xA9, 'browser_search':0xAA, 'browser_favorites':0xAB, 'browser_start_and_home':0xAC, 'volume_mute':0xAD, 'volume_Down':0xAE, 'volume_up':0xAF, 'next_track':0xB0, 'previous_track':0xB1, 'stop_media':0xB2, 'play/pause_media':0xB3, 'start_mail':0xB4, 'select_media':0xB5, 'start_application_1':0xB6, 'start_application_2':0xB7, 'attn_key':0xF6, 'crsel_key':0xF7, 'exsel_key':0xF8, 'play_key':0xFA, 'zoom_key':0xFB, 
'clear_key':0xFE, '+':0xBB, ',':0xBC, '-':0xBD, '.':0xBE, '/':0xBF, '`':0xC0, ';':0xBA, '[':0xDB, '\\':0xDC, ']':0xDD, "'":0xDE, '`':0xC0}
|
||||||
VK_CODE = {k.lower():v for k,v in VK_CODE.items()}
|
VK_CODE = {k.lower():v for k,v in VK_CODE.items()}
|
||||||
|
|
||||||
def imshow(mt, sec=0):
|
def imshow(mt, sec=0):
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
# ljqCtrl 使用与坐标转换 SOP
|
# ljqCtrl 使用与坐标转换 SOP
|
||||||
|
|
||||||
|
> **must call update_working_memory**:`ljqCtrl一律使用物理坐标|禁pyautogui|操作前先gw激活窗口`
|
||||||
|
|
||||||
## 0. API 快速参考 (Signatures)
|
## 0. API 快速参考 (Signatures)
|
||||||
- `ljqCtrl.dpi_scale`: float (缩放系数 = 逻辑宽度 / 物理宽度)
|
- `ljqCtrl.dpi_scale`: float (缩放系数 = 逻辑宽度 / 物理宽度)
|
||||||
- `ljqCtrl.SetCursorPos(z)`: 移动鼠标到逻辑坐标 z=(x, y)
|
- `ljqCtrl.SetCursorPos(z)`: 移动鼠标到逻辑坐标 z=(x, y)
|
||||||
@@ -35,6 +37,7 @@ ljqCtrl.Click(px, py)
|
|||||||
```
|
```
|
||||||
|
|
||||||
## 4. 避坑指南
|
## 4. 避坑指南
|
||||||
|
- **⚠️ 一律使用物理坐标**:传给 ljqCtrl.Click/SetCursorPos 的坐标必须是物理坐标(=截图像素坐标)。从 pygetwindow 获取的逻辑坐标需先 `/ dpi_scale` 转换。禁止传入逻辑坐标。
|
||||||
- **物理验证**:模拟操作前必须确保窗口已通过 `activate()` 置于前台。
|
- **物理验证**:模拟操作前必须确保窗口已通过 `activate()` 置于前台。
|
||||||
- **偏移量**:所有的相对偏移像素值(如“向右移动 10 像素”)同样需要除以 `dpi_scale`。
|
- **偏移量**:所有的相对偏移像素值(如“向右移动 10 像素”)同样需要除以 `dpi_scale`。
|
||||||
- **坐标对齐**: 物理坐标 = 截图坐标;ljqCtrl 自动处理 DPI 换算,禁止手动重复计算。
|
- **坐标对齐**: 物理坐标 = 截图坐标;ljqCtrl 自动处理 DPI 换算,禁止手动重复计算。
|
||||||
|
|||||||
@@ -32,19 +32,47 @@ python ../memory/mem_scanner.py <PID> "pattern" --llm
|
|||||||
- **权限**: 并非强制要求管理员权限,但需具备对目标进程的 `PROCESS_QUERY_INFORMATION` 和 `PROCESS_VM_READ` 权限。
|
- **权限**: 并非强制要求管理员权限,但需具备对目标进程的 `PROCESS_QUERY_INFORMATION` 和 `PROCESS_VM_READ` 权限。
|
||||||
- **效率**: 搜索大块内存时,尽量提供更唯一的特征码以减少误报。
|
- **效率**: 搜索大块内存时,尽量提供更唯一的特征码以减少误报。
|
||||||
|
|
||||||
## 4. 典型场景:CE式差集扫描定位动态字段(已验证)
|
## 4. CE式差集扫描定位动态字段
|
||||||
用于定位微信等自绘UI中「当前会话标题」等随操作变化的内存字段。
|
定位微信等自绘UI中随操作变化的内存字段(如当前会话标题)。核心:一次全量scan + 多次ReadProcessMemory筛选。
|
||||||
|
|
||||||
**方法(类似Cheat Engine找游戏数值):**
|
**流程(3个联系人A/B/C即可收敛):**
|
||||||
1. 找到主窗口PID(Weixin.exe有多个进程,用win32gui.GetWindowThreadProcessId取有窗口的那个)
|
1. 取PID:Weixin.exe有多进程,用`win32gui.GetWindowThreadProcessId`取有窗口的
|
||||||
2. 切到会话A → `scan_memory(pid, "人名A", mode="string")` → 得地址集S_A
|
2. 当前会话=A → `scan_memory(pid, "人名A", mode="string")` → 地址集S
|
||||||
3. 切到会话B → `scan_memory(pid, "人名B", mode="string")` → 得地址集S_B
|
3. 切到B → 读S全部地址 → 保留内容≠"人名A"的 → 候选C
|
||||||
4. 差集:S_A独有地址(A时有、B时无)= 候选地址
|
4. 切到A → 读C全部地址 → 保留内容=="人名A"的 → 候选C'(通常1-3个)
|
||||||
5. 切回A → 用ReadProcessMemory逐个读候选地址,确认内容变回"人名A"的即为目标
|
5. 若候选集C'中地址多于1个 → 再切到联系人B或联系人C,重复步骤3-4 → 直到候选唯一
|
||||||
6. 再切第3、4个人交叉验证
|
|
||||||
|
**切换会话+读地址 完整代码:**
|
||||||
|
```python
|
||||||
|
import sys; sys.path.append('../memory')
|
||||||
|
import ljqCtrl, pygetwindow as gw, pyperclip, time, ctypes
|
||||||
|
|
||||||
|
def switch_chat(name):
    """Bring the WeChat window to the foreground and open the chat matching *name*.

    The name is pasted through the clipboard after a first click near the top
    of the window, then a second click is made lower down (presumably the
    search box and the first search result; both offsets are relative to the
    window's top-left corner).
    """
    wechat = gw.getWindowsWithTitle('微信')[0]
    if wechat.isMinimized:
        wechat.restore()
    wechat.activate()
    time.sleep(0.3)

    # Window coordinates are divided by dpi_scale before being handed to ljqCtrl.Click.
    to_physical = 1 / ljqCtrl.dpi_scale

    def click_offset(dx, dy):
        # The window position is re-read on every click.
        ljqCtrl.Click(int((wechat.left + dx) * to_physical),
                      int((wechat.top + dy) * to_physical))

    click_offset(150, 40)
    time.sleep(0.5)

    pyperclip.copy(name)
    ljqCtrl.Press('ctrl+v')
    time.sleep(1.5)  # give the result list time to populate before clicking it

    click_offset(150, 130)
    time.sleep(0.8)
|
||||||
|
|
||||||
|
def read_addrs(pid, addrs):
    """Read up to 256 bytes at each address of process *pid* and decode them as text.

    Args:
        pid: Id of the target process.
        addrs: Iterable of addresses, given as ints or as hex strings.

    Returns:
        dict mapping each address (as int) to the UTF-8 text found before the
        first NUL byte, stripped of surrounding whitespace. An address whose
        read fails maps to '' instead of inheriting the previous read's bytes.

    Raises:
        OSError: If the process cannot be opened with read access.
    """
    from ctypes import wintypes

    # Private kernel32 instance: the prototypes below stay local to this
    # function instead of being set on the shared ctypes.windll.kernel32.
    k32 = ctypes.WinDLL('kernel32', use_last_error=True)
    # Explicit prototypes keep HANDLE/pointer values at full width on 64-bit Python.
    k32.OpenProcess.argtypes = [wintypes.DWORD, wintypes.BOOL, wintypes.DWORD]
    k32.OpenProcess.restype = wintypes.HANDLE
    k32.ReadProcessMemory.argtypes = [
        wintypes.HANDLE, wintypes.LPCVOID, wintypes.LPVOID,
        ctypes.c_size_t, ctypes.POINTER(ctypes.c_size_t),
    ]
    k32.ReadProcessMemory.restype = wintypes.BOOL
    k32.CloseHandle.argtypes = [wintypes.HANDLE]
    k32.CloseHandle.restype = wintypes.BOOL

    hp = k32.OpenProcess(0x10, False, pid)  # 0x10 = PROCESS_VM_READ
    if not hp:
        raise ctypes.WinError(ctypes.get_last_error())

    buf = ctypes.create_string_buffer(256)
    rd = ctypes.c_size_t()
    result = {}
    try:
        for a in addrs:
            a = int(a, 16) if isinstance(a, str) else a
            ok = k32.ReadProcessMemory(hp, ctypes.c_void_p(a), buf, len(buf), ctypes.byref(rd))
            # buf is reused across iterations: only trust the bytes this call
            # actually wrote, and nothing at all when the call failed.
            data = buf.raw[:rd.value] if ok else b''
            result[a] = data.split(b'\x00')[0].decode('utf-8', errors='ignore').strip()
    finally:
        # Release the process handle even if an address is malformed mid-loop.
        k32.CloseHandle(hp)
    return result  # {addr: text}
|
||||||
|
```
|
||||||
|
|
||||||
**坑点:**
|
**坑点:**
|
||||||
- 搜索切换会污染结果(搜索框缓存也含人名),最终验证应用列表点击而非搜索
|
- 进程名Weixin.exe(非WeChat.exe);地址字符串先`int(addr,16)`
|
||||||
- 地址是绝对虚拟地址,进程重启后失效,需重新校准(约10秒)
|
- 步骤3保留内容≠A的地址(同时剔除读出为空或乱码的),步骤4保留内容==A的地址(正向确认),两种筛选交替进行收敛最快
|
||||||
- ReadProcessMemory读UTF-8,用`raw.split(b'\x00')[0].decode('utf-8')`提取干净文本
|
- 搜索框粘贴后≥1.5s再点结果;"文件传输助手"等常见词首条可能是广告,建议用不常见的联系人名做差集(如真实好友昵称),或搜索后点第2条结果
|
||||||
- 微信主进程名为Weixin.exe(非WeChat.exe)
|
- 切换后用read_addrs验证确实切成功了再继续
|
||||||
|
- 最终候选>1时的消歧:不要用搜索切换,直接在联系人列表中点一个靠后的人(不经过搜索框),然后read_addrs看哪个地址变了——变的才是标题栏,没变的是搜索框残留
|
||||||
@@ -10,6 +10,8 @@
|
|||||||
- 不满足map模式的任务 → 主agent顺序执行即可,别用subagent
|
- 不满足map模式的任务 → 主agent顺序执行即可,别用subagent
|
||||||
|
|
||||||
**额外场景:SOP dry-run验证**——启动单个subagent执行目标SOP,通过output日志发现SOP缺陷(缺参数/选择器不准/步骤模糊),主agent据此patch优化SOP。单subagent不存在资源冲突。
|
**额外场景:SOP dry-run验证**——启动单个subagent执行目标SOP,通过output日志发现SOP缺陷(缺参数/选择器不准/步骤模糊),主agent据此patch优化SOP。单subagent不存在资源冲突。
|
||||||
|
- 测SOP质量:input指定SOP名(如"用ezgmail_sop查看最近3封未读邮件"),排除导航干扰,失败即SOP问题
|
||||||
|
- 测导航能力:input只写目标,验证subagent能自主从insight找到正确SOP。禁止内联SOP内容
|
||||||
|
|
||||||
**标准流程(map-reduce)**:
|
**标准流程(map-reduce)**:
|
||||||
1. 主agent准备阶段:爬取/dump数据,存为多个独立输入文件
|
1. 主agent准备阶段:爬取/dump数据,存为多个独立输入文件
|
||||||
@@ -17,16 +19,23 @@
|
|||||||
3. 收集:等所有subagent完成,主agent读取各输出文件,汇总结果
|
3. 收集:等所有subagent完成,主agent读取各输出文件,汇总结果
|
||||||
|
|
||||||
## Task Mode 文件IO协议
|
## Task Mode 文件IO协议
|
||||||
- 目录:`./{task_name}/`,启动:`python agentmain.py --task {task_name} [--llm_no N]`(cwd=GenericAgent根)
|
- 目录:`temp/{task_name}/`(相对代码根GenericAgent/),主agent cwd在temp/时即 `./{task_name}/`
|
||||||
|
- 启动:`python agentmain.py --task {task_name} [--llm_no N]`(cwd=代码根)
|
||||||
- 流程:写 input.txt → 启动 → 轮询 output.txt → 读回复 → 写 reply.txt 继续 → 不写则5min自动退出
|
- 流程:写 input.txt → 启动 → 轮询 output.txt → 读回复 → 写 reply.txt 继续 → 不写则5min自动退出
|
||||||
|
- input.txt原则:写目标+约束,可指定SOP名。禁写具体实现步骤——除非主agent已读过该SOP确认正确。凭印象猜的步骤会误导subagent
|
||||||
|
- reply后subagent的后续输出在 output1.txt, output2.txt ... 中(递增编号),主agent需轮询这些文件;每轮最终结果写完后文件末尾带 `[ROUND END]` 标记,未出现该标记的内容只是中间结果
|
||||||
- output.txt 每轮覆盖写,用 mtime/size 判断新轮次
|
- output.txt 每轮覆盖写,用 mtime/size 判断新轮次
|
||||||
|
|
||||||
## 后台调用要点
|
## 后台调用要点
|
||||||
```python
|
```python
|
||||||
|
task_dir = os.path.join(agent_root, 'temp', task_name)
|
||||||
proc = subprocess.Popen(
|
proc = subprocess.Popen(
|
||||||
[sys.executable, 'agentmain.py', '--task', task_name],
|
[sys.executable, 'agentmain.py', '--task', task_name],
|
||||||
cwd=agent_root, creationflags=0x08000000)
|
cwd=agent_root, creationflags=0x08000000,
|
||||||
|
stdout=open(os.path.join(task_dir, 'stdout.log'), 'w', encoding='utf-8'),
|
||||||
|
stderr=open(os.path.join(task_dir, 'stderr.log'), 'w', encoding='utf-8'))
|
||||||
```
|
```
|
||||||
- 必须 Popen,禁止 subprocess.run(会阻塞)
|
- 必须 Popen,禁止 subprocess.run(会阻塞)
|
||||||
|
- stdout.log/stderr.log 用于调试subagent卡死、LLM调用失败等问题
|
||||||
- `--llm_no` 默认=sonnet 4.5,`--llm_no 1`=opus 4.6
|
- `--llm_no` 默认=sonnet 4.5,`--llm_no 1`=opus 4.6
|
||||||
- 文件统一 UTF-8,subagent 无 reply 5min 自动退出无需清理
|
- 文件统一 UTF-8,subagent 无 reply 5min 自动退出无需清理
|
||||||
Reference in New Issue
Block a user