refactor: agent loop done_hooks, inline_eval, scheduler logging & health_check
- agent_loop: for→while, _done_hooks callback mechanism, max_turns=40
- agentmain: pass self to handler, abort clears task queue
- stapp: heartbeat yield for Streamlit StopException detection
- ga: _inline_eval param for in-process eval, no_tool thresholds tuned
- scheduler: logging, max_delay_hours, weekday repeat, health_check()
This commit is contained in:
@@ -46,14 +46,15 @@ def get_pretty_json(data):
|
|||||||
data["script"] = data["script"].replace("; ", ";\n ")
|
data["script"] = data["script"].replace("; ", ";\n ")
|
||||||
return json.dumps(data, indent=2, ensure_ascii=False).replace('\\n', '\n')
|
return json.dumps(data, indent=2, ensure_ascii=False).replace('\\n', '\n')
|
||||||
|
|
||||||
def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, max_turns=15, verbose=True, initial_user_content=None):
|
def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema, max_turns=40, verbose=True, initial_user_content=None):
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "system", "content": system_prompt},
|
{"role": "system", "content": system_prompt},
|
||||||
{"role": "user", "content": initial_user_content if initial_user_content is not None else user_input}
|
{"role": "user", "content": initial_user_content if initial_user_content is not None else user_input}
|
||||||
]
|
]
|
||||||
for turn in range(max_turns):
|
turn = 0; handler._done_hooks = []; handler.max_turns = max_turns
|
||||||
yield f"**LLM Running (Turn {turn+1}) ...**\n\n"
|
while turn < handler.max_turns:
|
||||||
if (turn+1) % 10 == 0: client.last_tools = '' # 每10轮重置一次工具描述,避免上下文过大导致的模型性能下降
|
turn += 1; yield f"**LLM Running (Turn {turn}) ...**\n\n"
|
||||||
|
if turn%10 == 0: client.last_tools = '' # 每10轮重置一次工具描述,避免上下文过大导致的模型性能下降
|
||||||
response_gen = client.chat(messages=messages, tools=tools_schema)
|
response_gen = client.chat(messages=messages, tools=tools_schema)
|
||||||
if verbose:
|
if verbose:
|
||||||
response = yield from response_gen
|
response = yield from response_gen
|
||||||
@@ -66,7 +67,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
|
|||||||
else: tool_calls = [{'tool_name': tc.function.name, 'args': json.loads(tc.function.arguments)}
|
else: tool_calls = [{'tool_name': tc.function.name, 'args': json.loads(tc.function.arguments)}
|
||||||
for tc in response.tool_calls]
|
for tc in response.tool_calls]
|
||||||
|
|
||||||
next_prompt = ""
|
next_prompt = ""; should_exit = None
|
||||||
for ii, tc in enumerate(tool_calls):
|
for ii, tc in enumerate(tool_calls):
|
||||||
tool_name, args = tc['tool_name'], tc['args']
|
tool_name, args = tc['tool_name'], tc['args']
|
||||||
if tool_name == 'no_tool': pass
|
if tool_name == 'no_tool': pass
|
||||||
@@ -74,7 +75,7 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
|
|||||||
showarg = get_pretty_json(args)
|
showarg = get_pretty_json(args)
|
||||||
if not verbose and len(showarg) > 200: showarg = showarg[:200] + ' ...'
|
if not verbose and len(showarg) > 200: showarg = showarg[:200] + ' ...'
|
||||||
yield f"🛠️ **正在调用工具:** `{tool_name}` 📥**参数:**\n````text\n{showarg}\n````\n"
|
yield f"🛠️ **正在调用工具:** `{tool_name}` 📥**参数:**\n````text\n{showarg}\n````\n"
|
||||||
handler.current_turn = turn + 1
|
handler.current_turn = turn
|
||||||
gen = handler.dispatch(tool_name, args, response, index=ii)
|
gen = handler.dispatch(tool_name, args, response, index=ii)
|
||||||
if verbose:
|
if verbose:
|
||||||
yield '`````\n'
|
yield '`````\n'
|
||||||
@@ -82,14 +83,17 @@ def agent_runner_loop(client, system_prompt, user_input, handler, tools_schema,
|
|||||||
yield '`````\n'
|
yield '`````\n'
|
||||||
else: outcome = exhaust(gen)
|
else: outcome = exhaust(gen)
|
||||||
|
|
||||||
if outcome.next_prompt is None: return {'result': 'CURRENT_TASK_DONE', 'data': outcome.data}
|
if outcome.should_exit: return {'result': 'EXITED', 'data': outcome.data} # should_exit is only used for immediate exit
|
||||||
if outcome.should_exit: return {'result': 'EXITED', 'data': outcome.data}
|
if not outcome.next_prompt:
|
||||||
|
should_exit = {'result': 'CURRENT_TASK_DONE', 'data': outcome.data}; break
|
||||||
if outcome.next_prompt.startswith('未知工具'): client.last_tools = ''
|
if outcome.next_prompt.startswith('未知工具'): client.last_tools = ''
|
||||||
|
|
||||||
if outcome.data is not None:
|
if outcome.data is not None:
|
||||||
datastr = json.dumps(outcome.data, ensure_ascii=False, default=json_default) if type(outcome.data) in [dict, list] else str(outcome.data)
|
datastr = json.dumps(outcome.data, ensure_ascii=False, default=json_default) if type(outcome.data) in [dict, list] else str(outcome.data)
|
||||||
next_prompt += f"<tool_result>\n{datastr}\n</tool_result>\n\n"
|
next_prompt += f"<tool_result>\n{datastr}\n</tool_result>\n\n"
|
||||||
next_prompt += outcome.next_prompt
|
next_prompt += outcome.next_prompt;
|
||||||
next_prompt = handler.next_prompt_patcher(next_prompt, None, turn+1)
|
if not next_prompt:
|
||||||
|
if len(handler._done_hooks) == 0: return should_exit
|
||||||
|
next_prompt += handler._done_hooks.pop(0)
|
||||||
|
next_prompt = handler.next_prompt_patcher(next_prompt, None, turn)
|
||||||
messages = [{"role": "user", "content": next_prompt}] # just new message, history is kept in *Session
|
messages = [{"role": "user", "content": next_prompt}] # just new message, history is kept in *Session
|
||||||
return {'result': 'MAX_TURNS_EXCEEDED'}
|
return {'result': 'MAX_TURNS_EXCEEDED'}
|
||||||
|
|||||||
@@ -100,7 +100,7 @@ class GeneraticAgent:
|
|||||||
|
|
||||||
sys_prompt = get_system_prompt()
|
sys_prompt = get_system_prompt()
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
handler = GenericAgentHandler(None, self.history, os.path.join(script_dir, 'temp'))
|
handler = GenericAgentHandler(self, self.history, os.path.join(script_dir, 'temp'))
|
||||||
if self.handler and 'key_info' in self.handler.working:
|
if self.handler and 'key_info' in self.handler.working:
|
||||||
ki = re.sub(r'\n\[SYSTEM\] 此为.*?工作记忆[。\n]*', '', self.handler.working['key_info']) # 去旧
|
ki = re.sub(r'\n\[SYSTEM\] 此为.*?工作记忆[。\n]*', '', self.handler.working['key_info']) # 去旧
|
||||||
handler.working['key_info'] = ki
|
handler.working['key_info'] = ki
|
||||||
@@ -115,6 +115,7 @@ class GeneraticAgent:
|
|||||||
initial_user_content = build_multimodal_content(user_input, images)
|
initial_user_content = build_multimodal_content(user_input, images)
|
||||||
elif images:
|
elif images:
|
||||||
print(f"[INFO] backend {type(self.llmclient.backend).__name__} does not support direct multimodal input, fallback to text attachment hints.")
|
print(f"[INFO] backend {type(self.llmclient.backend).__name__} does not support direct multimodal input, fallback to text attachment hints.")
|
||||||
|
# although new handler, the **full** history is in llmclient, so it is full history!
|
||||||
gen = agent_runner_loop(self.llmclient, sys_prompt, user_input,
|
gen = agent_runner_loop(self.llmclient, sys_prompt, user_input,
|
||||||
handler, TOOLS_SCHEMA, max_turns=40, verbose=self.verbose,
|
handler, TOOLS_SCHEMA, max_turns=40, verbose=self.verbose,
|
||||||
initial_user_content=initial_user_content)
|
initial_user_content=initial_user_content)
|
||||||
@@ -135,6 +136,9 @@ class GeneraticAgent:
|
|||||||
print(f"Backend Error: {format_error(e)}")
|
print(f"Backend Error: {format_error(e)}")
|
||||||
display_queue.put({'done': full_resp + f'\n```\n{format_error(e)}\n```', 'source': source})
|
display_queue.put({'done': full_resp + f'\n```\n{format_error(e)}\n```', 'source': source})
|
||||||
finally:
|
finally:
|
||||||
|
if self.stop_sig:
|
||||||
|
print('User aborted the task.')
|
||||||
|
with self.task_queue.mutex: self.task_queue.queue.clear()
|
||||||
self.is_running = self.stop_sig = False
|
self.is_running = self.stop_sig = False
|
||||||
self.task_queue.task_done()
|
self.task_queue.task_done()
|
||||||
if self.handler is not None: self.handler.code_stop_signal.append(1)
|
if self.handler is not None: self.handler.code_stop_signal.append(1)
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ except: pass
|
|||||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
import time, json, re, threading
|
import time, json, re, threading, queue
|
||||||
from agentmain import GeneraticAgent
|
from agentmain import GeneraticAgent
|
||||||
|
|
||||||
st.set_page_config(page_title="Cowork", layout="wide")
|
st.set_page_config(page_title="Cowork", layout="wide")
|
||||||
@@ -69,14 +69,18 @@ with st.sidebar: render_sidebar()
|
|||||||
|
|
||||||
def agent_backend_stream(prompt):
|
def agent_backend_stream(prompt):
|
||||||
display_queue = agent.put_task(prompt, source="user")
|
display_queue = agent.put_task(prompt, source="user")
|
||||||
|
response = ''
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
item = display_queue.get()
|
try: item = display_queue.get(timeout=1)
|
||||||
if 'next' in item: yield item['next']
|
except queue.Empty:
|
||||||
|
yield response # heartbeat: let outer st.markdown() run → Streamlit checks StopException
|
||||||
|
continue
|
||||||
|
if 'next' in item:
|
||||||
|
response = item['next']; yield response
|
||||||
if 'done' in item:
|
if 'done' in item:
|
||||||
yield item['done']; break
|
yield item['done']; break
|
||||||
finally:
|
finally: agent.abort()
|
||||||
agent.abort()
|
|
||||||
|
|
||||||
if "messages" not in st.session_state: st.session_state.messages = []
|
if "messages" not in st.session_state: st.session_state.messages = []
|
||||||
for msg in st.session_state.messages:
|
for msg in st.session_state.messages:
|
||||||
|
|||||||
28
ga.py
28
ga.py
@@ -299,7 +299,12 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
raw_path = os.path.join(self.cwd, args.get("cwd", './'))
|
raw_path = os.path.join(self.cwd, args.get("cwd", './'))
|
||||||
cwd = os.path.normpath(os.path.abspath(raw_path))
|
cwd = os.path.normpath(os.path.abspath(raw_path))
|
||||||
code_cwd = os.path.normpath(self.cwd)
|
code_cwd = os.path.normpath(self.cwd)
|
||||||
result = yield from code_run(code, code_type, timeout, cwd, code_cwd=code_cwd, stop_signal=self.code_stop_signal)
|
if args.get("_inline_eval"):
|
||||||
|
ns = {'handler': self, 'parent': self.parent}
|
||||||
|
try: result = repr(eval(code, ns))
|
||||||
|
except SyntaxError: exec(code, ns); result = ns.get('_r', 'OK')
|
||||||
|
except Exception as e: result = f'Error: {e}'
|
||||||
|
else: result = yield from code_run(code, code_type, timeout, cwd, code_cwd=code_cwd, stop_signal=self.code_stop_signal)
|
||||||
next_prompt = self._get_anchor_prompt()
|
next_prompt = self._get_anchor_prompt()
|
||||||
return StepOutcome(result, next_prompt=next_prompt)
|
return StepOutcome(result, next_prompt=next_prompt)
|
||||||
|
|
||||||
@@ -394,8 +399,7 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
with open(path, 'a' if mode == "append" else 'w', encoding="utf-8") as f: f.write(new_content)
|
with open(path, 'a' if mode == "append" else 'w', encoding="utf-8") as f: f.write(new_content)
|
||||||
yield f"[Status] ✅ {mode.capitalize()} 成功 ({len(new_content)} bytes)\n"
|
yield f"[Status] ✅ {mode.capitalize()} 成功 ({len(new_content)} bytes)\n"
|
||||||
next_prompt = self._get_anchor_prompt()
|
next_prompt = self._get_anchor_prompt()
|
||||||
return StepOutcome({"status": "success", 'writed_bytes': len(new_content)},
|
return StepOutcome({"status": "success", 'writed_bytes': len(new_content)}, next_prompt=next_prompt)
|
||||||
next_prompt=next_prompt)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
yield f"[Status] ❌ 写入异常: {str(e)}\n"
|
yield f"[Status] ❌ 写入异常: {str(e)}\n"
|
||||||
return StepOutcome({"status": "error", "msg": str(e)}, next_prompt="\n")
|
return StepOutcome({"status": "error", "msg": str(e)}, next_prompt="\n")
|
||||||
@@ -437,31 +441,29 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
def do_no_tool(self, args, response):
|
def do_no_tool(self, args, response):
|
||||||
'''这是一个特殊工具,由引擎自主调用,不要包含在TOOLS_SCHEMA里。
|
'''这是一个特殊工具,由引擎自主调用,不要包含在TOOLS_SCHEMA里。
|
||||||
当模型在一轮中未显式调用任何工具时,由引擎自动触发。
|
当模型在一轮中未显式调用任何工具时,由引擎自动触发。
|
||||||
二次确认仅在回复几乎只包含<thinking>/<summary>和一段大代码块时触发。
|
二次确认仅在回复几乎只包含<thinking>/<summary>和一段大代码块时触发。'''
|
||||||
'''
|
|
||||||
content = getattr(response, 'content', '') or ""
|
content = getattr(response, 'content', '') or ""
|
||||||
if not response or not content.strip():
|
if not response or not content.strip():
|
||||||
yield "[Warn] LLM returned an empty response. Retrying...\n"
|
yield "[Warn] LLM returned an empty response. Retrying...\n"
|
||||||
return StepOutcome({}, next_prompt="[System] Blank response, regenerate and tooluse", should_exit=False)
|
return StepOutcome({}, next_prompt="[System] Blank response, regenerate and tooluse")
|
||||||
if '流异常中断,未收到完整响应 !!!]' in content:
|
if '流异常中断,未收到完整响应 !!!]' in content:
|
||||||
return StepOutcome({}, next_prompt="[System] Incomplete response. Regenerate and tooluse.", should_exit=False)
|
return StepOutcome({}, next_prompt="[System] Incomplete response. Regenerate and tooluse.")
|
||||||
if 'max_tokens !!!]' in content:
|
if 'max_tokens !!!]' in content:
|
||||||
return StepOutcome({}, next_prompt="[System] max_tokens limit reached. Use multi small steps to do it.", should_exit=False)
|
return StepOutcome({}, next_prompt="[System] max_tokens limit reached. Use multi small steps to do it.")
|
||||||
# 2. 检测“包含较大代码块但未调用工具”的情况
|
# 2. 检测“包含较大代码块但未调用工具”的情况
|
||||||
# 这里通过三引号代码块 + 最少字符数的方式粗略判断“大段代码”
|
# 这里通过三引号代码块 + 最少字符数的方式粗略判断“大段代码”
|
||||||
code_block_pattern = r"```[a-zA-Z0-9_]*\n[\s\S]{100,}?```"
|
code_block_pattern = r"```[a-zA-Z0-9_]*\n[\s\S]{300,}?```"
|
||||||
m = re.search(code_block_pattern, content)
|
m = re.search(code_block_pattern, content)
|
||||||
if m:
|
if m:
|
||||||
# 仅当 content 由 <thinking> / <summary> 和该代码块构成时才触发二次确认
|
# 仅当 content 由 <thinking> / <summary> 和该代码块构成时才触发二次确认
|
||||||
residual = content
|
residual = content
|
||||||
# 去掉代码块本身
|
|
||||||
residual = residual.replace(m.group(0), "")
|
residual = residual.replace(m.group(0), "")
|
||||||
# 去掉<thinking>和<summary>块(大小写不敏感)
|
# 去掉<thinking>和<summary>块(大小写不敏感)
|
||||||
residual = re.sub(r"<thinking>[\s\S]*?</thinking>", "", residual, flags=re.IGNORECASE)
|
residual = re.sub(r"<thinking>[\s\S]*?</thinking>", "", residual, flags=re.IGNORECASE)
|
||||||
residual = re.sub(r"<summary>[\s\S]*?</summary>", "", residual, flags=re.IGNORECASE)
|
residual = re.sub(r"<summary>[\s\S]*?</summary>", "", residual, flags=re.IGNORECASE)
|
||||||
# 如果去除上述结构后的非空白字符很少,说明没有额外自然语言说明
|
# 如果去除上述结构后的非空白字符很少,说明没有额外自然语言说明
|
||||||
clean_residual = re.sub(r"\s+", "", residual)
|
clean_residual = re.sub(r"\s+", "", residual)
|
||||||
if len(clean_residual) <= 50:
|
if len(clean_residual) <= 20:
|
||||||
yield "[Info] Detected large code block without tool call and no extra natural language. Requesting clarification.\n"
|
yield "[Info] Detected large code block without tool call and no extra natural language. Requesting clarification.\n"
|
||||||
next_prompt = (
|
next_prompt = (
|
||||||
"[System] 检测到你在上一轮回复中主要内容是较大代码块(仅配有<thinking>/<summary>),且本轮未调用任何工具。\n"
|
"[System] 检测到你在上一轮回复中主要内容是较大代码块(仅配有<thinking>/<summary>),且本轮未调用任何工具。\n"
|
||||||
@@ -470,10 +472,10 @@ class GenericAgentHandler(BaseHandler):
|
|||||||
"如果只是向用户展示或讲解代码片段,请在回复中补充自然语言说明,"
|
"如果只是向用户展示或讲解代码片段,请在回复中补充自然语言说明,"
|
||||||
"并明确是否还需要额外的实际操作。"
|
"并明确是否还需要额外的实际操作。"
|
||||||
)
|
)
|
||||||
return StepOutcome({}, next_prompt=next_prompt, should_exit=False)
|
return StepOutcome({}, next_prompt=next_prompt)
|
||||||
# 3. 正常情况:直接将回复返回给用户并结束循环
|
# 3. 正常情况:直接将回复返回给用户并结束循环
|
||||||
yield "[Info] Final response to user.\n"
|
yield "[Info] Final response to user.\n"
|
||||||
return StepOutcome(response, next_prompt=None, should_exit=True)
|
return StepOutcome(response, next_prompt=None)
|
||||||
|
|
||||||
def do_start_long_term_update(self, args, response):
|
def do_start_long_term_update(self, args, response):
|
||||||
'''Agent觉得当前任务完成后有重要信息需要记忆时调用此工具。'''
|
'''Agent觉得当前任务完成后有重要信息需要记忆时调用此工具。'''
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import os, json, socket as _socket
|
import os, json, socket as _socket, logging
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
# 端口锁:防止重复启动,bind失败时agentmain会直接崩溃退出
|
# 端口锁:防止重复启动,bind失败时agentmain会直接崩溃退出
|
||||||
@@ -14,11 +14,24 @@ ONCE = False
|
|||||||
_dir = os.path.dirname(os.path.abspath(__file__))
|
_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
TASKS = os.path.join(_dir, '../sche_tasks')
|
TASKS = os.path.join(_dir, '../sche_tasks')
|
||||||
DONE = os.path.join(_dir, '../sche_tasks/done')
|
DONE = os.path.join(_dir, '../sche_tasks/done')
|
||||||
|
_LOG = os.path.join(_dir, '../sche_tasks/scheduler.log')
|
||||||
|
|
||||||
|
# --- 日志 ---
|
||||||
|
_logger = logging.getLogger('scheduler')
|
||||||
|
if not _logger.handlers:
|
||||||
|
_logger.setLevel(logging.INFO)
|
||||||
|
_fh = logging.FileHandler(_LOG, encoding='utf-8')
|
||||||
|
_fh.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s',
|
||||||
|
datefmt='%Y-%m-%d %H:%M'))
|
||||||
|
_logger.addHandler(_fh)
|
||||||
|
|
||||||
|
# 默认最大延迟窗口(小时),超过此时间不触发
|
||||||
|
DEFAULT_MAX_DELAY = 6
|
||||||
|
|
||||||
def _parse_cooldown(repeat):
|
def _parse_cooldown(repeat):
|
||||||
"""解析repeat为冷却时间(比实际周期略短,防漂移)"""
|
"""解析repeat为冷却时间(比实际周期略短,防漂移)"""
|
||||||
if repeat == 'once': return timedelta(days=999999)
|
if repeat == 'once': return timedelta(days=999999)
|
||||||
if repeat == 'daily': return timedelta(hours=20)
|
if repeat in ('daily', 'weekday'): return timedelta(hours=20)
|
||||||
if repeat == 'weekly': return timedelta(days=6)
|
if repeat == 'weekly': return timedelta(days=6)
|
||||||
if repeat == 'monthly': return timedelta(days=27)
|
if repeat == 'monthly': return timedelta(days=27)
|
||||||
if repeat.startswith('every_'):
|
if repeat.startswith('every_'):
|
||||||
@@ -28,6 +41,7 @@ def _parse_cooldown(repeat):
|
|||||||
if u == 'h': return timedelta(hours=n)
|
if u == 'h': return timedelta(hours=n)
|
||||||
if u == 'm': return timedelta(minutes=n)
|
if u == 'm': return timedelta(minutes=n)
|
||||||
if u == 'd': return timedelta(days=n)
|
if u == 'd': return timedelta(days=n)
|
||||||
|
_logger.warning(f'Unknown repeat type: {repeat}, fallback to 20h cooldown')
|
||||||
return timedelta(hours=20)
|
return timedelta(hours=20)
|
||||||
|
|
||||||
def _last_run(tid, done_files):
|
def _last_run(tid, done_files):
|
||||||
@@ -51,22 +65,42 @@ def check():
|
|||||||
tid = f[:-5]
|
tid = f[:-5]
|
||||||
try:
|
try:
|
||||||
task = json.loads(open(os.path.join(TASKS, f), encoding='utf-8').read())
|
task = json.loads(open(os.path.join(TASKS, f), encoding='utf-8').read())
|
||||||
except: continue
|
except Exception as e:
|
||||||
|
_logger.error(f'JSON parse error for {f}: {e}')
|
||||||
|
continue
|
||||||
if not task.get('enabled', False): continue
|
if not task.get('enabled', False): continue
|
||||||
|
|
||||||
repeat = task.get('repeat', 'daily')
|
repeat = task.get('repeat', 'daily')
|
||||||
sched = task.get('schedule', '00:00')
|
sched = task.get('schedule', '00:00')
|
||||||
|
try:
|
||||||
h, m = map(int, sched.split(':'))
|
h, m = map(int, sched.split(':'))
|
||||||
|
except Exception as e:
|
||||||
|
_logger.error(f'Invalid schedule format in {f}: {sched!r} ({e})')
|
||||||
|
continue
|
||||||
|
|
||||||
|
# weekday任务:周末跳过
|
||||||
|
if repeat == 'weekday' and now.weekday() >= 5: continue
|
||||||
|
|
||||||
# 还没到schedule时间就跳过
|
# 还没到schedule时间就跳过
|
||||||
if now.hour < h or (now.hour == h and now.minute < m): continue
|
if now.hour < h or (now.hour == h and now.minute < m): continue
|
||||||
|
|
||||||
|
# 执行窗口检查:超过max_delay小时则跳过(防止开机太晚触发过时任务)
|
||||||
|
max_delay = task.get('max_delay_hours', DEFAULT_MAX_DELAY)
|
||||||
|
sched_minutes = h * 60 + m
|
||||||
|
now_minutes = now.hour * 60 + now.minute
|
||||||
|
if (now_minutes - sched_minutes) > max_delay * 60:
|
||||||
|
_logger.info(f'SKIP {tid}: {now_minutes - sched_minutes}min past schedule, '
|
||||||
|
f'exceeds max_delay={max_delay}h')
|
||||||
|
continue
|
||||||
|
|
||||||
# 检查冷却
|
# 检查冷却
|
||||||
last = _last_run(tid, done_files)
|
last = _last_run(tid, done_files)
|
||||||
cooldown = _parse_cooldown(repeat)
|
cooldown = _parse_cooldown(repeat)
|
||||||
if last and (now - last) < cooldown: continue
|
if last and (now - last) < cooldown: continue
|
||||||
|
|
||||||
# 触发
|
# 触发
|
||||||
|
_logger.info(f'TRIGGER {tid} (repeat={repeat}, schedule={sched}, '
|
||||||
|
f'last_run={last})')
|
||||||
ts = now.strftime('%Y-%m-%d_%H%M')
|
ts = now.strftime('%Y-%m-%d_%H%M')
|
||||||
rpt = os.path.join(DONE, f'{ts}_{tid}.md')
|
rpt = os.path.join(DONE, f'{ts}_{tid}.md')
|
||||||
prompt = task.get('prompt', '')
|
prompt = task.get('prompt', '')
|
||||||
@@ -76,3 +110,49 @@ def check():
|
|||||||
f'{prompt}\n\n'
|
f'{prompt}\n\n'
|
||||||
f'完成后将执行报告写入 {rpt}。')
|
f'完成后将执行报告写入 {rpt}。')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def health_check():
    """Report the health status of every scheduled task found in TASKS.

    Each ``*.json`` file in the TASKS directory is treated as one task whose
    id is the file name without the ``.json`` suffix. The DONE directory is
    created if missing, and its file names are passed to ``_last_run`` to
    find each task's most recent run.

    Returns:
        ``{'error': 'TASKS directory not found'}`` when TASKS is not a
        directory. Otherwise a list with one dict per task file, in sorted
        file-name order. A file that cannot be read or parsed, or whose JSON
        is not an object, yields
        ``{'task', 'status': 'ERROR', 'detail'}``. Every other task yields
        ``{'task', 'status', 'enabled', 'repeat', 'schedule', 'last_run',
        'cooldown_hours'}`` where ``status`` is one of ``DISABLED``,
        ``NEVER_RUN``, ``COMPLETED``, ``OVERDUE`` or ``HEALTHY``.
    """
    if not os.path.isdir(TASKS):
        return {'error': 'TASKS directory not found'}
    now = datetime.now()
    os.makedirs(DONE, exist_ok=True)
    done_files = set(os.listdir(DONE))
    results = []
    for f in sorted(os.listdir(TASKS)):
        if not f.endswith('.json'):
            continue
        tid = f[:-5]
        try:
            # Context manager so the handle is closed even when parsing fails.
            with open(os.path.join(TASKS, f), encoding='utf-8') as fp:
                task = json.load(fp)
        except Exception as e:
            results.append({'task': tid, 'status': 'ERROR', 'detail': f'JSON parse: {e}'})
            continue
        if not isinstance(task, dict):
            # Valid JSON that is not an object (list, string, ...) has no
            # .get(); report it instead of aborting the whole health check.
            results.append({'task': tid, 'status': 'ERROR',
                            'detail': f'JSON parse: expected an object, got {type(task).__name__}'})
            continue

        enabled = task.get('enabled', False)
        repeat = task.get('repeat', 'daily')
        sched = task.get('schedule', '00:00')
        last = _last_run(tid, done_files)
        cooldown = _parse_cooldown(repeat)

        # Classify the task.
        if not enabled:
            status = 'DISABLED'
        elif last is None:
            status = 'NEVER_RUN'
        elif repeat == 'once':
            # `last` is known to be set here (the None case is handled
            # above), so a one-shot task that reaches this branch has run.
            status = 'COMPLETED'
        else:
            # Overdue when the time since the last run exceeds 1.25x the
            # cooldown, i.e. slightly more than the cooldown itself.
            expected_gap = cooldown * 1.25
            if (now - last) > expected_gap:
                status = 'OVERDUE'
            else:
                status = 'HEALTHY'

        results.append({
            'task': tid, 'status': status, 'enabled': enabled,
            'repeat': repeat, 'schedule': sched,
            'last_run': last.strftime('%Y-%m-%d %H:%M') if last else None,
            'cooldown_hours': cooldown.total_seconds() / 3600,
        })
    return results
|
||||||
|
|||||||
Reference in New Issue
Block a user