Merge PR #39: Add reasoning_effort config support

This commit is contained in:
Jiaqing Liang
2026-03-19 14:38:23 +08:00
2 changed files with 8 additions and 0 deletions

View File

@@ -132,6 +132,10 @@ class LLMSession:
self.max_retries = max(0, int(cfg.get('max_retries', 2))) self.max_retries = max(0, int(cfg.get('max_retries', 2)))
self.connect_timeout = max(1, int(cfg.get('connect_timeout', 10))) self.connect_timeout = max(1, int(cfg.get('connect_timeout', 10)))
self.read_timeout = max(5, int(cfg.get('read_timeout', 120))) self.read_timeout = max(5, int(cfg.get('read_timeout', 120)))
effort = cfg.get('reasoning_effort')
effort = None if effort is None else str(effort).strip().lower()
self.reasoning_effort = effort if effort in ['none', 'minimal','low', 'medium', 'high', 'xhigh'] else None
if effort and self.reasoning_effort is None: print(f"[WARN] Invalid reasoning_effort {effort!r}, ignored.")
mode = str(cfg.get('api_mode', 'chat_completions')).strip().lower().replace('-', '_') mode = str(cfg.get('api_mode', 'chat_completions')).strip().lower().replace('-', '_')
if mode in ["responses", "response"]: self.api_mode = "responses" if mode in ["responses", "response"]: self.api_mode = "responses"
else: self.api_mode = "chat_completions" else: self.api_mode = "chat_completions"
@@ -176,9 +180,11 @@ class LLMSession:
if self.api_mode == "responses": if self.api_mode == "responses":
url = auto_make_url(self.api_base, "responses") url = auto_make_url(self.api_base, "responses")
payload = {"model": model, "input": self._to_responses_input(messages), "temperature": temperature, "stream": True} payload = {"model": model, "input": self._to_responses_input(messages), "temperature": temperature, "stream": True}
if self.reasoning_effort: payload["reasoning"] = {"effort": self.reasoning_effort}
else: else:
url = auto_make_url(self.api_base, "chat/completions") url = auto_make_url(self.api_base, "chat/completions")
payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True} payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True}
if self.reasoning_effort: payload["reasoning_effort"] = self.reasoning_effort
for attempt in range(self.max_retries + 1): for attempt in range(self.max_retries + 1):
streamed_any = False streamed_any = False
try: try:

View File

@@ -4,6 +4,8 @@ oai_config = {
'apibase':"http://243.55.19.137:2001", 'apibase':"http://243.55.19.137:2001",
'model':"openai/gpt-5.1", 'model':"openai/gpt-5.1",
'api_mode':"chat_completions", # optional: "chat_completions" | "responses" 'api_mode':"chat_completions", # optional: "chat_completions" | "responses"
# 'reasoning_effort': "low", # optional: none | minimal | low | medium | high | xhigh
# Only OpenAI models support this parameter, and not every OpenAI model accepts it (or every value). It controls how much reasoning effort the model spends when generating a response: higher values may produce more accurate and detailed responses, but may also take longer to generate.
'max_retries': 2, # optional: retries for 429/timeout/5xx 'max_retries': 2, # optional: retries for 429/timeout/5xx
'connect_timeout': 10, # optional: seconds 'connect_timeout': 10, # optional: seconds
'read_timeout': 120 # optional: seconds (stream read) 'read_timeout': 120 # optional: seconds (stream read)