From 70ed09ad2b42c3787da110848164bd01c7f7a16b Mon Sep 17 00:00:00 2001 From: YangChen-pro <1369792882@qq.com> Date: Wed, 18 Mar 2026 22:54:58 +0800 Subject: [PATCH] Support configurable OpenAI reasoning effort --- llmcore.py | 6 ++++++ mykey_template.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/llmcore.py b/llmcore.py index 2c46a0f..8df4fb9 100644 --- a/llmcore.py +++ b/llmcore.py @@ -132,6 +132,10 @@ class LLMSession: self.max_retries = max(0, int(cfg.get('max_retries', 2))) self.connect_timeout = max(1, int(cfg.get('connect_timeout', 10))) self.read_timeout = max(5, int(cfg.get('read_timeout', 120))) + effort = cfg.get('reasoning_effort') + effort = None if effort is None else str(effort).strip().lower() + self.reasoning_effort = effort if effort in ['none', 'minimal','low', 'medium', 'high', 'xhigh'] else None + if effort and self.reasoning_effort is None: print(f"[WARN] Invalid reasoning_effort {effort!r}, ignored.") mode = str(cfg.get('api_mode', 'chat_completions')).strip().lower().replace('-', '_') if mode in ["responses", "response"]: self.api_mode = "responses" else: self.api_mode = "chat_completions" @@ -176,9 +180,11 @@ class LLMSession: if self.api_mode == "responses": url = auto_make_url(self.api_base, "responses") payload = {"model": model, "input": self._to_responses_input(messages), "temperature": temperature, "stream": True} + if self.reasoning_effort: payload["reasoning"] = {"effort": self.reasoning_effort} else: url = auto_make_url(self.api_base, "chat/completions") payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True} + if self.reasoning_effort: payload["reasoning_effort"] = self.reasoning_effort for attempt in range(self.max_retries + 1): streamed_any = False try: diff --git a/mykey_template.py b/mykey_template.py index bf7c34f..69447a9 100644 --- a/mykey_template.py +++ b/mykey_template.py @@ -4,6 +4,8 @@ oai_config = { 'apibase':"http://243.55.19.137:2001", 'model':"openai/gpt-5.1", 
'api_mode':"chat_completions", # optional: "chat_completions" | "responses" + # 'reasoning_effort': "low", # optional: none | minimal | low | medium | high | xhigh + # Only OpenAI models support this parameter, and it may not be supported by all of them. It controls how much reasoning effort the model should use when generating a response. Higher values may result in more accurate and detailed responses, but may also take longer to generate. 'max_retries': 2, # optional: retries for 429/timeout/5xx 'connect_timeout': 10, # optional: seconds 'read_timeout': 120 # optional: seconds (stream read)