refactor: use with statement for requests.post in NativeClaudeSession
This commit is contained in:
18
llmcore.py
18
llmcore.py
@@ -1,6 +1,7 @@
|
|||||||
import os, json, re, time, requests, sys, threading, urllib3, base64, mimetypes, uuid
|
import os, json, re, time, requests, sys, threading, urllib3, base64, mimetypes, uuid
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
_RESP_CACHE_KEY = str(uuid.uuid4())
|
||||||
|
|
||||||
def _load_mykeys():
|
def _load_mykeys():
|
||||||
try:
|
try:
|
||||||
@@ -274,7 +275,7 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion
|
|||||||
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
|
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "text/event-stream"}
|
||||||
if api_mode == "responses":
|
if api_mode == "responses":
|
||||||
url = auto_make_url(api_base, "responses")
|
url = auto_make_url(api_base, "responses")
|
||||||
payload = {"model": model, "input": _to_responses_input(messages), "stream": True}
|
payload = {"model": model, "input": _to_responses_input(messages), "stream": True, "prompt_cache_key": _RESP_CACHE_KEY}
|
||||||
if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort}
|
if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort}
|
||||||
else:
|
else:
|
||||||
url = auto_make_url(api_base, "chat/completions")
|
url = auto_make_url(api_base, "chat/completions")
|
||||||
@@ -446,6 +447,7 @@ class BaseSession:
|
|||||||
self.api_mode = 'responses' if mode in ('responses', 'response') else 'chat_completions'
|
self.api_mode = 'responses' if mode in ('responses', 'response') else 'chat_completions'
|
||||||
self.temperature = cfg.get('temperature', 1.0)
|
self.temperature = cfg.get('temperature', 1.0)
|
||||||
self.max_tokens = cfg.get('max_tokens', 8192)
|
self.max_tokens = cfg.get('max_tokens', 8192)
|
||||||
|
self.stream = cfg.get('stream', True)
|
||||||
def _apply_claude_thinking(self, payload):
|
def _apply_claude_thinking(self, payload):
|
||||||
if self.thinking_type:
|
if self.thinking_type:
|
||||||
thinking = {"type": self.thinking_type}
|
thinking = {"type": self.thinking_type}
|
||||||
@@ -542,7 +544,7 @@ class NativeClaudeSession(BaseSession):
|
|||||||
"user-agent": "claude-cli/2.1.90 (external, cli)", "x-app": "cli"}
|
"user-agent": "claude-cli/2.1.90 (external, cli)", "x-app": "cli"}
|
||||||
if self.api_key.startswith("sk-ant-"): headers["x-api-key"] = self.api_key
|
if self.api_key.startswith("sk-ant-"): headers["x-api-key"] = self.api_key
|
||||||
else: headers["authorization"] = f"Bearer {self.api_key}"
|
else: headers["authorization"] = f"Bearer {self.api_key}"
|
||||||
payload = {"model": model, "messages": messages, "temperature": self.temperature, "max_tokens": self.max_tokens, "stream": True}
|
payload = {"model": model, "messages": messages, "temperature": self.temperature, "max_tokens": self.max_tokens, "stream": self.stream}
|
||||||
self._apply_claude_thinking(payload)
|
self._apply_claude_thinking(payload)
|
||||||
payload["metadata"] = {"user_id": json.dumps({"device_id": self._device_id, "account_uuid": self._account_uuid, "session_id": self._session_id}, separators=(',', ':'))}
|
payload["metadata"] = {"user_id": json.dumps({"device_id": self._device_id, "account_uuid": self._account_uuid, "session_id": self._session_id}, separators=(',', ':'))}
|
||||||
if self.tools:
|
if self.tools:
|
||||||
@@ -559,9 +561,17 @@ class NativeClaudeSession(BaseSession):
|
|||||||
messages[idx] = {**messages[idx], "content": list(messages[idx]["content"])}
|
messages[idx] = {**messages[idx], "content": list(messages[idx]["content"])}
|
||||||
messages[idx]["content"][-1] = dict(messages[idx]["content"][-1], cache_control={"type": "ephemeral"})
|
messages[idx]["content"][-1] = dict(messages[idx]["content"][-1], cache_control={"type": "ephemeral"})
|
||||||
try:
|
try:
|
||||||
resp = requests.post(auto_make_url(self.api_base, "messages")+'?beta=true', headers=headers, json=payload, stream=True, timeout=(self.connect_timeout, self.read_timeout))
|
with requests.post(auto_make_url(self.api_base, "messages")+'?beta=true', headers=headers, json=payload, stream=self.stream, timeout=(self.connect_timeout, self.read_timeout)) as resp:
|
||||||
if resp.status_code != 200: raise Exception(f"HTTP {resp.status_code} {resp.content.decode('utf-8', errors='replace')[:500]}")
|
if resp.status_code != 200: raise Exception(f"HTTP {resp.status_code} {resp.content.decode('utf-8', errors='replace')[:500]}")
|
||||||
return (yield from _parse_claude_sse(resp.iter_lines())) or []
|
if self.stream: return (yield from _parse_claude_sse(resp.iter_lines())) or []
|
||||||
|
else:
|
||||||
|
data = resp.json(); content_blocks = data.get("content", [])
|
||||||
|
usage = data.get("usage", {})
|
||||||
|
print(f"[Cache] input={usage.get('input_tokens',0)} creation={usage.get('cache_creation_input_tokens',0)} read={usage.get('cache_read_input_tokens',0)}")
|
||||||
|
for b in content_blocks:
|
||||||
|
if b.get("type") == "text": yield b.get("text", "")
|
||||||
|
elif b.get("type") == "thinking": yield ""
|
||||||
|
return content_blocks
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
yield (err := f"Error: {e}")
|
yield (err := f"Error: {e}")
|
||||||
return [{"type": "text", "text": err}]
|
return [{"type": "text", "text": err}]
|
||||||
|
|||||||
Reference in New Issue
Block a user