From 086599a5d6dd02bb6a37b1c5144279e05087e164 Mon Sep 17 00:00:00 2001 From: Jiaqing Liang Date: Mon, 13 Apr 2026 14:59:38 +0800 Subject: [PATCH] fix: scroll ghost height reflow via overflow toggle; extend cache markers to last 2 user msgs; simplify cursor & merge JS fixes --- frontends/stapp.py | 29 ++++++++++++++++++++++++----- llmcore.py | 26 ++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/frontends/stapp.py b/frontends/stapp.py index f4ee915..ed271cd 100644 --- a/frontends/stapp.py +++ b/frontends/stapp.py @@ -27,6 +27,7 @@ agent = init() st.title("🖥️ Cowork") + if 'autonomous_enabled' not in st.session_state: st.session_state.autonomous_enabled = False @st.fragment @@ -97,7 +98,7 @@ def render_segments(segments, suffix=''): if seg['type'] == 'fold': with st.expander(seg['title'], expanded=False): st.markdown(seg['content']) else: - st.markdown(seg['content'] + suffix, unsafe_allow_html=not not suffix) + st.markdown(seg['content'] + suffix) def agent_backend_stream(prompt): display_queue = agent.put_task(prompt, source="user") @@ -123,10 +124,28 @@ for msg in st.session_state.messages: if msg["role"] == "assistant": render_segments(fold_turns(msg["content"])) else: st.markdown(msg["content"]) +# Scroll-height ghost fix: during streaming, expander open/close mid-animation can leave +# phantom height → scrollbar long but can't scroll to bottom. Periodically detect & reflow. +import streamlit.components.v1 as components +_js_scroll_fix = ("!function(){var p=window.parent;if(p.__sfx)return;p.__sfx=1;" + "var d=p.document;setInterval(function(){" + "var m=d.querySelector('section.main');if(!m)return;" + "var b=m.querySelector('.block-container');if(!b)return;" + "if(m.scrollHeight>b.scrollHeight+150){" + "m.style.overflow='hidden';void m.offsetHeight;m.style.overflow=''}" + "},3000)}()") # IME composition fix (macOS only) - prevents Enter from submitting during CJK input -if os.name != 'nt': - import streamlit.components.v1 as components - components.html('', height=0) +_js_ime_fix = ("" if os.name == 'nt' else + "!function(){if(window.parent.__imeFix)return;window.parent.__imeFix=1;" + "var d=window.parent.document,c=0;" + "d.addEventListener('compositionstart',()=>c=1,!0);" + "d.addEventListener('compositionend',()=>c=0,!0);" + "function f(){d.querySelectorAll('textarea[data-testid=stChatInputTextArea]')" + ".forEach(t=>{t.__imeFix||(t.__imeFix=1,t.addEventListener('keydown',e=>{" + "e.key==='Enter'&&!e.shiftKey&&(e.isComposing||c||e.keyCode===229)&&" + "(e.stopImmediatePropagation(),e.preventDefault())},!0))})}" + "f();new MutationObserver(f).observe(d.body,{childList:1,subtree:1})}()") +components.html(f'', height=0) if prompt := st.chat_input("请输入指令"): st.session_state.messages.append({"role": "user", "content": prompt}) @@ -134,7 +153,7 @@ if prompt := st.chat_input("请输入指令"): with st.chat_message("assistant"): slot = st.empty(); response = '' - CURSOR = '' + CURSOR = ' ▌' for response in agent_backend_stream(prompt): # 每轮整块重画(含 heartbeat 空转):segments 不变时 Streamlit diff 零变更 → 不闪烁; # 而 slot.container() 调用本身保证 Streamlit 能抛 StopException(abort 生效) diff --git a/llmcore.py b/llmcore.py index fa4c15e..4236a2f 100644 --- a/llmcore.py +++ b/llmcore.py @@ -267,6 +267,19 @@ def _parse_openai_sse(resp_lines, api_mode="chat_completions"): blocks.append({"type": "tool_use", "id": tc["id"], "name": tc["name"], "input": inp}) return blocks +def _stamp_oai_cache_markers(messages, model): + """Add cache_control to last 2 user messages for Anthropic models via OAI-compatible relay.""" + ml = model.lower() + if not any(k in ml for k in ('claude', 'anthropic')): return + user_idxs = [i for i, m in enumerate(messages) if m.get('role') == 'user'] + for idx in user_idxs[-2:]: + c = messages[idx].get('content') + if isinstance(c, str): + messages[idx] = {**messages[idx], 'content': [{'type': 'text', 'text': c, 'cache_control': {'type': 'ephemeral'}}]} + elif isinstance(c, list) and c: + c = list(c); c[-1] = dict(c[-1], cache_control={'type': 'ephemeral'}) + messages[idx] = {**messages[idx], 'content': c} + def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completions', *, temperature=0.5, max_tokens=None, tools=None, reasoning_effort=None, max_retries=0, connect_timeout=10, read_timeout=300, proxies=None): @@ -281,6 +294,7 @@ def _openai_stream(api_base, api_key, messages, model, api_mode='chat_completion if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort} else: url = auto_make_url(api_base, "chat/completions") + _stamp_oai_cache_markers(messages, model) payload = {"model": model, "messages": messages, "temperature": temperature, "stream": True, "stream_options": {"include_usage": True}} if max_tokens: payload["max_tokens"] = max_tokens if reasoning_effort: payload["reasoning_effort"] = reasoning_effort @@ -483,8 +497,9 @@ class ClaudeSession(BaseSession): return [{"type": "text", "text": err}] def make_messages(self, raw_list): msgs = [{"role": m['role'], "content": list(m['content'])} for m in raw_list] - c = msgs[-1]["content"] - c[-1] = dict(c[-1], cache_control={"type": "ephemeral"}) + user_idxs = [i for i, m in enumerate(msgs) if m['role'] == 'user'] + for idx in user_idxs[-2:]: + msgs[idx]["content"][-1] = dict(msgs[idx]["content"][-1], cache_control={"type": "ephemeral"}) return msgs class LLMSession(BaseSession): @@ -544,8 +559,10 @@ class NativeClaudeSession(BaseSession): if self.system: if self.fake_cc_system_prompt: messages[0]["content"].insert(0, {"type": "text", "text": self.system}) else: payload["system"] = [{"type": "text", "text": self.system}] - messages[-1] = {**messages[-1], "content": list(messages[-1]["content"])} - messages[-1]["content"][-1] = dict(messages[-1]["content"][-1], cache_control={"type": "ephemeral"}) + user_idxs = [i for i, m in enumerate(messages) if m['role'] == 'user'] + for idx in user_idxs[-2:]: + messages[idx] = {**messages[idx], "content": list(messages[idx]["content"])} + messages[idx]["content"][-1] = dict(messages[idx]["content"][-1], cache_control={"type": "ephemeral"}) try: resp = requests.post(auto_make_url(self.api_base, "messages")+'?beta=true', headers=headers, json=payload, stream=True, timeout=(self.connect_timeout, self.read_timeout)) if resp.status_code != 200: raise Exception(f"HTTP {resp.status_code} {resp.content.decode('utf-8', errors='replace')[:500]}") @@ -819,6 +836,7 @@ class MixinSession: for attempt in range(self._retries + 1): idx = (base + attempt) % n gen = self._orig_raw_asks[idx](*args, **kwargs) + print(f'[MixinSession] Using session ({self._sessions[idx].name})') last_chunk, return_val, yielded = None, [], False try: while True: