SiennaClarke committed on
Commit
d7f5026
·
verified ·
1 Parent(s): 4c6dc8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -63
app.py CHANGED
@@ -1,90 +1,82 @@
1
  import streamlit as st
2
- from llama_cpp import Llama
3
- import re
 
4
 
5
- # Page configuration
6
- st.set_page_config(page_title="Qwen 3 Advanced AI", page_icon="🧠", layout="wide")
7
 
8
- # 1. Model Configuration
9
- # Qwen 3 4B Thinking is the flagship 2026 small model with deep reasoning
10
- MODEL_REPO = "unsloth/Qwen3-4B-Thinking-2507-GGUF"
11
- MODEL_FILE = "Qwen3-4B-Thinking-2507-Q4_K_M.gguf"
12
 
13
  @st.cache_resource
14
- def load_qwen():
15
- return Llama.from_pretrained(
16
- repo_id=MODEL_REPO,
17
- filename=MODEL_FILE,
18
- n_ctx=8192, # Sufficient context for long reasoning chains
19
- n_threads=4, # Optimized for standard multi-core CPUs
20
- verbose=False
21
  )
 
22
 
23
- llm = load_qwen()
24
 
25
- # 2. UI Elements
26
- st.title("🧠 Qwen 3 Reasoning Hub")
27
- st.markdown("This model uses **Native Thinking** to solve logic, math, and code.")
 
 
28
 
29
  if "messages" not in st.session_state:
30
  st.session_state.messages = []
31
 
32
- # Sidebar for Mode Toggle
33
- with st.sidebar:
34
- st.header("Settings")
35
- reasoning_on = st.toggle("Enable Deep Reasoning (/think)", value=True)
36
- if st.button("Clear Chat"):
37
  st.session_state.messages = []
38
  st.rerun()
39
 
40
- # Display Chat History
41
  for msg in st.session_state.messages:
42
  with st.chat_message(msg["role"]):
43
  st.markdown(msg["content"])
44
 
45
- # 3. Main Chat Logic
46
- if prompt := st.chat_input("Ask a difficult logic question..."):
47
  st.session_state.messages.append({"role": "user", "content": prompt})
48
  with st.chat_message("user"):
49
  st.markdown(prompt)
50
 
51
  with st.chat_message("assistant"):
52
- # Qwen 3 Template with 'Soft Switch'
53
- prefix = "/think " if reasoning_on else "/no_think "
54
- formatted_prompt = f"<|im_start|>user\n{prefix}{prompt}<|im_end|>\n<|im_start|>assistant\n"
55
 
56
- response_placeholder = st.empty()
57
- full_text = ""
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- # Stream the response
60
- # Using Temperature 0.6 as per Qwen 3 official best practices for thinking
61
- for chunk in llm(
62
- formatted_prompt,
63
- max_tokens=2048,
64
- stream=True,
65
- stop=["<|im_end|>"],
66
- temperature=0.6,
67
- top_p=0.95
68
- ):
69
- token = chunk['choices'][0]['text']
70
- full_text += token
71
-
72
- # Format the <think> block for better UI
73
- # This hides the thinking process inside a blockquote
74
- display_text = full_text
75
- if "<think>" in display_text:
76
- parts = re.split(r'(<think>.*?</think>)', display_text, flags=re.DOTALL)
77
- clean_display = ""
78
- for part in parts:
79
- if part.startswith("<think>"):
80
- thought = part.replace("<think>", "").replace("</think>", "").strip()
81
- clean_display += f"> 💭 **Reasoning:**\n> {thought}\n\n"
82
- else:
83
- clean_display += part
84
- response_placeholder.markdown(clean_display + "▌")
85
- else:
86
- response_placeholder.markdown(display_text + "▌")
87
 
88
- # Final render without the cursor
89
- response_placeholder.markdown(clean_display if "<think>" in full_text else full_text)
90
- st.session_state.messages.append({"role": "assistant", "content": full_text})
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
3
+ from threading import Thread
4
+ import torch
5
 
6
+ # Clean, centered layout without sidebar
7
+ st.set_page_config(page_title="Qwen 3 4B Stream", page_icon="", layout="centered", initial_sidebar_state="collapsed")
8
 
9
+ # 1. Model Configuration (Qwen 3 4B - 4-bit for speed)
10
+ MODEL_ID = "unsloth/Qwen3-4B-Instruct-2507-bnb-4bit"
 
 
11
 
12
  @st.cache_resource
13
+ def load_resource():
14
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
15
+ model = AutoModelForCausalLM.from_pretrained(
16
+ MODEL_ID,
17
+ device_map="auto",
18
+ torch_dtype="auto"
 
19
  )
20
+ return tokenizer, model
21
 
22
+ tokenizer, model = load_resource()
23
 
24
+ # Custom CSS to hide the sidebar toggle
25
+ st.markdown("<style>[data-testid='collapsedControl'] { display: none; }</style>", unsafe_allow_html=True)
26
+
27
+ st.title("⚡ Qwen 3 4B Stream")
28
+ st.caption("Real-time local generation | No Sidebar")
29
 
30
  if "messages" not in st.session_state:
31
  st.session_state.messages = []
32
 
33
+ # Action Buttons
34
+ col1, col2 = st.columns([5, 1])
35
+ with col2:
36
+ if st.button("Reset"):
 
37
  st.session_state.messages = []
38
  st.rerun()
39
 
40
+ # Display chat history
41
  for msg in st.session_state.messages:
42
  with st.chat_message(msg["role"]):
43
  st.markdown(msg["content"])
44
 
45
+ # 2. Streaming Chat Input
46
+ if prompt := st.chat_input("Ask Qwen 3..."):
47
  st.session_state.messages.append({"role": "user", "content": prompt})
48
  with st.chat_message("user"):
49
  st.markdown(prompt)
50
 
51
  with st.chat_message("assistant"):
52
+ # Setup the Streamer
53
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
54
 
55
+ # Prepare input
56
+ input_text = tokenizer.apply_chat_template(st.session_state.messages, tokenize=False, add_generation_prompt=True)
57
+ inputs = tokenizer([input_text], return_tensors="pt").to(model.device)
58
+
59
+ # 3. Generation in a separate thread
60
+ generation_kwargs = dict(
61
+ **inputs,
62
+ streamer=streamer,
63
+ max_new_tokens=1024,
64
+ do_sample=True,
65
+ temperature=0.7,
66
+ top_p=0.8,
67
+ pad_token_id=tokenizer.eos_token_id
68
+ )
69
 
70
+ thread = Thread(target=model.generate, kwargs=generation_kwargs)
71
+ thread.start()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
+ # 4. Stream to UI
74
+ placeholder = st.empty()
75
+ full_response = ""
76
+
77
+ for new_text in streamer:
78
+ full_response += new_text
79
+ placeholder.markdown(full_response + "▌")
80
+
81
+ placeholder.markdown(full_response)
82
+ st.session_state.messages.append({"role": "assistant", "content": full_response})