Create app.py
app.py
ADDED
@@ -0,0 +1,90 @@
import streamlit as st
from llama_cpp import Llama
import re

# Page configuration
st.set_page_config(page_title="Qwen 3 Advanced AI", page_icon="🧠", layout="wide")

# 1. Model Configuration
# Qwen3-4B-Thinking-2507 is a small Qwen3 model tuned for explicit step-by-step reasoning
MODEL_REPO = "unsloth/Qwen3-4B-Thinking-2507-GGUF"
MODEL_FILE = "Qwen3-4B-Thinking-2507-Q4_K_M.gguf"

@st.cache_resource
def load_qwen():
    return Llama.from_pretrained(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        n_ctx=8192,    # Sufficient context for long reasoning chains
        n_threads=4,   # Adjust to the number of available CPU cores
        verbose=False
    )

llm = load_qwen()

# 2. UI Elements
st.title("🧠 Qwen 3 Reasoning Hub")
st.markdown("This model uses **Native Thinking** to solve logic, math, and code.")

if "messages" not in st.session_state:
    st.session_state.messages = []

# Sidebar for Mode Toggle
with st.sidebar:
    st.header("Settings")
    reasoning_on = st.toggle("Enable Deep Reasoning (/think)", value=True)
    if st.button("Clear Chat"):
        st.session_state.messages = []
        st.rerun()

# Display Chat History
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# 3. Main Chat Logic
if prompt := st.chat_input("Ask a difficult logic question..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        # Qwen 3 chat template with the 'soft switch' prefix
        prefix = "/think " if reasoning_on else "/no_think "
        formatted_prompt = f"<|im_start|>user\n{prefix}{prompt}<|im_end|>\n<|im_start|>assistant\n"

        response_placeholder = st.empty()
        full_text = ""

        # Stream the response
        # Temperature 0.6 / top_p 0.95 follow the official Qwen 3 recommendations for thinking mode
        for chunk in llm(
            formatted_prompt,
            max_tokens=2048,
            stream=True,
            stop=["<|im_end|>"],
            temperature=0.6,
            top_p=0.95
        ):
            token = chunk['choices'][0]['text']
            full_text += token

            # Format the <think> block for better UI
            # This hides the thinking process inside a blockquote
            display_text = full_text
            if "<think>" in display_text:
                parts = re.split(r'(<think>.*?</think>)', display_text, flags=re.DOTALL)
                clean_display = ""
                for part in parts:
                    if part.startswith("<think>"):
                        thought = part.replace("<think>", "").replace("</think>", "").strip()
                        # Prefix every line so the whole thought stays inside the blockquote
                        quoted = thought.replace("\n", "\n> ")
                        clean_display += f"> 💭 **Reasoning:**\n> {quoted}\n\n"
                    else:
                        clean_display += part
                response_placeholder.markdown(clean_display + "▌")
            else:
                response_placeholder.markdown(display_text + "▌")

        # Final render without the cursor
        response_placeholder.markdown(clean_display if "<think>" in full_text else full_text)
        st.session_state.messages.append({"role": "assistant", "content": full_text})
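
For the Space to build, the repository would also need a requirements.txt next to app.py. This commit does not include one, so the following is only a minimal sketch inferred from the imports above (pin versions as needed):

streamlit
llama-cpp-python
huggingface_hub  # used by Llama.from_pretrained to download the GGUF file from the Hub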