SiennaClarke committed on
Commit
fb30778
·
verified ·
1 Parent(s): bb97fab

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# --- Imports ----------------------------------------------------------------
import streamlit as st
from llama_cpp import Llama
import re

# --- Page configuration -----------------------------------------------------
st.set_page_config(page_title="Qwen 3 Advanced AI", page_icon="🧠", layout="wide")

# --- Model configuration ----------------------------------------------------
# Qwen 3 4B Thinking is the flagship 2026 small model with deep reasoning.
MODEL_REPO = "unsloth/Qwen3-4B-Thinking-2507-GGUF"
MODEL_FILE = "Qwen3-4B-Thinking-2507-Q4_K_M.gguf"
@st.cache_resource
def load_qwen():
    """Download (if needed) and load the Qwen 3 GGUF model.

    Decorated with ``st.cache_resource`` so the model is loaded once per
    process and shared across Streamlit reruns and sessions.
    """
    model_kwargs = dict(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        n_ctx=8192,    # sufficient context for long reasoning chains
        n_threads=4,   # tuned for standard multi-core CPUs
        verbose=False,
    )
    return Llama.from_pretrained(**model_kwargs)
# Load the model once (cached across reruns).
llm = load_qwen()

# --- UI: header -------------------------------------------------------------
st.title("🧠 Qwen 3 Reasoning Hub")
st.markdown("This model uses **Native Thinking** to solve logic, math, and code.")

# Chat history lives in session state so it survives Streamlit reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []

# --- UI: sidebar ------------------------------------------------------------
with st.sidebar:
    st.header("Settings")
    reasoning_on = st.toggle("Enable Deep Reasoning (/think)", value=True)
    if st.button("Clear Chat"):
        st.session_state.messages = []
        st.rerun()

# --- UI: replay prior chat turns --------------------------------------------
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])
# --- 3. Main chat logic -----------------------------------------------------

def _format_thinking(text: str) -> str:
    """Render any ``<think>...</think>`` sections of *text* as blockquotes.

    The model emits its chain-of-thought wrapped in ``<think>`` tags; this
    hides the raw tags and shows the reasoning as a quoted "💭 Reasoning"
    section. Every line of the thought is prefixed with "> " (the original
    only prefixed the first line, so multi-line reasoning escaped the
    blockquote). A still-unclosed ``<think>`` — e.g. the stream is mid-thought
    or was cut off at max_tokens — is treated as all-reasoning-so-far.
    Text with no ``<think>`` tag is returned unchanged.
    """
    if "<think>" not in text:
        return text
    rendered = ""
    for part in re.split(r'(<think>.*?</think>)', text, flags=re.DOTALL):
        if part.startswith("<think>"):
            thought = part.replace("<think>", "").replace("</think>", "").strip()
            # Prefix each line so the whole thought stays in one blockquote.
            quoted = "\n> ".join(thought.splitlines())
            rendered += f"> 💭 **Reasoning:**\n> {quoted}\n\n"
        else:
            rendered += part
    return rendered


if prompt := st.chat_input("Ask a difficult logic question..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        # Qwen 3 "soft switch": a /think or /no_think prefix toggles the
        # model's native reasoning mode per request.
        prefix = "/think " if reasoning_on else "/no_think "
        formatted_prompt = f"<|im_start|>user\n{prefix}{prompt}<|im_end|>\n<|im_start|>assistant\n"

        response_placeholder = st.empty()
        full_text = ""

        # Stream the response token by token.
        # Temperature 0.6 / top_p 0.95 per Qwen 3 official best practices
        # for thinking mode.
        for chunk in llm(
            formatted_prompt,
            max_tokens=2048,
            stream=True,
            stop=["<|im_end|>"],
            temperature=0.6,
            top_p=0.95,
        ):
            full_text += chunk['choices'][0]['text']
            # Live render with a typing cursor; one shared formatter replaces
            # the duplicated inline <think> handling (whose final render read
            # a variable only bound inside the loop's <think> branch).
            response_placeholder.markdown(_format_thinking(full_text) + "▌")

        # Final render without the cursor.
        response_placeholder.markdown(_format_thinking(full_text))
        # History keeps the raw text (including <think> tags), as before.
        st.session_state.messages.append({"role": "assistant", "content": full_text})