SiennaClarke committed · verified
Commit 63bf561 · 1 Parent(s): 5258109

Update app.py

Files changed (1)
  1. app.py +34 -27
app.py CHANGED
@@ -3,14 +3,23 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
 import torch
 
-# 1. Page Config - No Sidebar
-st.set_page_config(page_title="Qwen 3 0.6B Instant", page_icon="⚡", layout="centered", initial_sidebar_state="collapsed")
+# 1. Page Configuration (Centered and No Sidebar)
+st.set_page_config(
+    page_title="Qwen 3 0.6B Chat",
+    page_icon="⚡",
+    layout="centered",
+    initial_sidebar_state="collapsed"
+)
 
-# Model ID: The 2026 ultra-lightweight version
-MODEL_ID = "Qwen/Qwen3-0.6B-Instruct"
+# Custom CSS to hide the sidebar toggle button entirely
+st.markdown("<style>[data-testid='collapsedControl'] { display: none; }</style>", unsafe_allow_html=True)
+
+# 2. Model & Tokenizer Initialization
+MODEL_ID = "Qwen/Qwen3-0.6B"
 
 @st.cache_resource
-def load_resource():
+def load_llm():
+    # Load the tokenizer and model once; st.cache_resource reuses them across reruns
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
@@ -19,45 +28,41 @@ def load_resource():
     )
     return tokenizer, model
 
-tokenizer, model = load_resource()
-
-# Hide Sidebar Toggle
-st.markdown("<style>[data-testid='collapsedControl'] { display: none; }</style>", unsafe_allow_html=True)
+tokenizer, model = load_llm()
 
-st.title("⚡ Qwen 3 0.6B: Instant")
-st.caption("The fastest chat model of 2026 | Pure CPU Speed")
+# 3. Chat UI Logic
+st.title("⚡ Qwen 3 0.6B")
+st.caption("A lightweight chat model with real-time token streaming.")
 
 if "messages" not in st.session_state:
     st.session_state.messages = []
 
-# Header button for clearing
-if st.button("Reset Chat"):
-    st.session_state.messages = []
-    st.rerun()
-
-# Display chat history
+# Display history
 for msg in st.session_state.messages:
     with st.chat_message(msg["role"]):
         st.markdown(msg["content"])
 
-# 2. Chat Input & Generation
-if prompt := st.chat_input("Ask me anything..."):
+# 4. Input & Streaming Generation
+if prompt := st.chat_input("Ask Qwen 3..."):
+    # Store and display the user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
 
    with st.chat_message("assistant"):
+        # Initialize the streamer
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
-        # Apply Qwen 3 template (disabling 'thinking' for maximum chat speed)
-        input_text = tokenizer.apply_chat_template(
-            st.session_state.messages,
-            tokenize=False,
+        # Build model inputs with the Qwen 3 chat template
+        inputs = tokenizer.apply_chat_template(
+            st.session_state.messages,
             add_generation_prompt=True,
-            enable_thinking=False
-        )
-        inputs = tokenizer([input_text], return_tensors="pt").to(model.device)
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt",
+        ).to(model.device)
 
+        # Background thread for generation
        generation_kwargs = dict(
            **inputs,
            streamer=streamer,
@@ -65,14 +70,16 @@ if prompt := st.chat_input("Ask me anything..."):
            do_sample=True,
            temperature=0.7,
            top_p=0.8,
+            pad_token_id=tokenizer.eos_token_id
        )
 
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()
 
-        # Word-by-word streaming
+        # Update the UI as tokens arrive
        placeholder = st.empty()
        full_response = ""
+
        for new_text in streamer:
            full_response += new_text
            placeholder.markdown(full_response + "▌")
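
For context on the tokenization change in this commit: the old code rendered the chat template to a string and then tokenized it in a second call, while the new code does both in a single apply_chat_template call that returns model-ready tensors. A minimal sketch of the two paths outside Streamlit (the message list is illustrative):

from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

messages = [{"role": "user", "content": "Hello!"}]

# Old path: render the template to text, then tokenize separately.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # Qwen 3 template flag: suppress the <think> block
)
inputs_old = tokenizer([text], return_tensors="pt").to(model.device)

# New path: template rendering and tokenization in one call.
inputs_new = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,        # yields input_ids and attention_mask
    return_tensors="pt",
).to(model.device)

One behavioral difference worth flagging: the new call no longer passes enable_thinking=False, and Qwen 3's chat template defaults thinking to on, so responses may now begin with reasoning tokens unless the flag is restored.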
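
Continuing the sketch above, the streaming pattern itself is unchanged by the commit: model.generate blocks until completion, so it runs on a background thread while the foreground loop drains the TextIteratorStreamer. The max_new_tokens value below is an assumption, since the diff hunks elide that line:

from threading import Thread
from transformers import TextIteratorStreamer

# The streamer yields decoded text chunks as generate() emits tokens.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

thread = Thread(target=model.generate, kwargs=dict(
    **inputs_new,
    streamer=streamer,
    max_new_tokens=512,  # assumed cap; not shown in the diff hunks
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad-token warning
))
thread.start()

# Iteration ends when generation finishes; join to reap the worker thread.
response = ""
for chunk in streamer:
    response += chunk
thread.join()
print(response)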