SiennaClarke committed on
Commit
3b3aef1
·
verified ·
1 Parent(s): d43ab8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -40
app.py CHANGED
@@ -1,72 +1,75 @@
1
  import streamlit as st
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
3
  import torch
4
 
5
- # 1. Page Configuration (Hide Sidebar & Set Theme)
6
  st.set_page_config(page_title="Claude Clone", page_icon="🤖", layout="centered")
7
 
8
- # Custom CSS to force-hide the sidebar button and clean up the UI
9
  st.markdown("""
10
  <style>
11
  [data-testid="stSidebar"] {display: none;}
12
- [data-testid="stHeader"] {background: rgba(0,0,0,0);}
13
  .stChatMessage {border-radius: 15px; padding: 10px; margin-bottom: 10px;}
14
  </style>
15
  """, unsafe_allow_html=True)
16
 
17
- st.title("Qwen 2.5 Coder 🤖")
18
- st.caption("A lightweight, powerful Claude-style clone powered by Alibaba's Qwen 2.5 1.5B")
19
 
20
# 2. Load Model & Tokenizer (cached across Streamlit reruns).
@st.cache_resource
def load_model():
    """Build and cache a text-generation pipeline for Qwen2.5-Coder-1.5B-Instruct."""
    checkpoint = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    # torch_dtype="auto" lets transformers pick a dtype suited to the
    # available hardware, so the same code runs on CPU and GPU.
    lm = AutoModelForCausalLM.from_pretrained(
        checkpoint, torch_dtype="auto", device_map="auto"
    )
    return pipeline("text-generation", model=lm, tokenizer=tok)
32
 
33
generator = load_model()

# 3. Initialize chat history with a persistent system prompt.
if "messages" not in st.session_state:
    system_msg = {
        "role": "system",
        "content": "You are a helpful assistant named Claude-Clone. You excel at coding and technical tasks.",
    }
    st.session_state.messages = [system_msg]

# Replay the visible conversation (the system prompt stays hidden).
for msg in st.session_state.messages:
    if msg["role"] == "system":
        continue
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# 4. Chat input & generation.
if prompt := st.chat_input("How can I help you today?"):
    # Record and echo the user's turn.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # The pipeline accepts the chat history (a list of role/content
            # dicts) directly and returns the extended conversation.
            sampling = {
                "max_new_tokens": 512,
                "do_sample": True,
                "temperature": 0.7,
                "top_p": 0.9,
            }
            outputs = generator(st.session_state.messages, **sampling)
            # The last entry of the returned conversation is the new reply.
            response = outputs[0]["generated_text"][-1]["content"]
            st.markdown(response)

    st.session_state.messages.append({"role": "assistant", "content": response})
 
 
 
 
 
 
 
 
 
1
from threading import Thread

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# 1. Page configuration (the sidebar is hidden via the CSS below).
st.set_page_config(page_title="Claude Clone", page_icon="🤖", layout="centered")

# Hide the sidebar and round off the chat message bubbles.
_CSS = """
<style>
[data-testid="stSidebar"] {display: none;}
.stChatMessage {border-radius: 15px; padding: 10px; margin-bottom: 10px;}
</style>
"""
st.markdown(_CSS, unsafe_allow_html=True)

st.title("Qwen 2.5 Coder 1.5B 🚀")
st.caption("Now with real-time streaming and optimized CPU inference.")
18
 
19
# 2. Optimized Model Loading
@st.cache_resource
def load_model():
    """Load the Qwen2.5-Coder-1.5B-Instruct model and tokenizer once per process.

    Cached with @st.cache_resource so Streamlit reruns reuse the same
    weights instead of reloading them on every interaction.

    Returns:
        (model, tokenizer) ready for generate() / apply_chat_template().
    """
    model_id = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # float32 is the maximally compatible CPU dtype. (The previous comment
    # mentioned bfloat16, but the code loads float32 — swap the dtype here
    # only if the host CPU has fast bfloat16 support.)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float32,  # CPU-friendly
        device_map="auto"
    )
    return model, tokenizer

model, tokenizer = load_model()
34
 
35
# 3. Session State: ensure the chat history list exists.
st.session_state.setdefault("messages", [])

# Replay the conversation so far on every rerun.
for past in st.session_state.messages:
    with st.chat_message(past["role"]):
        st.markdown(past["content"])
 
43
 
44
# 4. Chat Input & Streaming Logic
if prompt := st.chat_input("Ask me anything..."):
    # Record and echo the user's turn.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        # The streamer yields decoded text pieces as generate() produces
        # tokens; skip_prompt keeps the echoed conversation out of the reply.
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

        # Prepend the system prompt to the stored history for the template.
        messages = [{"role": "system", "content": "You are a helpful coding assistant."}] + st.session_state.messages
        # return_dict=True also yields the attention_mask, which generate()
        # needs to disambiguate padding when pad_token_id == eos_token_id
        # (previously only input_ids were passed, triggering a warning).
        model_inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        generation_kwargs = dict(
            input_ids=model_inputs["input_ids"],
            attention_mask=model_inputs["attention_mask"],
            streamer=streamer,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

        # generate() blocks, so run it on a worker thread and let the main
        # (Streamlit) thread consume the streamer to update the UI live.
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()

        # st.write_stream renders chunks as they arrive and returns the
        # concatenated text once the streamer is exhausted.
        full_response = st.write_stream(streamer)

        # Fix: reap the worker so a straggling/failed generation does not
        # leak a thread across Streamlit reruns.
        thread.join()

        st.session_state.messages.append({"role": "assistant", "content": full_response})