Spaces:

SiennaClarke
/

ChatBoxApp

Sleeping

App Files Community

SiennaClarke committed on Jan 22

Commit

3b3aef1

verified ·

1 Parent(s): d43ab8a

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -40

app.py CHANGED Viewed

@@ -1,72 +1,75 @@
 import streamlit as st
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
-# 1. Page Configuration (Hide Sidebar & Set Theme)
 st.set_page_config(page_title="Claude Clone", page_icon="🤖", layout="centered")
-# Custom CSS to force-hide the sidebar button and clean up the UI
 st.markdown("""
     <style>
         [data-testid="stSidebar"] {display: none;}
-        [data-testid="stHeader"] {background: rgba(0,0,0,0);}
         .stChatMessage {border-radius: 15px; padding: 10px; margin-bottom: 10px;}
     </style>
 """, unsafe_allow_html=True)
-st.title("Qwen 2.5 Coder 🤖")
-st.caption("A lightweight, powerful Claude-style clone powered by Alibaba's Qwen 2.5 1.5B")
-# 2. Load Model & Tokenizer
 @st.cache_resource
 def load_model():
     model_id = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_id)
-    # Using torch_dtype="auto" to handle CPU/GPU environments automatically
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        torch_dtype="auto",
         device_map="auto"
     )
-    return pipeline("text-generation", model=model, tokenizer=tokenizer)
-generator = load_model()
-# 3. Initialize Chat History
 if "messages" not in st.session_state:
-    st.session_state.messages = [
-        {"role": "system", "content": "You are a helpful assistant named Claude-Clone. You excel at coding and technical tasks."}
-    ]
-# Display Chat History
 for message in st.session_state.messages:
-    if message["role"] != "system":
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-# 4. Chat Input & Logic
-if prompt := st.chat_input("How can I help you today?"):
-    # User Message
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
         st.markdown(prompt)
-    # Assistant Response
     with st.chat_message("assistant"):
-        with st.spinner("Thinking..."):
-            # Format history for the model
-            full_prompt = st.session_state.messages
-            # Generate response
-            outputs = generator(
-                full_prompt,
-                max_new_tokens=512,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.9
-            )
-            response = outputs[0]['generated_text'][-1]['content']
-            st.markdown(response)
-    st.session_state.messages.append({"role": "assistant", "content": response})

 import streamlit as st
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from threading import Thread
 import torch
+# 1. Page configuration: centered layout; the injected CSS hides the sidebar element and rounds/pads chat messages.
 st.set_page_config(page_title="Claude Clone", page_icon="🤖", layout="centered")
 st.markdown("""
     <style>
         [data-testid="stSidebar"] {display: none;}
         .stChatMessage {border-radius: 15px; padding: 10px; margin-bottom: 10px;}
     </style>
 """, unsafe_allow_html=True)
+st.title("Qwen 2.5 Coder 1.5B 🚀")  # page heading
+st.caption("Now with real-time streaming and optimized CPU inference.")  # small caption text below the title
+# 2. Model loading: load_model() returns (model, tokenizer); st.cache_resource caches that pair so Streamlit reruns do not reload the weights.
 @st.cache_resource
 def load_model():
     model_id = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_id)
+    # float32 is chosen here for maximum compatibility; bfloat16 is the faster alternative on modern CPUs.
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        torch_dtype=torch.float32, # CPU-friendly; weights are loaded in full precision
         device_map="auto"
     )
+    return model, tokenizer  # raw model + tokenizer, so the caller can drive generate() with a streamer
+model, tokenizer = load_model()  # served from the st.cache_resource cache after the first run
+# 3. Session state: the chat history is a list of {"role", "content"} dicts kept in st.session_state.
 if "messages" not in st.session_state:
+    st.session_state.messages = []  # starts empty; the system prompt is added only when building model input
+# Replay the stored history so earlier turns are redrawn on each rerun.
 for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+# 4. Chat Input & Streaming Logic
+if prompt := st.chat_input("Ask me anything..."):
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
         st.markdown(prompt)
     with st.chat_message("assistant"):
+        # Set up the streamer
+        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+        # Prepare the input
+        messages = [{"role": "system", "content": "You are a helpful coding assistant."}] + st.session_state.messages
+        inputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
+        # Run generation in a separate thread to allow UI to remain responsive
+        generation_kwargs = dict(
+            input_ids=inputs,
+            streamer=streamer,
+            max_new_tokens=512,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9,
+            pad_token_id=tokenizer.eos_token_id
+        )
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
+        thread.start()
+        # Stream the response to the UI
+        full_response = st.write_stream(streamer)
+    st.session_state.messages.append({"role": "assistant", "content": full_response})