Update app.py
app.py
CHANGED
@@ -1,72 +1,70 @@
 import streamlit as st
-from
-import
+from llama_cpp import Llama
+import os
 
-# 1. Page
-st.set_page_config(page_title="
-st.title("
-st.
+# 1. Page Config
+st.set_page_config(page_title="Qwen Coder GGUF", page_icon="🤖", layout="wide")
+st.title("🚀 Qwen2.5-Coder (GGUF CPU)")
+st.caption("Optimized for Hugging Face Free Tier")
 
-# 2. Model Loading
+# 2. Model Loading with specific error handling
 @st.cache_resource
-def
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # Create the pipeline
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-    )
-    return pipe
+def load_llm():
+    try:
+        # We use the 3B-Q4_K_M for a good balance of logic and RAM usage
+        return Llama.from_pretrained(
+            repo_id="Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
+            filename="qwen2.5-coder-3b-instruct-q4_k_m.gguf", # Explicit filename
+            n_ctx=4096, # Context window
+            n_threads=2, # Matches HF Free Tier vCPUs
+            verbose=False # Reduces log clutter
+        )
+    except Exception as e:
+        st.error(f"Error loading model: {e}")
+        return None
 
-
-generator = load_model()
+llm = load_llm()
 
-# 3.
+# 3. Enhanced "Claude-style" System Prompt
 if "messages" not in st.session_state:
     st.session_state.messages = [
-        {
+        {
+            "role": "system",
+            "content": "You are an expert AI programming assistant. When asked to write code, provide the full file content. Use clear comments, follow best practices, and ensure the code is production-ready."
+        }
     ]
 
-# Display
-for
-    if
-        with st.chat_message(
-            st.markdown(
+# Display history
+for msg in st.session_state.messages:
+    if msg["role"] != "system":
+        with st.chat_message(msg["role"]):
+            st.markdown(msg["content"])
 
-# 4.
-if prompt := st.chat_input("
-    # Add user message to state
+# 4. Generation Logic
+if prompt := st.chat_input("Write a Python script to scrape a website..."):
     st.session_state.messages.append({"role": "user", "content": prompt})
-
     with st.chat_message("user"):
         st.markdown(prompt)
 
-
-    with st.
-
-
-
-
-
-
-
+    if llm:
+        with st.chat_message("assistant"):
+            response_placeholder = st.empty()
+            full_response = ""
+
+            # Stream the response
+            output = llm.create_chat_completion(
+                messages=st.session_state.messages,
+                stream=True,
+                max_tokens=1500, # Increased for "Complete Code" tasks
+                temperature=0.1 # Lower temperature = more precise code
             )
 
-
-
+            for chunk in output:
+                if 'content' in chunk['choices'][0]['delta']:
+                    token = chunk['choices'][0]['delta']['content']
+                    full_response += token
+                    response_placeholder.markdown(full_response + "▌")
 
-
-
+            response_placeholder.markdown(full_response)
+
+            st.session_state.messages.append({"role": "assistant", "content": full_response})
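
Note that `llama_cpp` is not preinstalled on Hugging Face Spaces, so this commit presumes the repo also carries a `requirements.txt` next to `app.py` (that file is not part of the change shown). A minimal sketch, assuming the Streamlit SDK on the free CPU tier; `huggingface_hub` is listed because `Llama.from_pretrained` uses it to download the GGUF file:

    streamlit
    llama-cpp-python
    huggingface_hub

`pip install llama-cpp-python` builds llama.cpp from source by default, so the Space's first build can take several minutes on the free tier's 2 vCPUs.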