SiennaClarke committed on
Commit
b56b0a9
·
verified ·
1 Parent(s): aae3d86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -51
app.py CHANGED
@@ -1,70 +1,72 @@
1
  import streamlit as st
2
- from llama_cpp import Llama
3
- import os
4
 
5
- # 1. Page Config
6
- st.set_page_config(page_title="Qwen Coder GGUF", page_icon="🤖", layout="wide")
7
- st.title("🚀 Qwen2.5-Coder (GGUF CPU)")
8
- st.caption("Optimized for Hugging Face Free Tier")
9
 
10
- # 2. Model Loading with specific error handling
 
 
 
 
 
 
 
 
 
 
 
 
11
  @st.cache_resource
12
- def load_llm():
13
- try:
14
- # We use the 3B-Q4_K_M for a good balance of logic and RAM usage
15
- return Llama.from_pretrained(
16
- repo_id="Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
17
- filename="qwen2.5-coder-3b-instruct-q4_k_m.gguf", # Explicit filename
18
- n_ctx=4096, # Context window
19
- n_threads=2, # Matches HF Free Tier vCPUs
20
- verbose=False # Reduces log clutter
21
- )
22
- except Exception as e:
23
- st.error(f"Error loading model: {e}")
24
- return None
25
 
26
- llm = load_llm()
27
 
28
- # 3. Enhanced "Claude-style" System Prompt
29
  if "messages" not in st.session_state:
30
  st.session_state.messages = [
31
- {
32
- "role": "system",
33
- "content": "You are an expert AI programming assistant. When asked to write code, provide the full file content. Use clear comments, follow best practices, and ensure the code is production-ready."
34
- }
35
  ]
36
 
37
- # Display history
38
- for msg in st.session_state.messages:
39
- if msg["role"] != "system":
40
- with st.chat_message(msg["role"]):
41
- st.markdown(msg["content"])
42
 
43
- # 4. Generation Logic
44
- if prompt := st.chat_input("Write a Python script to scrape a website..."):
 
45
  st.session_state.messages.append({"role": "user", "content": prompt})
46
  with st.chat_message("user"):
47
  st.markdown(prompt)
48
 
49
- if llm:
50
- with st.chat_message("assistant"):
51
- response_placeholder = st.empty()
52
- full_response = ""
 
53
 
54
- # Stream the response
55
- output = llm.create_chat_completion(
56
- messages=st.session_state.messages,
57
- stream=True,
58
- max_tokens=1500, # Increased for "Complete Code" tasks
59
- temperature=0.1 # Lower temperature = more precise code
 
60
  )
61
 
62
- for chunk in output:
63
- if 'content' in chunk['choices'][0]['delta']:
64
- token = chunk['choices'][0]['delta']['content']
65
- full_response += token
66
- response_placeholder.markdown(full_response + "▌")
67
 
68
- response_placeholder.markdown(full_response)
69
-
70
- st.session_state.messages.append({"role": "assistant", "content": full_response})
 
1
  import streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
+ import torch
4
 
5
# 1. Page Configuration (Hide Sidebar & Set Theme)
st.set_page_config(page_title="Claude Clone", page_icon="🤖", layout="centered")

# Custom CSS to force-hide the sidebar button and clean up the UI
_CUSTOM_CSS = """
<style>
[data-testid="stSidebar"] {display: none;}
[data-testid="stHeader"] {background: rgba(0,0,0,0);}
.stChatMessage {border-radius: 15px; padding: 10px; margin-bottom: 10px;}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

st.title("Qwen 2.5 Coder 🤖")
st.caption("A lightweight, powerful Claude-style clone powered by Alibaba's Qwen 2.5 1.5B")
19
+
20
# 2. Load Model & Tokenizer
@st.cache_resource
def load_model():
    """Load the Qwen 2.5 Coder chat model and return a text-generation pipeline.

    Cached with st.cache_resource so the (slow) download/initialization runs
    only once per server process. If loading fails (network, disk, OOM), the
    error is shown in the UI and the script is halted via st.stop() instead of
    surfacing a raw traceback to the user.
    """
    model_id = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        # torch_dtype="auto" lets transformers pick a dtype suited to the
        # available hardware (CPU/GPU); device_map="auto" places the weights.
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype="auto",
            device_map="auto"
        )
        return pipeline("text-generation", model=model, tokenizer=tokenizer)
    except Exception as e:
        # Top-level UI boundary: report the failure and stop the app so
        # downstream code never runs against a missing model.
        st.error(f"Error loading model: {e}")
        st.stop()


generator = load_model()
34
 
35
# 3. Initialize Chat History
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "system", "content": "You are a helpful assistant named Claude-Clone. You excel at coding and technical tasks."}
    ]

# Display Chat History (the system prompt is never rendered to the user)
for msg in st.session_state.messages:
    if msg["role"] == "system":
        continue
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])
46
 
47
# 4. Chat Input & Logic
if prompt := st.chat_input("How can I help you today?"):
    # User Message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Assistant Response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # The chat pipeline accepts the history directly as a list of
            # {"role": ..., "content": ...} dicts.
            history = st.session_state.messages

            # Generate response
            result = generator(
                history,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
            )

            # The pipeline returns the full conversation; the last message is
            # the newly generated assistant turn.
            reply = result[0]['generated_text'][-1]['content']
            st.markdown(reply)

    st.session_state.messages.append({"role": "assistant", "content": reply})