SiennaClarke committed on
Commit
aae3d86
·
verified ·
1 Parent(s): 366b9b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -54
app.py CHANGED
@@ -1,72 +1,70 @@
1
  import streamlit as st
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
- import torch
4
 
5
- # 1. Page Configuration
6
- st.set_page_config(page_title="QwenCoder-Mini", page_icon="💻")
7
- st.title("💻 Qwen2.5 Coder: Mini-Claude")
8
- st.markdown("Running on **Qwen2.5-Coder-3B-Instruct** (CPU Optimized)")
9
 
10
- # 2. Model Loading (Cached to prevent reloading on every click)
11
  @st.cache_resource
12
- def load_model():
13
- model_id = "Qwen/Qwen2.5-Coder-3B-Instruct"
14
-
15
- # Load tokenizer
16
- tokenizer = AutoTokenizer.from_pretrained(model_id)
17
-
18
- # Load model with 4-bit quantization to save RAM (Crucial for 16GB limit)
19
- model = AutoModelForCausalLM.from_pretrained(
20
- model_id,
21
- device_map="auto",
22
- torch_dtype="auto",
23
- trust_remote_code=True
24
- )
25
-
26
- # Create the pipeline
27
- pipe = pipeline(
28
- "text-generation",
29
- model=model,
30
- tokenizer=tokenizer,
31
- )
32
- return pipe
33
 
34
- # Initialize the pipeline
35
- generator = load_model()
36
 
37
- # 3. Chat History Setup
38
  if "messages" not in st.session_state:
39
  st.session_state.messages = [
40
- {"role": "system", "content": "You are an expert software engineer like Claude. Provide complete, production-ready code with explanations."}
 
 
 
41
  ]
42
 
43
- # Display chat history
44
- for message in st.session_state.messages:
45
- if message["role"] != "system":
46
- with st.chat_message(message["role"]):
47
- st.markdown(message["content"])
48
 
49
- # 4. Chat Input & Generation
50
- if prompt := st.chat_input("Ask me to write some code..."):
51
- # Add user message to state
52
  st.session_state.messages.append({"role": "user", "content": prompt})
53
-
54
  with st.chat_message("user"):
55
  st.markdown(prompt)
56
 
57
- with st.chat_message("assistant"):
58
- with st.spinner("Writing code..."):
59
- # Generate response
60
- response = generator(
61
- st.session_state.messages,
62
- max_new_tokens=1024,
63
- temperature=0.7,
64
- top_p=0.9,
65
- return_full_text=False
 
 
66
  )
67
 
68
- answer = response[0]['generated_text']
69
- st.markdown(answer)
 
 
 
70
 
71
- # Add assistant message to state
72
- st.session_state.messages.append({"role": "assistant", "content": answer})
 
 
import streamlit as st
from llama_cpp import Llama
import os  # NOTE(review): not referenced anywhere below — confirm before removing

# 1. Page Config
# st.set_page_config must be the first Streamlit command executed on a run.
st.set_page_config(page_title="Qwen Coder GGUF", page_icon="🤖", layout="wide")
st.title("🚀 Qwen2.5-Coder (GGUF CPU)")
st.caption("Optimized for Hugging Face Free Tier")
11
  @st.cache_resource
12
+ def load_llm():
13
+ try:
14
+ # We use the 3B-Q4_K_M for a good balance of logic and RAM usage
15
+ return Llama.from_pretrained(
16
+ repo_id="Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
17
+ filename="qwen2.5-coder-3b-instruct-q4_k_m.gguf", # Explicit filename
18
+ n_ctx=4096, # Context window
19
+ n_threads=2, # Matches HF Free Tier vCPUs
20
+ verbose=False # Reduces log clutter
21
+ )
22
+ except Exception as e:
23
+ st.error(f"Error loading model: {e}")
24
+ return None
 
 
 
 
 
 
 
 
llm = load_llm()

# 3. Enhanced "Claude-style" System Prompt
# Seed the conversation with the hidden system message exactly once
# per browser session; later reruns reuse the stored history.
if "messages" not in st.session_state:
    system_message = {
        "role": "system",
        "content": (
            "You are an expert AI programming assistant. When asked to write code, "
            "provide the full file content. Use clear comments, follow best "
            "practices, and ensure the code is production-ready."
        ),
    }
    st.session_state.messages = [system_message]
36
 
37
+ # Display history
38
+ for msg in st.session_state.messages:
39
+ if msg["role"] != "system":
40
+ with st.chat_message(msg["role"]):
41
+ st.markdown(msg["content"])
42
 
43
+ # 4. Generation Logic
44
+ if prompt := st.chat_input("Write a Python script to scrape a website..."):
 
45
  st.session_state.messages.append({"role": "user", "content": prompt})
 
46
  with st.chat_message("user"):
47
  st.markdown(prompt)
48
 
49
+ if llm:
50
+ with st.chat_message("assistant"):
51
+ response_placeholder = st.empty()
52
+ full_response = ""
53
+
54
+ # Stream the response
55
+ output = llm.create_chat_completion(
56
+ messages=st.session_state.messages,
57
+ stream=True,
58
+ max_tokens=1500, # Increased for "Complete Code" tasks
59
+ temperature=0.1 # Lower temperature = more precise code
60
  )
61
 
62
+ for chunk in output:
63
+ if 'content' in chunk['choices'][0]['delta']:
64
+ token = chunk['choices'][0]['delta']['content']
65
+ full_response += token
66
+ response_placeholder.markdown(full_response + "▌")
67
 
68
+ response_placeholder.markdown(full_response)
69
+
70
+ st.session_state.messages.append({"role": "assistant", "content": full_response})