Sachin5112 committed on
Commit
91ce3bd
·
verified ·
1 Parent(s): 40a8619

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -36
app.py CHANGED
@@ -3,62 +3,109 @@ import gradio as gr
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
 
6
- # 1. Download the 1.5B model (Snappy for CPU)
 
 
7
  model_path = hf_hub_download(
8
- repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
9
- filename="qwen2.5-coder-1.5b-instruct-q8_0.gguf"
10
  )
11
 
12
- # 2. Initialize Model
 
 
13
  llm = Llama(
14
  model_path=model_path,
15
- n_ctx=2048,
16
- n_threads=os.cpu_count() or 2,
 
17
  n_gpu_layers=0,
18
  verbose=False
19
  )
20
 
21
- custom_css = """
22
- .gradio-container { background-color: #0b0f19 !important; color: #ffffff !important; }
23
- #title-text { text-align: center; color: #00d4ff; padding: 10px; }
24
- footer { display: none !important; }
25
- """
26
 
 
 
 
 
27
  def generate_response(message, history):
28
- prompt = "<|im_start|>system\nYou are Zenith, an expert code explainer. Be concise and clear.<|im_end|>\n"
 
29
 
30
  for msg in history:
31
- role = msg.get("role") if isinstance(msg, dict) else "user"
32
- content = msg.get("content") if isinstance(msg, dict) else msg[0]
33
- prompt += f"<|im_start|>{role}\n{content}<|im_end|>\n"
 
 
 
 
 
34
 
35
  prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
36
-
37
- stream = llm(prompt, max_tokens=1024, temperature=0.1, stream=True)
 
 
 
 
 
 
 
38
 
39
  partial = ""
40
  for token in stream:
41
- piece = token["choices"][0]["text"]
42
- partial += piece
43
  yield partial
44
 
45
- # 3. Fixed UI for Gradio 6.0
46
- with gr.Blocks() as demo:
47
- # Changed gr.Div to gr.Group
48
- with gr.Group():
49
- gr.Markdown("# ⚡ ZENITH CODER", elem_id="title-text")
50
- gr.Markdown("Fast 1.5B CPU Assistant", elem_id="title-text")
51
 
52
- gr.ChatInterface(
53
- fn=generate_response,
54
- type="messages",
55
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
 
 
 
57
  if __name__ == "__main__":
58
- # Moved theme and css here to satisfy Gradio 6.0 requirements
59
- demo.launch(
60
- server_name="0.0.0.0",
61
- server_port=7860,
62
- theme=gr.themes.Soft(primary_hue="blue"),
63
- css=custom_css
64
- )
 
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
 
6
+ # ----------------------------
7
+ # Model Download
8
+ # ----------------------------
9
  model_path = hf_hub_download(
10
+ repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
11
+ filename="qwen2.5-coder-7b-instruct-q8_0.gguf"
12
  )
13
 
14
+ # ----------------------------
15
+ # Load Model (CPU)
16
+ # ----------------------------
17
  llm = Llama(
18
  model_path=model_path,
19
+ n_ctx=4096,
20
+ n_threads=os.cpu_count(),
21
+ n_batch=512,
22
  n_gpu_layers=0,
23
  verbose=False
24
  )
25
 
26
+ llm("Hello", max_tokens=1) # warmup
 
 
 
 
27
 
28
+
29
+ # ----------------------------
30
+ # Chat Function
31
+ # ----------------------------
32
  def generate_response(message, history):
33
+
34
+ prompt = "<|im_start|>system\nYou are an expert coding assistant.<|im_end|>\n"
35
 
36
  for msg in history:
37
+ if isinstance(msg, dict):
38
+ role = msg.get("role")
39
+ content = msg.get("content")
40
+
41
+ if role == "user":
42
+ prompt += f"<|im_start|>user\n{content}<|im_end|>\n"
43
+ elif role == "assistant":
44
+ prompt += f"<|im_start|>assistant\n{content}<|im_end|>\n"
45
 
46
  prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
47
+
48
+ stream = llm(
49
+ prompt,
50
+ max_tokens=1024,
51
+ temperature=0.1,
52
+ top_p=0.9,
53
+ repeat_penalty=1.1,
54
+ stream=True
55
+ )
56
 
57
  partial = ""
58
  for token in stream:
59
+ partial += token["choices"][0]["text"]
 
60
  yield partial
61
 
 
 
 
 
 
 
62
 
63
+ # ----------------------------
64
+ # UI Styling (Hugging Face inspired)
65
+ # ----------------------------
66
+ custom_css = """
67
+ #title {
68
+ text-align: center;
69
+ font-size: 28px;
70
+ font-weight: bold;
71
+ }
72
+ #subtitle {
73
+ text-align: center;
74
+ color: #888;
75
+ margin-bottom: 20px;
76
+ }
77
+ """
78
+
79
+ # ----------------------------
80
+ # UI
81
+ # ----------------------------
82
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
83
+
84
+ # Header
85
+ gr.HTML("""
86
+ <div style="text-align:center;">
87
+ <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
88
+ width="80"/>
89
+ <h1 id="title">Qwen2.5 Coder (CPU Edition)</h1>
90
+ <p id="subtitle">Local AI Coding Assistant powered by GGUF + llama.cpp</p>
91
+ </div>
92
+ """)
93
+
94
+ with gr.Row():
95
+ with gr.Column(scale=1):
96
+ gr.Markdown("### ⚡ Features\n- Runs fully on CPU\n- Streaming responses\n- Lightweight GGUF model")
97
+
98
+ with gr.Column(scale=3):
99
+ chatbot = gr.ChatInterface(
100
+ fn=generate_response,
101
+ chatbot=gr.Chatbot(height=500),
102
+ textbox=gr.Textbox(placeholder="Ask me to write code, debug, or explain...", scale=7),
103
+ title="",
104
+ description=""
105
+ )
106
 
107
+ # ----------------------------
108
+ # Launch
109
+ # ----------------------------
110
  if __name__ == "__main__":
111
+ demo.launch(server_name="0.0.0.0", server_port=7860)