Sachin5112 committed on
Commit
f3ec931
·
verified ·
1 Parent(s): 91ce3bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -48
app.py CHANGED
@@ -4,16 +4,13 @@ from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
 
6
  # ----------------------------
7
- # Model Download
8
  # ----------------------------
9
  model_path = hf_hub_download(
10
  repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
11
  filename="qwen2.5-coder-7b-instruct-q8_0.gguf"
12
  )
13
 
14
- # ----------------------------
15
- # Load Model (CPU)
16
- # ----------------------------
17
  llm = Llama(
18
  model_path=model_path,
19
  n_ctx=4096,
@@ -23,25 +20,19 @@ llm = Llama(
23
  verbose=False
24
  )
25
 
26
- llm("Hello", max_tokens=1) # warmup
27
 
28
 
29
  # ----------------------------
30
- # Chat Function
31
  # ----------------------------
32
  def generate_response(message, history):
33
 
34
  prompt = "<|im_start|>system\nYou are an expert coding assistant.<|im_end|>\n"
35
 
36
- for msg in history:
37
- if isinstance(msg, dict):
38
- role = msg.get("role")
39
- content = msg.get("content")
40
-
41
- if role == "user":
42
- prompt += f"<|im_start|>user\n{content}<|im_end|>\n"
43
- elif role == "assistant":
44
- prompt += f"<|im_start|>assistant\n{content}<|im_end|>\n"
45
 
46
  prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
47
 
@@ -54,58 +45,80 @@ def generate_response(message, history):
54
  stream=True
55
  )
56
 
57
- partial = ""
58
  for token in stream:
59
- partial += token["choices"][0]["text"]
60
- yield partial
61
 
62
 
63
  # ----------------------------
64
- # UI Styling (Hugging Face inspired)
65
  # ----------------------------
66
- custom_css = """
67
- #title {
68
- text-align: center;
69
- font-size: 28px;
70
- font-weight: bold;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  }
72
- #subtitle {
73
- text-align: center;
74
- color: #888;
75
- margin-bottom: 20px;
 
 
 
76
  }
 
 
 
 
 
 
 
 
 
77
  """
78
 
 
79
  # ----------------------------
80
  # UI
81
  # ----------------------------
82
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
83
 
84
- # Header
85
  gr.HTML("""
86
- <div style="text-align:center;">
87
  <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
88
- width="80"/>
89
- <h1 id="title">Qwen2.5 Coder (CPU Edition)</h1>
90
- <p id="subtitle">Local AI Coding Assistant powered by GGUF + llama.cpp</p>
91
  </div>
92
  """)
93
 
94
- with gr.Row():
95
- with gr.Column(scale=1):
96
- gr.Markdown("### ⚡ Features\n- Runs fully on CPU\n- Streaming responses\n- Lightweight GGUF model")
 
 
 
97
 
98
- with gr.Column(scale=3):
99
- chatbot = gr.ChatInterface(
100
- fn=generate_response,
101
- chatbot=gr.Chatbot(height=500),
102
- textbox=gr.Textbox(placeholder="Ask me to write code, debug, or explain...", scale=7),
103
- title="",
104
- description=""
105
- )
106
 
107
- # ----------------------------
108
- # Launch
109
- # ----------------------------
110
  if __name__ == "__main__":
111
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
4
  from huggingface_hub import hf_hub_download
5
 
6
  # ----------------------------
7
+ # Model
8
  # ----------------------------
9
  model_path = hf_hub_download(
10
  repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
11
  filename="qwen2.5-coder-7b-instruct-q8_0.gguf"
12
  )
13
 
 
 
 
14
  llm = Llama(
15
  model_path=model_path,
16
  n_ctx=4096,
 
20
  verbose=False
21
  )
22
 
23
+ llm("warmup", max_tokens=1)
24
 
25
 
26
  # ----------------------------
27
+ # Chat Logic
28
  # ----------------------------
29
  def generate_response(message, history):
30
 
31
  prompt = "<|im_start|>system\nYou are an expert coding assistant.<|im_end|>\n"
32
 
33
+ for user, assistant in history:
34
+ prompt += f"<|im_start|>user\n{user}<|im_end|>\n"
35
+ prompt += f"<|im_start|>assistant\n{assistant}<|im_end|>\n"
 
 
 
 
 
 
36
 
37
  prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
38
 
 
45
  stream=True
46
  )
47
 
48
+ out = ""
49
  for token in stream:
50
+ out += token["choices"][0]["text"]
51
+ yield out
52
 
53
 
54
  # ----------------------------
55
+ # MOBILE APP CSS
56
  # ----------------------------
57
+ css = """
58
+ /* Center mobile frame */
59
+ .gradio-container {
60
+ max-width: 430px !important;
61
+ margin: auto !important;
62
+ height: 100vh;
63
+ border-radius: 20px;
64
+ box-shadow: 0 10px 40px rgba(0,0,0,0.2);
65
+ overflow: hidden;
66
+ background: #0b0f19;
67
+ }
68
+
69
+ /* Chat background */
70
+ .chatbot {
71
+ background: #0b0f19 !important;
72
+ }
73
+
74
+ /* User bubble */
75
+ .user {
76
+ background: #2b6fff !important;
77
+ color: white !important;
78
+ border-radius: 18px 18px 4px 18px !important;
79
+ padding: 10px !important;
80
  }
81
+
82
+ /* Bot bubble */
83
+ .assistant {
84
+ background: #1c1f2a !important;
85
+ color: white !important;
86
+ border-radius: 18px 18px 18px 4px !important;
87
+ padding: 10px !important;
88
  }
89
+
90
+ /* Input bar */
91
+ textarea {
92
+ border-radius: 20px !important;
93
+ padding: 12px !important;
94
+ }
95
+
96
+ /* Hide footer */
97
+ footer {display:none !important;}
98
  """
99
 
100
+
101
  # ----------------------------
102
  # UI
103
  # ----------------------------
104
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
105
 
 
106
  gr.HTML("""
107
+ <div style="text-align:center; padding:10px;">
108
  <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
109
+ width="60"/>
110
+ <h2 style="color:white; margin:5px;">Qwen Coder</h2>
111
+ <p style="color:gray; font-size:12px;">Mobile AI Coding Assistant</p>
112
  </div>
113
  """)
114
 
115
+ gr.ChatInterface(
116
+ fn=generate_response,
117
+ fill_height=True,
118
+ textbox=gr.Textbox(placeholder="Message...", container=False),
119
+ chatbot=gr.Chatbot(height=600, type="tuples"),
120
+ )
121
 
 
 
 
 
 
 
 
 
122
 
 
 
 
123
  if __name__ == "__main__":
124
  demo.launch(server_name="0.0.0.0", server_port=7860)