gusreinaos committed on
Commit
691a18b
·
1 Parent(s): ce67100
Files changed (3) hide show
  1. README.md +5 -31
  2. app.py +60 -169
  3. requirements.txt +1 -3
README.md CHANGED
@@ -1,38 +1,12 @@
1
  ---
2
- title: Fine-Tuned Llama 3.2 Chatbot
3
  emoji: 🦙
4
- colorFrom: blue
5
- colorTo: purple
6
  sdk: gradio
7
  sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
- ---
12
-
13
- # Fine-Tuned Llama 3.2 3B Chatbot
14
-
15
- This Space hosts a chatbot powered by a fine-tuned Llama 3.2 3B model.
16
-
17
- ## Model Details
18
-
19
- - **Base Model:** Llama 3.2 3B Instruct
20
- - **Fine-tuning Method:** LoRA (Low-Rank Adaptation)
21
- - **Dataset:** FineTome-100k instruction dataset
22
- - **Format:** GGUF (q4_k_m quantization)
23
- - **Inference:** CPU-based using llama.cpp
24
-
25
- ## Training
26
-
27
- The model was fine-tuned using:
28
- - Parameter Efficient Fine-Tuning (PEFT) with LoRA
29
- - 4-bit quantization during training
30
- - Trained on 100,000 high-quality instruction-response pairs
31
-
32
- ## Usage
33
-
34
- Simply type your message in the chat box and the model will respond!
35
-
36
- ## Course
37
-
38
- This project was completed as part of the ID2223 Scalable Machine Learning course at KTH.
 
1
  ---
2
+ title: Llama Terminal
3
  emoji: 🦙
4
+ colorFrom: green
5
+ colorTo: black
6
  sdk: gradio
7
  sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ hardware: cpu-upgrade # Unlocks 16GB RAM — free for public Spaces
12
+ ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,199 +1,93 @@
1
  import gradio as gr
2
- from llama_cpp import Llama
3
- from huggingface_hub import hf_hub_download
4
  import os
5
 
6
- # Download a pre-made GGUF model from HuggingFace
7
- MODEL_NAME = "TheBloke/Llama-2-7B-Chat-GGUF"
8
- MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- print("📥 Downloading model from HuggingFace...")
11
  model_path = hf_hub_download(
12
  repo_id=MODEL_NAME,
13
  filename=MODEL_FILE,
14
- local_dir="./models"
 
15
  )
16
- print(f"Model downloaded to: {model_path}")
17
 
18
- print("🚀 Loading model...")
19
  llm = Llama(
20
  model_path=model_path,
21
- n_ctx=2048,
22
- n_threads=4,
23
  n_gpu_layers=0,
 
24
  verbose=False
25
  )
26
- print("Model loaded!")
27
 
28
  def chat(message, history):
29
- # Build prompt from history
30
- prompt = ""
31
 
 
32
  for user_msg, bot_msg in history:
33
- prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
 
 
 
34
 
35
- prompt += f"User: {message}\nAssistant:"
36
-
37
- # Generate response
38
- response = llm(
39
- prompt,
40
  max_tokens=512,
41
  temperature=0.7,
42
  top_p=0.9,
43
- stop=["\nUser:", "User:"],
44
- echo=False
45
  )
46
 
47
- bot_response = response['choices'][0]['text'].strip()
48
  history.append((message, bot_response))
49
  return history, ""
50
 
51
- # Clean terminal/hacker CSS
52
  custom_css = """
53
  @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
54
-
55
- /* Terminal background */
56
- body, .gradio-container {
57
- background: #0c0c0c !important;
58
- font-family: 'JetBrains Mono', 'Source Code Pro', monospace !important;
59
- }
60
-
61
- .gradio-container {
62
- max-width: 1400px !important;
63
- border: 1px solid #00ff00 !important;
64
- box-shadow: 0 0 10px rgba(0, 255, 0, 0.3) !important;
65
- }
66
-
67
- /* Text colors */
68
- *, h1, h2, h3, label, p {
69
- color: #00ff00 !important;
70
- font-family: 'JetBrains Mono', monospace !important;
71
- }
72
-
73
- h1 {
74
- font-size: 28px !important;
75
- font-weight: 700 !important;
76
- letter-spacing: 2px !important;
77
- }
78
-
79
- /* Chatbot messages */
80
- .message {
81
- background: #1a1a1a !important;
82
- border-left: 3px solid #00ff00 !important;
83
- color: #00ff00 !important;
84
- padding: 12px !important;
85
- font-family: 'JetBrains Mono', monospace !important;
86
- }
87
-
88
- .user {
89
- border-left: 3px solid #00cc00 !important;
90
- }
91
-
92
- .bot {
93
- border-left: 3px solid #00ff00 !important;
94
- }
95
-
96
- /* Input field */
97
- input, textarea {
98
- background: #1a1a1a !important;
99
- border: 1px solid #00ff00 !important;
100
- color: #00ff00 !important;
101
- font-family: 'JetBrains Mono', monospace !important;
102
- font-size: 14px !important;
103
- }
104
-
105
- input:focus, textarea:focus {
106
- border: 1px solid #00ff00 !important;
107
- outline: none !important;
108
- box-shadow: 0 0 5px rgba(0, 255, 0, 0.5) !important;
109
- }
110
-
111
- input::placeholder, textarea::placeholder {
112
- color: #006600 !important;
113
- }
114
-
115
- /* Buttons */
116
- button {
117
- background: #1a1a1a !important;
118
- border: 1px solid #00ff00 !important;
119
- color: #00ff00 !important;
120
- font-family: 'JetBrains Mono', monospace !important;
121
- font-weight: 600 !important;
122
- transition: all 0.2s !important;
123
- }
124
-
125
- button:hover {
126
- background: #00ff00 !important;
127
- color: #0c0c0c !important;
128
- }
129
-
130
- .primary {
131
- background: #00ff00 !important;
132
- color: #0c0c0c !important;
133
- }
134
-
135
- .primary:hover {
136
- background: #00cc00 !important;
137
- }
138
-
139
- /* Examples */
140
- .examples {
141
- background: #1a1a1a !important;
142
- border: 1px solid #00ff00 !important;
143
- }
144
-
145
- /* Scrollbar */
146
- ::-webkit-scrollbar {
147
- width: 8px !important;
148
- background: #0c0c0c !important;
149
- }
150
-
151
- ::-webkit-scrollbar-thumb {
152
- background: #00ff00 !important;
153
- }
154
-
155
- ::-webkit-scrollbar-thumb:hover {
156
- background: #00cc00 !important;
157
- }
158
-
159
- footer {
160
- display: none !important;
161
- }
162
-
163
- /* Code blocks */
164
- pre, code {
165
- background: #1a1a1a !important;
166
- border: 1px solid #00ff00 !important;
167
- color: #00ff00 !important;
168
- }
169
  """
170
 
171
- # Create interface using Blocks
172
- with gr.Blocks(
173
- theme=gr.themes.Base(primary_hue="green"),
174
- css=custom_css,
175
- title="$ LLAMA TERMINAL"
176
- ) as demo:
177
-
178
- gr.Markdown(
179
- """
180
- # $ LLAMA TERMINAL
181
- ```
182
- > System Online | Neural Network Active
183
- > Type your query below...
184
- ```
185
- """
186
- )
187
-
188
  chatbot = gr.Chatbot(height=600)
189
-
190
  with gr.Row():
191
- msg = gr.Textbox(
192
- placeholder="$ Enter command...",
193
- show_label=False,
194
- scale=8,
195
- container=False
196
- )
197
  submit = gr.Button("SEND", scale=1, variant="primary")
198
 
199
  gr.Examples(
@@ -201,16 +95,13 @@ with gr.Blocks(
201
  "What is the capital of France?",
202
  "Explain quantum computing",
203
  "Write fibonacci in Python",
204
- "Optimize sleep patterns",
205
- "Continue: 2, 4, 6, 8...",
206
  "Write a haiku about AI",
207
  ],
208
  inputs=msg
209
  )
 
 
210
 
211
- clear = gr.ClearButton([msg, chatbot], value="CLEAR")
212
-
213
- # Event handlers
214
  submit.click(chat, [msg, chatbot], [chatbot, msg])
215
  msg.submit(chat, [msg, chatbot], [chatbot, msg])
216
 
@@ -221,4 +112,4 @@ if __name__ == "__main__":
221
  server_name="0.0.0.0",
222
  server_port=7860,
223
  show_error=True
224
- )
 
1
  import gradio as gr
2
+ import subprocess
3
+ import sys
4
  import os
5
 
6
+ # Install llama-cpp-python at runtime if missing (fixes HF build issues)
7
+ try:
8
+ from llama_cpp import Llama
9
+ print("llama-cpp-python already installed.")
10
+ except ImportError:
11
+ print("Installing llama-cpp-python (runtime fix for HF Spaces)...")
12
+ subprocess.check_call([
13
+ sys.executable, "-m", "pip", "install", "--no-cache-dir",
14
+ "https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.89/llama_cpp_python-0.2.89-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
15
+ ])
16
+ from llama_cpp import Llama
17
+
18
+ from huggingface_hub import hf_hub_download
19
+
20
+ # === CHANGE THESE TO YOUR FINE-TUNED MODEL ONCE UPLOADED ===
21
+ MODEL_NAME = "TheBloke/Llama-2-7B-Chat-GGUF" # ← replace later
22
+ MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf" # ← replace later
23
 
24
+ print("Downloading model from HuggingFace...")
25
  model_path = hf_hub_download(
26
  repo_id=MODEL_NAME,
27
  filename=MODEL_FILE,
28
+ local_dir="./models",
29
+ local_dir_use_symlinks=False
30
  )
31
+ print(f"Model downloaded: {model_path}")
32
 
33
+ print("Loading model into memory...")
34
  llm = Llama(
35
  model_path=model_path,
36
+ n_ctx=4096,
37
+ n_threads=8,
38
  n_gpu_layers=0,
39
+ n_batch=512,
40
  verbose=False
41
  )
42
+ print("Model loaded successfully!")
43
 
44
  def chat(message, history):
45
+ if not message.strip():
46
+ return history, ""
47
 
48
+ messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
49
  for user_msg, bot_msg in history:
50
+ messages.append({"role": "user", "content": user_msg})
51
+ if bot_msg:
52
+ messages.append({"role": "assistant", "content": bot_msg})
53
+ messages.append({"role": "user", "content": message})
54
 
55
+ response = llm.create_chat_completion(
56
+ messages=messages,
 
 
 
57
  max_tokens=512,
58
  temperature=0.7,
59
  top_p=0.9,
60
+ stop=["User:", "\nUser:", "</s>"],
61
+ stream=False
62
  )
63
 
64
+ bot_response = response['choices'][0]['message']['content'].strip()
65
  history.append((message, bot_response))
66
  return history, ""
67
 
68
+ # === Your awesome CSS (unchanged) ===
69
  custom_css = """
70
  @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
71
+ body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; }
72
+ .gradio-container { max-width: 1400px !important; border: 1px solid #00ff00 !important; box-shadow: 0 0 10px rgba(0,255,0,0.3) !important; }
73
+ *, h1, h2, h3, label, p { color: #00ff00 !important; }
74
+ .message { background: #1a1a1a !important; border-left: 3px solid #00ff00 !important; padding: 12px !important; }
75
+ .user { border-left-color: #00cc00 !important; }
76
+ input, textarea { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
77
+ button { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
78
+ button:hover { background: #00ff00 !important; color: #000 !important; }
79
+ .primary { background: #00ff00 !important; color: #000 !important; }
80
+ footer { display: none !important; }
81
+ ::-webkit-scrollbar { width: 8px; background: #0c0c0c; }
82
+ ::-webkit-scrollbar-thumb { background: #00ff00; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  """
84
 
85
+ with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
86
+ gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Neural Network Active\n> Type your query below...\n```")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  chatbot = gr.Chatbot(height=600)
88
+
89
  with gr.Row():
90
+ msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
 
 
 
 
 
91
  submit = gr.Button("SEND", scale=1, variant="primary")
92
 
93
  gr.Examples(
 
95
  "What is the capital of France?",
96
  "Explain quantum computing",
97
  "Write fibonacci in Python",
 
 
98
  "Write a haiku about AI",
99
  ],
100
  inputs=msg
101
  )
102
+
103
+ gr.ClearButton([msg, chatbot], value="CLEAR")
104
 
 
 
 
105
  submit.click(chat, [msg, chatbot], [chatbot, msg])
106
  msg.submit(chat, [msg, chatbot], [chatbot, msg])
107
 
 
112
  server_name="0.0.0.0",
113
  server_port=7860,
114
  show_error=True
115
+ )
requirements.txt CHANGED
@@ -1,5 +1,3 @@
1
  gradio==4.44.1
2
  huggingface_hub==0.25.2
3
- gradio-client==0.17.0
4
- # Direct working wheel — builds in <60 seconds
5
- https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.89/llama_cpp_python-0.2.89-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
 
1
  gradio==4.44.1
2
  huggingface_hub==0.25.2
3
+ gradio-client==0.17.0