Spaces:

oscarrgkth
/

lab2

Build error

App Files Files Community

gusreinaos committed 13 days ago

Commit

691a18b

1 Parent(s): ce67100

Fixed

Browse files

Files changed (3) hide show

README.md +5 -31
app.py +60 -169
requirements.txt +1 -3

README.md CHANGED Viewed

@@ -1,38 +1,12 @@
 ---
-title: Fine-Tuned Llama 3.2 Chatbot
 emoji: 🦙
-colorFrom: blue
-colorTo: purple
 sdk: gradio
 sdk_version: 4.44.1
 app_file: app.py
 pinned: false
 license: apache-2.0
----
-# Fine-Tuned Llama 3.2 3B Chatbot
-This Space hosts a chatbot powered by a fine-tuned Llama 3.2 3B model.
-## Model Details
-- **Base Model:** Llama 3.2 3B Instruct
-- **Fine-tuning Method:** LoRA (Low-Rank Adaptation)
-- **Dataset:** FineTome-100k instruction dataset
-- **Format:** GGUF (q4_k_m quantization)
-- **Inference:** CPU-based using llama.cpp
-## Training
-The model was fine-tuned using:
-- Parameter Efficient Fine-Tuning (PEFT) with LoRA
-- 4-bit quantization during training
-- Trained on 100,000 high-quality instruction-response pairs
-## Usage
-Simply type your message in the chat box and the model will respond!
-## Course
-This project was completed as part of the ID2223 Scalable Machine Learning course at KTH.

 ---
+title: Llama Terminal
 emoji: 🦙
+colorFrom: green
+colorTo: black
 sdk: gradio
 sdk_version: 4.44.1
 app_file: app.py
 pinned: false
 license: apache-2.0
+hardware: cpu-upgrade  # NOTE(review): cpu-upgrade is a paid tier; the free cpu-basic tier already provides 16GB RAM — confirm this key is honored by Spaces
+---

app.py CHANGED Viewed

@@ -1,199 +1,93 @@
 import gradio as gr
-from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
 import os
-# Download a pre-made GGUF model from HuggingFace
-MODEL_NAME = "TheBloke/Llama-2-7B-Chat-GGUF"
-MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf"
-print("📥 Downloading model from HuggingFace...")
 model_path = hf_hub_download(
     repo_id=MODEL_NAME,
     filename=MODEL_FILE,
-    local_dir="./models"
 )
-print(f"✅ Model downloaded to: {model_path}")
-print("🚀 Loading model...")
 llm = Llama(
     model_path=model_path,
-    n_ctx=2048,
-    n_threads=4,
     n_gpu_layers=0,
     verbose=False
 )
-print("✅ Model loaded!")
 def chat(message, history):
-    # Build prompt from history
-    prompt = ""
     for user_msg, bot_msg in history:
-        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
-    prompt += f"User: {message}\nAssistant:"
-    # Generate response
-    response = llm(
-        prompt,
         max_tokens=512,
         temperature=0.7,
         top_p=0.9,
-        stop=["\nUser:", "User:"],
-        echo=False
     )
-    bot_response = response['choices'][0]['text'].strip()
     history.append((message, bot_response))
     return history, ""
-# Clean terminal/hacker CSS
 custom_css = """
 @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
-/* Terminal background */
-body, .gradio-container {
-    background: #0c0c0c !important;
-    font-family: 'JetBrains Mono', 'Source Code Pro', monospace !important;
-}
-.gradio-container {
-    max-width: 1400px !important;
-    border: 1px solid #00ff00 !important;
-    box-shadow: 0 0 10px rgba(0, 255, 0, 0.3) !important;
-}
-/* Text colors */
-*, h1, h2, h3, label, p {
-    color: #00ff00 !important;
-    font-family: 'JetBrains Mono', monospace !important;
-}
-h1 {
-    font-size: 28px !important;
-    font-weight: 700 !important;
-    letter-spacing: 2px !important;
-}
-/* Chatbot messages */
-.message {
-    background: #1a1a1a !important;
-    border-left: 3px solid #00ff00 !important;
-    color: #00ff00 !important;
-    padding: 12px !important;
-    font-family: 'JetBrains Mono', monospace !important;
-}
-.user {
-    border-left: 3px solid #00cc00 !important;
-}
-.bot {
-    border-left: 3px solid #00ff00 !important;
-}
-/* Input field */
-input, textarea {
-    background: #1a1a1a !important;
-    border: 1px solid #00ff00 !important;
-    color: #00ff00 !important;
-    font-family: 'JetBrains Mono', monospace !important;
-    font-size: 14px !important;
-}
-input:focus, textarea:focus {
-    border: 1px solid #00ff00 !important;
-    outline: none !important;
-    box-shadow: 0 0 5px rgba(0, 255, 0, 0.5) !important;
-}
-input::placeholder, textarea::placeholder {
-    color: #006600 !important;
-}
-/* Buttons */
-button {
-    background: #1a1a1a !important;
-    border: 1px solid #00ff00 !important;
-    color: #00ff00 !important;
-    font-family: 'JetBrains Mono', monospace !important;
-    font-weight: 600 !important;
-    transition: all 0.2s !important;
-}
-button:hover {
-    background: #00ff00 !important;
-    color: #0c0c0c !important;
-}
-.primary {
-    background: #00ff00 !important;
-    color: #0c0c0c !important;
-}
-.primary:hover {
-    background: #00cc00 !important;
-}
-/* Examples */
-.examples {
-    background: #1a1a1a !important;
-    border: 1px solid #00ff00 !important;
-}
-/* Scrollbar */
-::-webkit-scrollbar {
-    width: 8px !important;
-    background: #0c0c0c !important;
-}
-::-webkit-scrollbar-thumb {
-    background: #00ff00 !important;
-}
-::-webkit-scrollbar-thumb:hover {
-    background: #00cc00 !important;
-}
-footer {
-    display: none !important;
-}
-/* Code blocks */
-pre, code {
-    background: #1a1a1a !important;
-    border: 1px solid #00ff00 !important;
-    color: #00ff00 !important;
-}
 """
-# Create interface using Blocks
-with gr.Blocks(
-    theme=gr.themes.Base(primary_hue="green"),
-    css=custom_css,
-    title="$ LLAMA TERMINAL"
-) as demo:
-    gr.Markdown(
-        """
-        # $ LLAMA TERMINAL
-        ```
-        > System Online | Neural Network Active
-        > Type your query below...
-        ```
-        """
-    )
     chatbot = gr.Chatbot(height=600)
     with gr.Row():
-        msg = gr.Textbox(
-            placeholder="$ Enter command...",
-            show_label=False,
-            scale=8,
-            container=False
-        )
         submit = gr.Button("SEND", scale=1, variant="primary")
     gr.Examples(
@@ -201,16 +95,13 @@ with gr.Blocks(
             "What is the capital of France?",
             "Explain quantum computing",
             "Write fibonacci in Python",
-            "Optimize sleep patterns",
-            "Continue: 2, 4, 6, 8...",
             "Write a haiku about AI",
         ],
         inputs=msg
     )
-    clear = gr.ClearButton([msg, chatbot], value="CLEAR")
-    # Event handlers
     submit.click(chat, [msg, chatbot], [chatbot, msg])
     msg.submit(chat, [msg, chatbot], [chatbot, msg])
@@ -221,4 +112,4 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True
-    )

 import gradio as gr
+import subprocess
+import sys
 import os
+# Install llama-cpp-python at runtime if missing (fixes HF build issues)
+try:
+    from llama_cpp import Llama
+    print("llama-cpp-python already installed.")
+except ImportError:
+    print("Installing llama-cpp-python (runtime fix for HF Spaces)...")
+    subprocess.check_call([
+        sys.executable, "-m", "pip", "install", "--no-cache-dir",
+        "https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.89/llama_cpp_python-0.2.89-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
+    ])
+    from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+# === CHANGE THESE TO YOUR FINE-TUNED MODEL ONCE UPLOADED ===
+MODEL_NAME = "TheBloke/Llama-2-7B-Chat-GGUF"           # ← replace later
+MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf"             # ← replace later
+print("Downloading model from HuggingFace...")
 model_path = hf_hub_download(
     repo_id=MODEL_NAME,
     filename=MODEL_FILE,
+    local_dir="./models",
+    local_dir_use_symlinks=False
 )
+print(f"Model downloaded: {model_path}")
+print("Loading model into memory...")
 llm = Llama(
     model_path=model_path,
+    n_ctx=4096,
+    n_threads=8,
     n_gpu_layers=0,
+    n_batch=512,
     verbose=False
 )
+print("Model loaded successfully!")
 def chat(message, history):  # Gradio handler for SEND / Enter: returns (updated chat history, new textbox value)
+    if not message.strip():  # blank or whitespace-only input: skip the model call
+        return history, ""  # history unchanged; "" is written back to the textbox output
+    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]  # chat-format message list, starting with a fixed system prompt
     for user_msg, bot_msg in history:  # replay earlier (user, bot) turns so the model sees the conversation so far
+        messages.append({"role": "user", "content": user_msg})
+        if bot_msg:  # skip empty/None assistant replies
+            messages.append({"role": "assistant", "content": bot_msg})
+    messages.append({"role": "user", "content": message})  # the new user turn goes last
+    response = llm.create_chat_completion(  # `llm` is the module-level Llama instance loaded at startup
+        messages=messages,
         max_tokens=512,
         temperature=0.7,
         top_p=0.9,
+        stop=["User:", "\nUser:", "</s>"],  # NOTE(review): "User:" stops look carried over from the old plain-text prompt — confirm still wanted with chat completion
+        stream=False  # a single response object is indexed below, not a stream of chunks
     )
+    bot_response = response['choices'][0]['message']['content'].strip()  # text of the first choice
     history.append((message, bot_response))  # mutates the incoming history list in place
     return history, ""  # outputs are wired to [chatbot, msg]: refreshed history, cleared textbox
+# === Terminal-theme CSS (condensed from the previous multi-line version; some rules dropped) ===
 custom_css = """
 @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
+body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; }
+.gradio-container { max-width: 1400px !important; border: 1px solid #00ff00 !important; box-shadow: 0 0 10px rgba(0,255,0,0.3) !important; }
+*, h1, h2, h3, label, p { color: #00ff00 !important; }
+.message { background: #1a1a1a !important; border-left: 3px solid #00ff00 !important; padding: 12px !important; }
+.user { border-left-color: #00cc00 !important; }
+input, textarea { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
+button { background: #1a1a1a !important; border: 1px solid #00ff00 !important; color: #00ff00 !important; }
+button:hover { background: #00ff00 !important; color: #000 !important; }
+.primary { background: #00ff00 !important; color: #000 !important; }
+footer { display: none !important; }
+::-webkit-scrollbar { width: 8px; background: #0c0c0c; }
+::-webkit-scrollbar-thumb { background: #00ff00; }
 """
+with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
+    gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Neural Network Active\n> Type your query below...\n```")
     chatbot = gr.Chatbot(height=600)
     with gr.Row():
+        msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
         submit = gr.Button("SEND", scale=1, variant="primary")
     gr.Examples(
             "What is the capital of France?",
             "Explain quantum computing",
             "Write fibonacci in Python",
             "Write a haiku about AI",
         ],
         inputs=msg
     )
+    gr.ClearButton([msg, chatbot], value="CLEAR")
     submit.click(chat, [msg, chatbot], [chatbot, msg])
     msg.submit(chat, [msg, chatbot], [chatbot, msg])
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True
+    )

requirements.txt CHANGED Viewed

@@ -1,5 +1,3 @@
 gradio==4.44.1
 huggingface_hub==0.25.2
-gradio-client==0.17.0
-# Direct working wheel — builds in <60 seconds
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.89/llama_cpp_python-0.2.89-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

 gradio==4.44.1
 huggingface_hub==0.25.2
+gradio-client==0.17.0