gusreinaos committed on
Commit
1a7c573
·
1 Parent(s): 7000311
Files changed (1) hide show
  1. app.py +30 -22
app.py CHANGED
@@ -3,43 +3,51 @@ import subprocess
3
  import sys
4
  import os
5
 
6
- # Install llama-cpp-python at runtime if missing (fixes HF build issues)
7
  try:
8
  from llama_cpp import Llama
9
  print("llama-cpp-python already installed.")
10
  except ImportError:
11
- print("Installing llama-cpp-python (runtime fix for HF Spaces)...")
12
- subprocess.check_call([
13
- sys.executable, "-m", "pip", "install", "--no-cache-dir",
14
- "https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.89/llama_cpp_python-0.2.89-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
15
- ])
16
- from llama_cpp import Llama
 
 
 
 
 
 
 
 
17
 
18
  from huggingface_hub import hf_hub_download
19
 
20
- # === CHANGE THESE TO YOUR FINE-TUNED MODEL ONCE UPLOADED ===
21
- MODEL_NAME = "TheBloke/Llama-2-7B-Chat-GGUF" # replace later
22
- MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf" # replace later
23
 
24
- print("Downloading model from HuggingFace...")
25
  model_path = hf_hub_download(
26
- repo_id=MODEL_NAME,
27
  filename=MODEL_FILE,
28
  local_dir="./models",
29
  local_dir_use_symlinks=False
30
  )
31
- print(f"Model downloaded: {model_path}")
32
 
33
- print("Loading model into memory...")
34
  llm = Llama(
35
  model_path=model_path,
36
- n_ctx=4096,
37
  n_threads=8,
38
- n_gpu_layers=0,
39
  n_batch=512,
 
40
  verbose=False
41
  )
42
- print("Model loaded successfully!")
43
 
44
  def chat(message, history):
45
  if not message.strip():
@@ -57,15 +65,15 @@ def chat(message, history):
57
  max_tokens=512,
58
  temperature=0.7,
59
  top_p=0.9,
60
- stop=["User:", "\nUser:", "</s>"],
61
  stream=False
62
  )
63
 
64
- bot_response = response['choices'][0]['message']['content'].strip()
65
  history.append((message, bot_response))
66
  return history, ""
67
 
68
- # === Your awesome CSS (unchanged) ===
69
  custom_css = """
70
  @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
71
  body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; }
@@ -85,7 +93,7 @@ footer { display: none !important; }
85
  with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
86
  gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Neural Network Active\n> Type your query below...\n```")
87
  chatbot = gr.Chatbot(height=600)
88
-
89
  with gr.Row():
90
  msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
91
  submit = gr.Button("SEND", scale=1, variant="primary")
@@ -99,7 +107,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title=
99
  ],
100
  inputs=msg
101
  )
102
-
103
  gr.ClearButton([msg, chatbot], value="CLEAR")
104
 
105
  submit.click(chat, [msg, chatbot], [chatbot, msg])
 
3
  import sys
4
  import os
5
 
6
+ # === RUNTIME INSTALL OF llama-cpp-python (fixes all HF issues) ===
7
  try:
8
  from llama_cpp import Llama
9
  print("llama-cpp-python already installed.")
10
  except ImportError:
11
+ print("Installing llama-cpp-python (fast CPU wheel)...")
12
+ try:
13
+ subprocess.check_call([
14
+ sys.executable, "-m", "pip", "install", "--no-cache-dir",
15
+ "https://github.com/yownas/llama-cpp-python-wheels/releases/download/v0.3.16/llama_cpp_python-0.3.16+cpuavx-cp310-cp310-linux_x86_64.whl"
16
+ ])
17
+ print("llama-cpp-python installed from wheel.")
18
+ except Exception as e: # <-- fixed: added "as e" so it doesn't crash
19
+ print("Wheel failed → falling back to PyPI (2–4 min)...")
20
+ subprocess.check_call([
21
+ sys.executable, "-m", "pip", "install", "--no-cache-dir",
22
+ "llama-cpp-python==0.3.16", "--force-reinstall"
23
+ ])
24
+ from llama_cpp import Llama # <-- fixed: must be inside the except block!
25
 
26
  from huggingface_hub import hf_hub_download
27
 
28
+ # === YOUR FINE-TUNED LLAMA 3.2 3B ===
29
+ MODEL_REPO = "your-username/your-model-repo" # CHANGE THIS
30
+ MODEL_FILE = "your-finetuned-llama-3.2-3b-q4_k_m.gguf" # CHANGE THIS
31
 
32
+ print("Downloading your fine-tuned Llama 3.2 3B model...")
33
  model_path = hf_hub_download(
34
+ repo_id=MODEL_REPO,
35
  filename=MODEL_FILE,
36
  local_dir="./models",
37
  local_dir_use_symlinks=False
38
  )
39
+ print(f"Model ready: {model_path}")
40
 
41
+ print("Loading model (Llama 3.2 3B)...")
42
  llm = Llama(
43
  model_path=model_path,
44
+ n_ctx=8192,
45
  n_threads=8,
 
46
  n_batch=512,
47
+ n_gpu_layers=0,
48
  verbose=False
49
  )
50
+ print("Model loaded!")
51
 
52
  def chat(message, history):
53
  if not message.strip():
 
65
  max_tokens=512,
66
  temperature=0.7,
67
  top_p=0.9,
68
+ stop=["<|eot_id|>", "<|end_of_text|>"], # <-- Llama 3.2 stop tokens
69
  stream=False
70
  )
71
 
72
+ bot_response = response["choices"][0]["message"]["content"].strip()
73
  history.append((message, bot_response))
74
  return history, ""
75
 
76
+ # === CSS & INTERFACE (unchanged, perfect) ===
77
  custom_css = """
78
  @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Source+Code+Pro:wght@400;600&display=swap');
79
  body, .gradio-container { background: #0c0c0c !important; font-family: 'JetBrains Mono', monospace !important; }
 
93
  with gr.Blocks(theme=gr.themes.Base(primary_hue="green"), css=custom_css, title="$ LLAMA TERMINAL") as demo:
94
  gr.Markdown("# $ LLAMA TERMINAL\n```\n> System Online | Neural Network Active\n> Type your query below...\n```")
95
  chatbot = gr.Chatbot(height=600)
96
+
97
  with gr.Row():
98
  msg = gr.Textbox(placeholder="$ Enter command...", show_label=False, scale=8, container=False)
99
  submit = gr.Button("SEND", scale=1, variant="primary")
 
107
  ],
108
  inputs=msg
109
  )
110
+
111
  gr.ClearButton([msg, chatbot], value="CLEAR")
112
 
113
  submit.click(chat, [msg, chatbot], [chatbot, msg])