Spaces:

Marcus719
/

ID2223_Lab2

Sleeping

App Files Files Community

Marcus719 commited on Dec 6, 2025

Commit

c380681

verified ·

1 Parent(s): 3d6744f

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -94

app.py CHANGED Viewed

@@ -4,22 +4,21 @@ import time
 from huggingface_hub import snapshot_download
 import gradio as gr
-# Attempt to import llama_cpp, handle failure gracefully in UI
 try:
     from llama_cpp import Llama
 except Exception as e:
     Llama = None
     Llama_import_error = e
-# ---------- Configuration ----------
-# ★★★ Replace with your model repository if needed ★★★
-MODEL_REPO = "Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF"
 GGUF_FILENAME = "unsloth.Q4_K_M.gguf"
-DEFAULT_N_CTX = 2048  # Context Window
 DEFAULT_MAX_TOKENS = 256 # Default generation length
-DEFAULT_N_THREADS = 2 # Recommended for free CPU tier
 # ------------------------------
 def log(msg: str):
@@ -27,178 +26,190 @@ def log(msg: str):
 def load_model_from_hub(repo_id: str, filename: str, n_ctx=DEFAULT_N_CTX, n_threads=DEFAULT_N_THREADS):
     if Llama is None:
-        raise RuntimeError(f"llama-cpp-python not installed: {Llama_import_error}")
-    log(f"Downloading model: {repo_id} / {filename} ...")
-    # Download specific GGUF file
     local_dir = snapshot_download(
         repo_id=repo_id,
         allow_patterns=[filename],
-        local_dir_use_symlinks=False
     )
     gguf_path = os.path.join(local_dir, filename)
-    # Fallback search if path joining fails
     if not os.path.exists(gguf_path):
         for root, dirs, files in os.walk(local_dir):
             if filename in files:
                 gguf_path = os.path.join(root, filename)
                 break
-    if not os.path.exists(gguf_path):
-        raise FileNotFoundError(f"Could not find {filename} in {local_dir}")
     log(f"Model path: {gguf_path}. Loading into memory...")
-    # Initialize Llama
     llm = Llama(model_path=gguf_path, n_ctx=n_ctx, n_threads=n_threads, verbose=False)
     log("Llama model loaded successfully!")
     return llm, gguf_path
 def init_model(state):
-    """Callback for Load Model button"""
     try:
         if state.get("llm") is not None:
-            return "Ready (Model already loaded)", state
-        log("Load request received...")
         llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
         state["llm"] = llm
         state["gguf_path"] = gguf_path
-        return "Ready", state
     except Exception as exc:
         tb = traceback.format_exc()
-        log(f"Init Error: {exc}\n{tb}")
-        return f"Load Failed: {exc}", state
 def generate_response(prompt: str, max_tokens: int, state):
-    """Callback for Generate button"""
     try:
         if not prompt or prompt.strip() == "":
-            return "Please enter a prompt.", "Idle", state
-        # Auto-load if not initialized
         if state.get("llm") is None:
             try:
-                log("Model not found, auto-loading...")
                 llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
                 state["llm"] = llm
                 state["gguf_path"] = gguf_path
             except Exception as e:
-                return f"Model Load Error: {e}", f"Error", state
         llm = state.get("llm")
-        log(f"Generating (Prompt len={len(prompt)})...")
-        # Llama 3 Prompt Format
         system_prompt = "You are a helpful AI assistant."
-        full_prompt = (
-            f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>"
-            f"<|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|>"
-            f"<|start_header_id|>assistant<|end_header_id|>\n\n"
-        )
         output = llm(
             full_prompt,
             max_tokens=max_tokens,
-            stop=["<|eot_id|>"],
             echo=False
         )
         text = output['choices'][0]['text']
         log("Generation complete.")
-        return text, "Done", state
     except Exception as exc:
         tb = traceback.format_exc()
         log(f"Generation Error: {exc}\n{tb}")
-        return f"Runtime Error: {exc}", f"Exception", state
 def soft_clear(current_state):
-    """Clear text only, keep model in memory"""
-    status = "Ready" if current_state.get("llm") else "Not initialized"
-    return "", status, current_state
-# ---------------- Gradio UI ----------------
-# Removed css and theme arguments for compatibility
-with gr.Blocks(title="Llama 3.2 Lab Interface") as demo:
     # Header
     with gr.Row():
-        with gr.Column(scale=5):
-            gr.Markdown("## Llama 3.2 (3B) Instruct - Lab Interface")
-            gr.Markdown(f"Running **{GGUF_FILENAME}** on CPU via `llama.cpp`.")
-        with gr.Column(scale=1, min_width=150):
-            status_label = gr.Label(value="Not initialized", label="Status", show_label=False)
-    gr.Markdown("---")
-    # Main Layout
     with gr.Row():
-        # LEFT: Controls
-        with gr.Column(scale=1):
-            prompt_in = gr.Textbox(
-                lines=8,
-                label="Input Prompt",
-                placeholder="Type your question here (e.g., Explain Quantum Mechanics)...",
-                elem_id="prompt-input"
-            )
-            with gr.Accordion("Settings", open=False):
-                max_tokens = gr.Slider(
-                    minimum=16,
-                    maximum=1024,
-                    step=16,
-                    value=DEFAULT_MAX_TOKENS,
-                    label="Max New Tokens",
-                    info="Higher values take longer to generate."
                 )
-            with gr.Row():
-                init_btn = gr.Button("Download & Load Model", variant="secondary")
-                clear_btn = gr.Button("Clear", variant="stop")
-            gen_btn = gr.Button("Generate Response", variant="primary")
-        # RIGHT: Output
-        with gr.Column(scale=1):
-            # Removed 'show_copy_button=True' to fix TypeError
             output_txt = gr.Textbox(
-                label="Model Response",
-                lines=14,
-                interactive=False
             )
     # Footer
-    gr.Markdown(
-        "**Note:** First run requires downloading ~2GB model. Inference runs on CPU and may be slow."
-    )
-    # State Management
-    state = gr.State({"llm": None, "gguf_path": None})
-    # Event Handlers
     init_btn.click(
         fn=init_model,
         inputs=state,
-        outputs=[status_label, state],
         show_progress=True
     )
     gen_btn.click(
         fn=generate_response,
         inputs=[prompt_in, max_tokens, state],
-        outputs=[output_txt, status_label, state],
         show_progress=True
     )
-    clear_btn.click(fn=soft_clear, inputs=[state], outputs=[prompt_in, status_label, state])
     clear_btn.click(lambda: "", outputs=[output_txt])
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 from huggingface_hub import snapshot_download
 import gradio as gr
+# Attempt to import llama_cpp, if failed, prompt in the UI
 try:
     from llama_cpp import Llama
 except Exception as e:
     Llama = None
     Llama_import_error = e
+# ---------- Configuration Area ----------
+# ★★★ Please change this to your model repository ★★★
+MODEL_REPO = "Marcus719/Llama-3.2-1B-Compare1-Lab2-GGUF"
+# Specify to download only the q4_k_m file to prevent running out of disk space
 GGUF_FILENAME = "unsloth.Q4_K_M.gguf"
+DEFAULT_N_CTX = 2048  # Context length
 DEFAULT_MAX_TOKENS = 256 # Default generation length
+DEFAULT_N_THREADS = 2 # Recommended 2 for free CPU tier
 # ------------------------------
 def log(msg: str):
 def load_model_from_hub(repo_id: str, filename: str, n_ctx=DEFAULT_N_CTX, n_threads=DEFAULT_N_THREADS):
     if Llama is None:
+        raise RuntimeError(f"llama-cpp-python not installed or failed to load: {Llama_import_error}")
+    log(f"Starting model download: {repo_id} / {filename} ...")
+    # Use snapshot_download to download a single file
+    # allow_patterns ensures only the GGUF file is downloaded
     local_dir = snapshot_download(
         repo_id=repo_id,
         allow_patterns=[filename],
+        local_dir_use_symlinks=False # Disabling symlinks for stability in Spaces
     )
+    # Construct full path
+    # snapshot_download usually preserves directory structure, otherwise we search
     gguf_path = os.path.join(local_dir, filename)
+    # Search for the file if direct path fails (for robustness)
     if not os.path.exists(gguf_path):
         for root, dirs, files in os.walk(local_dir):
             if filename in files:
                 gguf_path = os.path.join(root, filename)
                 break
+        if not os.path.exists(gguf_path):
+            raise FileNotFoundError(f"Could not find {filename} in {local_dir}")
     log(f"Model path: {gguf_path}. Loading into memory...")
+    # Initialize the model
     llm = Llama(model_path=gguf_path, n_ctx=n_ctx, n_threads=n_threads, verbose=False)
     log("Llama model loaded successfully!")
     return llm, gguf_path
 def init_model(state):
+    """Callback function for the Load button"""
     try:
         if state.get("llm") is not None:
+            return state
+        log("Received load request...")
+        # Download and load
         llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
+        # Update state
         state["llm"] = llm
         state["gguf_path"] = gguf_path
+        return state
     except Exception as exc:
         tb = traceback.format_exc()
+        log(f"Initialization Error: {exc}\n{tb}")
+        return state
 def generate_response(prompt: str, max_tokens: int, state):
+    """Callback function for the Generate button"""
     try:
         if not prompt or prompt.strip() == "":
+            return "Please enter an instruction.", state
+        # Lazy loading: attempt to auto-load if Generate is clicked without explicit initialization
         if state.get("llm") is None:
             try:
+                log("Model not detected, attempting auto-load...")
                 llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
                 state["llm"] = llm
                 state["gguf_path"] = gguf_path
             except Exception as e:
+                return f"Model Load Failed: {e}", state
         llm = state.get("llm")
+        log(f"Generating (Prompt Length={len(prompt)})...")
+        # Construct Llama 3 format Prompt
         system_prompt = "You are a helpful AI assistant."
+        # Simple concatenation: System + User
+        # For strict formatting, use tokenizer.apply_chat_template
+        # Using simple text concatenation here for generality, Llama 3 usually understands
+        full_prompt = f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+        # Inference
         output = llm(
             full_prompt,
             max_tokens=max_tokens,
+            stop=["<|eot_id|>"], # Stop token
             echo=False
         )
         text = output['choices'][0]['text']
         log("Generation complete.")
+        return text, state
     except Exception as exc:
         tb = traceback.format_exc()
         log(f"Generation Error: {exc}\n{tb}")
+        return f"Runtime Error: {exc}", state
 def soft_clear(current_state):
+    """Clear button: only clears text, keeps the model loaded"""
+    return "", current_state
+# ---------------- Gradio UI Construction ----------------
+# Theme settings
+theme = gr.themes.Soft(
+    primary_hue="indigo",
+    secondary_hue="slate",
+    neutral_hue="slate")
+# Custom CSS
+custom_css = """.footer-text { font-size: 0.8em; color: gray; text-align: center; }"""
+with gr.Blocks(title="Llama 3.2 Lab2 Project") as demo:
     # Header
     with gr.Row():
+        gr.Markdown("# Llama 3.2 (1B) Fine-Tuned Chatbot")
+        gr.Markdown(
+            f"""
+            **ID2223 Lab 2 Project** | Fine-tuned on **FineTome-100k**.
+            Running on CPU (GGUF 4-bit) | Model: `{MODEL_REPO}`
+            """
+        )
+    # Main layout
     with gr.Row():
+        # Left: Input and Controls
+        with gr.Column(scale=4):
+            with gr.Group():
+                prompt_in = gr.Textbox(
+                    lines=5,
+                    label="User Instruction (User Input)",
+                    placeholder="e.g., Explain Quantum Mechanics...",
+                    elem_id="prompt-input"
                 )
+                with gr.Accordion("Advanced Parameters", open=False):
+                    max_tokens = gr.Slider(
+                        minimum=16,
+                        maximum=1024,
+                        step=16,
+                        value=DEFAULT_MAX_TOKENS,
+                        label="Max Generation Length (Max Tokens)",
+                        info="Longer generations will take more CPU time."
+                    )
+                with gr.Row():
+                    init_btn = gr.Button("1. Load Model", variant="secondary")
+                    gen_btn = gr.Button("2. Generate Response", variant="primary")
+                clear_btn = gr.Button("Clear Chat", variant="stop")
+        # Right: Output Display
+        with gr.Column(scale=6):
             output_txt = gr.Textbox(
+                label="Model Response (Response)",
+                lines=15,
             )
     # Footer
+    with gr.Row():
+        gr.Markdown(
+            "*Note: Inference runs on a free CPU, so speed may be slow. The model (approx. 2GB) must be downloaded on first run, please be patient.*",
+            elem_classes=["footer-text"]
+        )
+    # State storage
+    state = gr.State({"llm": None, "gguf_path": None, "status": "Not initialized"})
+    # Event binding
     init_btn.click(
         fn=init_model,
         inputs=state,
+        outputs=[state],
         show_progress=True
     )
     gen_btn.click(
         fn=generate_response,
         inputs=[prompt_in, max_tokens, state],
+        outputs=[output_txt, state],
         show_progress=True
     )
+    clear_btn.click(fn=soft_clear, inputs=[state], outputs=[prompt_in, state])
     clear_btn.click(lambda: "", outputs=[output_txt])
+# Launch the application
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)