BlazerApp committed on
Commit
e839314
·
1 Parent(s): 5eee29c

added model selection

Browse files
Files changed (1) hide show
  1. app.py +75 -15
app.py CHANGED
@@ -2,36 +2,88 @@ import gradio as gr
2
  from huggingface_hub import hf_hub_download
3
  from llama_cpp import Llama
4
 
5
- # Download the GGUF model from the new Organization Hub
6
- model_path = hf_hub_download(
7
- repo_id="Emil-Matteus/llama-32-1b",
8
- filename="llama-3.2-1b-instruct.Q4_K_M.gguf"
9
- )
 
 
 
 
 
 
 
10
 
11
- # Initialize the local Llama model
12
- # n_gpu_layers=0 forces CPU usage. n_ctx sets context window.
13
- llm = Llama(
14
- model_path=model_path,
15
- n_gpu_layers=0,
16
- n_ctx=4096,
17
- verbose=False
18
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def respond(
21
  message,
22
  history: list[dict[str, str]],
 
23
  system_message,
24
  max_tokens,
25
  temperature,
26
  top_p,
27
  ):
 
 
 
 
 
 
 
 
 
28
  messages = [{"role": "system", "content": system_message}]
29
  messages.extend(history)
30
  messages.append({"role": "user", "content": message})
31
 
32
  response = ""
33
 
34
- # Generate response using the local model
35
  completion = llm.create_chat_completion(
36
  messages=messages,
37
  max_tokens=max_tokens,
@@ -46,6 +98,7 @@ def respond(
46
  response += token
47
  yield response
48
 
 
49
  """
50
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
51
  """
@@ -53,6 +106,13 @@ chatbot = gr.ChatInterface(
53
  respond,
54
  type="messages",
55
  additional_inputs=[
 
 
 
 
 
 
 
56
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
57
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
58
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
@@ -67,4 +127,4 @@ chatbot = gr.ChatInterface(
67
  )
68
 
69
  if __name__ == "__main__":
70
- chatbot.launch()
 
2
  from huggingface_hub import hf_hub_download
3
  from llama_cpp import Llama
4
 
5
+ # --- Configuration ---
6
+ # Define available models: Label -> (Repo ID, GGUF Filename)
7
+ MODELS = {
8
+ "Llama-3.2-1B": {
9
+ "repo_id": "Emil-Matteus/llama-32-1b",
10
+ "filename": "llama-3.2-1b-instruct.Q4_K_M.gguf"
11
+ },
12
+ "Llama-3.2-3B": {
13
+ "repo_id": "Emil-Matteus/llama-3B_model-GGUF",
14
+ "filename": "llama-3B-Q4_K_M.gguf"
15
+ }
16
+ }
17
 
18
+ # Global state to hold the currently loaded model
19
+ current_model_name = None
20
+ llm = None
21
+
22
def load_model(model_name):
    """
    Ensure the model named *model_name* is loaded, replacing any other.

    Parameters
    ----------
    model_name : str
        A key of the ``MODELS`` registry.

    Returns
    -------
    Llama
        The llama_cpp model instance now held in the module-global ``llm``.

    Raises
    ------
    ValueError
        If *model_name* is not a key of ``MODELS``.
    Exception
        Whatever ``hf_hub_download`` or ``Llama`` raise on download/load
        failure (re-raised after logging).
    """
    global llm, current_model_name

    # Fast path: the requested model is already resident.
    if llm is not None and current_model_name == model_name:
        return llm

    # Validate before touching the currently loaded model.
    if model_name not in MODELS:
        raise ValueError(f"Unknown model: {model_name}")

    print(f"Loading new model: {model_name}...")

    # Release the previous model before loading the next one so we never
    # hold two GGUF models in RAM at once (important on a small CPU host).
    # NOTE(review): a failed load therefore leaves no model resident; the
    # caller surfaces the error and the next request retries the load.
    llm = None
    current_model_name = None

    spec = MODELS[model_name]
    try:
        model_path = hf_hub_download(
            repo_id=spec["repo_id"],
            filename=spec["filename"],
        )

        # n_gpu_layers=0 forces CPU inference; n_ctx=4096 sets the context window.
        llm = Llama(
            model_path=model_path,
            n_gpu_layers=0,
            n_ctx=4096,
            verbose=True,
        )
    except Exception as e:
        print(f"Error loading model {model_name}: {e}")
        raise  # bare raise preserves the original traceback

    current_model_name = model_name
    print(f"Successfully loaded {model_name}")
    return llm
61
 
62
  def respond(
63
  message,
64
  history: list[dict[str, str]],
65
+ model_selection, # First additional input (Dropdown)
66
  system_message,
67
  max_tokens,
68
  temperature,
69
  top_p,
70
  ):
71
+ global llm
72
+
73
+ # Ensure the correct model is loaded
74
+ try:
75
+ load_model(model_selection)
76
+ except Exception as e:
77
+ yield f"Error loading model '{model_selection}': {str(e)}. Please check if the model has been uploaded to Hugging Face."
78
+ return
79
+
80
  messages = [{"role": "system", "content": system_message}]
81
  messages.extend(history)
82
  messages.append({"role": "user", "content": message})
83
 
84
  response = ""
85
 
86
+ # Generate response
87
  completion = llm.create_chat_completion(
88
  messages=messages,
89
  max_tokens=max_tokens,
 
98
  response += token
99
  yield response
100
 
101
+ # --- UI Setup ---
102
  """
103
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
104
  """
 
106
  respond,
107
  type="messages",
108
  additional_inputs=[
109
+ # Model Selector Dropdown
110
+ gr.Dropdown(
111
+ choices=list(MODELS.keys()),
112
+ value="Llama-3.2-1B",
113
+ label="Select Model",
114
+ info="Switching models will take a few seconds to download/load."
115
+ ),
116
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
117
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
118
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
 
127
  )
128
 
129
if __name__ == "__main__":
    # Start the Gradio app only when executed as a script.
    chatbot.launch()