Update app.py
app.py (changed)
Compared with the previous version, the rewrite:

- drops the `current_model_name` global that sat beside the `loaded_models` cache, along with the `global loaded_models, current_model_name` declaration in `load_model`;
- replaces the step-by-step logging (`print(f"Reusing already loaded model: {model_choice}")`, `print("Loading GGUF model into memory...")`, `print("Model loaded successfully!")`) with two short download/load messages;
- sets the 3B entry's `repo_id` to `Datangtang/GGUF3B`;
- removes `echo=False` from the inference call;
- renames the parameter `model_choice` to `model_name` and the prompt accumulator `conversation` to `prompt`;
- in the UI, drops the secondary caption `gr.Markdown("Switch between **1B** and **3B** GGUF models in real-time.")` and replaces the dropdown event handler (`inputs=[model_choice]`, `outputs=[]`) with an explicit `user_send`/`bot_reply` submit chain.

The updated `app.py`, section by section:
```python
import gradio as gr   # sits above the first diff hunk (the UI below uses gr)
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

# ------------------------------
# Model configuration
# ------------------------------
MODEL_CONFIGS = {
    "1B Model": {
        "repo_id": "Datangtang/GGUF1B",
        "filename": "llama-3.2-1b-instruct.Q4_K_M.gguf"
    },
    "3B Model": {
        "repo_id": "Datangtang/GGUF3B",
        "filename": "llama-3.2-3b-instruct.Q4_K_M.gguf"
    }
}

# Model cache: Llama instances keyed by the dropdown label
loaded_models = {}
```
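Since the dropdown and the loader key off the same label strings, adding another checkpoint is a two-line change: a new `MODEL_CONFIGS` entry plus a matching dropdown choice. The entry below is purely illustrative; the repo id and filename are placeholders, not existing repos:

```python
# Hypothetical third entry -- repo id and filename are placeholders only.
MODEL_CONFIGS["8B Model"] = {
    "repo_id": "Datangtang/GGUF8B",                    # placeholder repo
    "filename": "llama-3.1-8b-instruct.Q4_K_M.gguf"    # placeholder file
}
```

The same label would then need to be appended to the `choices` list of the dropdown further down.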
```python
# ------------------------------
# Load model safely
# ------------------------------
def load_model(model_name):
    # Reuse an already loaded model instead of downloading again
    if model_name in loaded_models:
        return loaded_models[model_name]

    cfg = MODEL_CONFIGS[model_name]

    print(f"Downloading {model_name} ...")
    model_path = hf_hub_download(
        repo_id=cfg["repo_id"],
        filename=cfg["filename"],
        # (one unchanged argument line elided by the diff)
        token=os.environ["HF_TOKEN"]
    )

    print(f"Loading {model_name} ...")
    llm = Llama(
        model_path=model_path,
        n_ctx=1024,
        # (two unchanged argument lines elided by the diff)
        n_gpu_layers=0,   # CPU-only inference
        use_mmap=True,
        use_mlock=True,
        verbose=False
    )

    loaded_models[model_name] = llm
    return llm
```
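One sharp edge kept from the old version: `token=os.environ["HF_TOKEN"]` raises a bare `KeyError` when the Space secret is missing. Failing earlier with an actionable message is one option; this is a sketch, and `get_hf_token` is a hypothetical helper, not part of the commit:

```python
import os

def get_hf_token():
    # Fail with a clear message instead of a bare KeyError.
    token = os.environ.get("HF_TOKEN")
    if token is None:
        raise RuntimeError(
            "HF_TOKEN is not set. Add it as a secret in the Space settings "
            "so hf_hub_download can reach the model repos."
        )
    return token
```

`hf_hub_download` would then be called with `token=get_hf_token()`.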
```python
# ------------------------------
# Chat function
# ------------------------------
def chat_func(message, history, model_name):

    llm = load_model(model_name)

    # ------------------------------
    # Build prompt
    # ------------------------------
    prompt = "System: You are a helpful assistant.\n"

    # Keep only the last three exchanges as context (n_ctx is small)
    for user, bot in history[-3:]:
        prompt += f"User: {user}\nAssistant: {bot}\n"

    prompt += f"User: {message}\nAssistant:"

    # ------------------------------
    # Model inference
    # ------------------------------
    output = llm(
        prompt,
        max_tokens=128,
        temperature=0.7,
        top_p=0.9,
        top_k=40,
        repeat_penalty=1.1,
        stop=["User:", "Assistant:"],
    )

    answer = output["choices"][0]["text"]
    return answer
```
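The hand-assembled `System:/User:/Assistant:` prompt plus `stop` strings works, but it is not the template Llama 3.2 Instruct was trained on. llama-cpp-python also exposes `create_chat_completion`, which applies the chat template embedded in the GGUF; a sketch of the same request through that interface (`chat_func_templated` is a hypothetical name, not in the commit):

```python
# Sketch: same request via llama-cpp-python's chat API, which uses the
# chat template stored in the GGUF instead of a hand-rolled prompt.
def chat_func_templated(message, history, model_name):
    llm = load_model(model_name)
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for user, bot in history[-3:]:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})
    out = llm.create_chat_completion(
        messages=messages,
        max_tokens=128,
        temperature=0.7,
        top_p=0.9,
    )
    return out["choices"][0]["message"]["content"]
```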
```python
# ------------------------------
# Gradio UI
# ------------------------------
with gr.Blocks() as demo:

    gr.Markdown("## 🦙 Datangtang Multi-Model GGUF Chat")

    model_selector = gr.Dropdown(
        label="Choose model",
        choices=["1B Model", "3B Model"],
        value="1B Model"
    )

    chatbot = gr.Chatbot()
    msg_box = gr.Textbox(label="Message")

    def user_send(message, history):
        # Append the user turn with a placeholder reply, clear the textbox
        history = history + [[message, None]]
        return history, ""

    def bot_reply(history, model_name):
        # Fill in the placeholder with the model's answer
        user_msg = history[-1][0]
        bot_msg = chat_func(user_msg, history[:-1], model_name)
        history[-1][1] = bot_msg
        return history

    msg_box.submit(user_send, [msg_box, chatbot], [chatbot, msg_box]).then(
        bot_reply, [chatbot, model_selector], chatbot
    )

demo.launch()
```
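`bot_reply` returns the full answer in one update. Gradio event handlers may also be generators, and `Llama.__call__` accepts `stream=True`, which yields partial completion chunks; a streaming variant would look roughly like this sketch (not part of the commit):

```python
# Sketch: stream tokens into the chatbot instead of one final update.
def bot_reply_stream(history, model_name):
    llm = load_model(model_name)
    user_msg = history[-1][0]

    prompt = "System: You are a helpful assistant.\n"
    for user, bot in history[:-1][-3:]:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {user_msg}\nAssistant:"

    history[-1][1] = ""
    for chunk in llm(prompt, max_tokens=128, temperature=0.7,
                     stop=["User:", "Assistant:"], stream=True):
        history[-1][1] += chunk["choices"][0]["text"]
        yield history
```

Wiring it in would only change the `.then(...)` call to reference `bot_reply_stream` instead of `bot_reply`.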