Update app.py

app.py CHANGED
@@ -3,6 +3,7 @@ from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import os
 
+
 # ------------------------------
 # Model configuration
 # ------------------------------
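Note: the MODEL_CONFIGS dict itself lies outside these hunks, so its entries are not visible in this commit. Judging from the cfg["repo_id"] and cfg["filename"] lookups below, it maps the dropdown labels to GGUF repo coordinates; a minimal sketch with placeholder values (only the "1B Model" label is confirmed, by the dropdown default further down):

    # Sketch only; the real repo_id/filename entries are not shown in this diff.
    MODEL_CONFIGS = {
        "1B Model": {
            "repo_id": "your-org/your-1b-gguf",   # placeholder
            "filename": "model.Q4_K_M.gguf",      # placeholder
        },
        # ...additional entries follow the same shape
    }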
@@ -17,20 +18,17 @@ MODEL_CONFIGS = {
     }
 }
 
-#
-loaded_models = {}
+loaded_models = {} # Cache
 
 
-# ------------------------------
-# Load model safely
-# ------------------------------
 def load_model(model_name):
     if model_name in loaded_models:
+        print(f"Reusing cached model: {model_name}")
         return loaded_models[model_name]
 
     cfg = MODEL_CONFIGS[model_name]
 
-    print(f"Downloading {model_name}
+    print(f"Downloading {model_name}...")
     model_path = hf_hub_download(
         repo_id=cfg["repo_id"],
         filename=cfg["filename"],
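Reviewer note: loaded_models acts as a simple process-level cache, so selecting the same model twice reuses the in-memory Llama instance instead of re-downloading. Assuming the definitions above:

    llm_a = load_model("1B Model")   # first call: downloads and loads
    llm_b = load_model("1B Model")   # second call: "Reusing cached model: 1B Model"
    assert llm_a is llm_b            # same instance, nothing reloaded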
@@ -38,7 +36,7 @@ def load_model(model_name):
         token=os.environ["HF_TOKEN"]
     )
 
-    print(f"Loading {model_name}
+    print(f"Loading model {model_name}...")
     llm = Llama(
         model_path=model_path,
         n_ctx=1024,
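One caveat this commit leaves in place: os.environ["HF_TOKEN"] raises KeyError when the Space secret is missing. A more forgiving sketch (assuming the repos may also be public; cfg as in load_model above):

    token = os.environ.get("HF_TOKEN")   # None when the secret is unset
    model_path = hf_hub_download(
        repo_id=cfg["repo_id"],
        filename=cfg["filename"],
        token=token,                     # hf_hub_download accepts token=None
    )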
@@ -47,33 +45,33 @@ def load_model(model_name):
         n_gpu_layers=0,
         use_mmap=True,
         use_mlock=True,
-        verbose=False
+        verbose=False,
     )
 
     loaded_models[model_name] = llm
+    print(f"Model {model_name} loaded successfully!")
     return llm
 
 
 # ------------------------------
-# Chat
+# Chat logic
 # ------------------------------
-def chat_func(message, history, model_name):
-
+def generate_reply(history, model_name):
     llm = load_model(model_name)
 
-    #
-    # Build prompt
-    # ------------------------------
+    # Construct prompt with system + chat history
     prompt = "System: You are a helpful assistant.\n"
 
-    for
-
+    for msg in history:
+        role = msg["role"]
+        content = msg["content"]
+        if role == "user":
+            prompt += f"User: {content}\n"
+        elif role == "assistant":
+            prompt += f"Assistant: {content}\n"
 
-    prompt +=
+    prompt += "Assistant:"
 
-    # ------------------------------
-    # Model inference
-    # ------------------------------
     output = llm(
         prompt,
         max_tokens=128,
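For reference, with the new messages-format history, generate_reply renders a short exchange like this (illustration, not part of the commit):

    history = [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello! How can I help?"},
        {"role": "user", "content": "Tell me a joke"},
    ]
    # prompt becomes:
    # System: You are a helpful assistant.
    # User: Hi
    # Assistant: Hello! How can I help?
    # User: Tell me a joke
    # Assistant:

The stop=["User:", "Assistant:"] list below then keeps the model from generating past its own turn.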
@@ -84,15 +82,14 @@ def chat_func(message, history, model_name):
         stop=["User:", "Assistant:"],
     )
 
-
-    return
+    reply = output["choices"][0]["text"]
+    return reply.strip()
 
 
 # ------------------------------
 # Gradio UI
 # ------------------------------
 with gr.Blocks() as demo:
-
     gr.Markdown("## 🦙 Datangtang Multi-Model GGUF Chat")
 
     model_selector = gr.Dropdown(
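The new indexing matches the OpenAI-style completion dict that llama-cpp-python returns from a direct llm(...) call, roughly (abridged):

    # output looks roughly like:
    # {
    #     "choices": [
    #         {"text": " Sure, here is one ...", "index": 0, "finish_reason": "stop"}
    #     ],
    #     ...
    # }

reply.strip() drops the leading whitespace the model typically emits after "Assistant:".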
@@ -101,22 +98,26 @@ with gr.Blocks() as demo:
         value="1B Model"
     )
 
-    chatbot = gr.Chatbot()
+    chatbot = gr.Chatbot(type="messages")
     msg_box = gr.Textbox(label="Message")
 
-    def
-        history = history + [
+    def user_message(message, history):
+        history = history + [{"role": "user", "content": message}]
         return history, ""
 
-    def
-
-
-        history[-1][1] = bot_msg
+    def bot_message(history, model_name):
+        reply = generate_reply(history, model_name)
+        history = history + [{"role": "assistant", "content": reply}]
         return history
 
-    msg_box.submit(
-
+    msg_box.submit(
+        user_message,
+        [msg_box, chatbot],
+        [chatbot, msg_box]
+    ).then(
+        bot_message,
+        [chatbot, model_selector],
+        chatbot
     )
 
-
     demo.launch()
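Reviewer note: gr.Chatbot(type="messages") switches the history value from [user, bot] pairs to role/content dicts, which is why the old history[-1][1] = bot_msg indexing had to go. The two shapes, side by side:

    # old default (tuple) format:
    history = [["Hi", "Hello!"]]   # history[-1][1] is the bot slot
    # new messages format:
    history = [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello!"},
    ]

Chaining msg_box.submit(...).then(...) also makes the user turn render immediately, with the model reply filled in by the second step.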