Commit: Fix bug: runtime error
app.py CHANGED
@@ -11,7 +11,7 @@ current_model_name = None
 
 MODEL_CONFIGS = {
     "1B Model (Datangtang/GGUF1B)": {
-        "repo_id": "Datangtang/
+        "repo_id": "Datangtang/GFUF1B",
         "filename": "llama-3.2-1b-instruct.Q4_K_M.gguf"
     },
     "3B Model (Datangtang/GGUF3B)": {
@@ -25,13 +25,13 @@ MODEL_CONFIGS = {
 # Load model function
 # ----------------------------------------
 def load_model(model_choice):
-    global loaded_models, current_model_name
-
     if model_choice in loaded_models:
+        print(f"Reusing already loaded model: {model_choice}")
         return loaded_models[model_choice]
 
     cfg = MODEL_CONFIGS[model_choice]
 
+    print(f"Downloading model: {model_choice}")
     model_path = hf_hub_download(
         repo_id=cfg["repo_id"],
         filename=cfg["filename"],
@@ -39,6 +39,7 @@ def load_model(model_choice):
         token=os.environ["HF_TOKEN"]
     )
 
+    print("Loading model into memory...")
     llm = Llama(
         model_path=model_path,
         n_ctx=1024,
@@ -47,36 +48,30 @@ def load_model(model_choice):
         n_gpu_layers=0,
         use_mmap=True,
         use_mlock=True,
-        verbose=False
+        verbose=False
     )
 
     loaded_models[model_choice] = llm
-
+    print("Model loaded successfully!")
     return llm
 
 
 # ----------------------------------------
-# Chat function (
+# Chat function (HuggingFace-compatible)
 # ----------------------------------------
-def chat(messages, model_choice):
-
+def chat(message, history, model_choice):
     llm = load_model(model_choice)
 
-    #
+    # Build conversation prompt
     conversation = "System: You are a helpful assistant.\n"
 
-    for
-
-
+    for human, assistant in history[-3:]:
+        conversation += f"User: {human}\n"
+        if assistant:
+            conversation += f"Assistant: {assistant}\n"
 
-        if role == "user":
-            conversation += f"User: {text}\n"
-        elif role == "assistant":
-            conversation += f"Assistant: {text}\n"
+    conversation += f"User: {message}\nAssistant:"
 
-    conversation += "Assistant:"
-
-    # LLM output
     response = llm(
         conversation,
         max_tokens=128,
@@ -91,37 +86,36 @@ def chat(messages, model_choice):
 
 
 # ----------------------------------------
-# Gradio UI
+# Gradio UI
 # ----------------------------------------
 with gr.Blocks() as demo:
 
-    gr.Markdown("
+    gr.Markdown("## 🦙 Datangtang GGUF Model Demo")
 
     model_choice = gr.Dropdown(
         label="Select Model",
         choices=list(MODEL_CONFIGS.keys()),
-        value="1B Model (Datangtang/GGUF1B)"
+        value="1B Model (Datangtang/GGUF1B)"
     )
 
-    chatbot = gr.Chatbot(
+    chatbot = gr.Chatbot()
     msg_box = gr.Textbox(label="Message")
 
-    #
-    def
-
-        return
-
-    #
-    def
-
-
-
-
-
-
-    ).then(
-
+    # Add user message to history
+    def user_send(message, history):
+        history = history + [[message, None]]
+        return history, ""
+
+    # Generate bot response
+    def bot_reply(history, model_choice):
+        user_msg = history[-1][0]
+        bot_msg = chat(user_msg, history[:-1], model_choice)
+        history[-1][1] = bot_msg
+        return history
+
+    # Wire events
+    msg_box.submit(user_send, [msg_box, chatbot], [chatbot, msg_box]).then(
+        bot_reply, [chatbot, model_choice], chatbot
    )
 
-
 demo.launch()
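For reference, the rewritten chat() flattens Gradio's [user, assistant] history pairs into a plain-text transcript instead of consuming role-tagged messages. The following standalone sketch reproduces that prompt construction; the sample history and question are invented for illustration:

# Sketch of the prompt string the new chat() builds (sample data, not from the Space).
history = [
    ["Hi there", "Hello! How can I help?"],
    ["What is GGUF?", "A binary file format for quantized llama.cpp models."],
]
message = "Which quantization does this Space use?"

conversation = "System: You are a helpful assistant.\n"
for human, assistant in history[-3:]:  # only the last 3 turns are kept
    conversation += f"User: {human}\n"
    if assistant:
        conversation += f"Assistant: {assistant}\n"
conversation += f"User: {message}\nAssistant:"
print(conversation)

Keeping only history[-3:] bounds the prompt so it stays within the n_ctx=1024 context window, at the cost of forgetting older turns.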
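The diff elides how response is consumed (new lines 78-85 are not shown). With llama-cpp-python, calling the Llama object performs a completion and returns an OpenAI-style dict, so the omitted code presumably looks roughly like the sketch below; the stop sequence and the .strip() are assumptions, not taken from the commit:

# Minimal completion sketch, assuming llama-cpp-python's standard API.
from llama_cpp import Llama

llm = Llama(
    model_path="llama-3.2-1b-instruct.Q4_K_M.gguf",  # assumed local file path
    n_ctx=1024,
    n_gpu_layers=0,
    verbose=False,
)
response = llm(
    "System: You are a helpful assistant.\nUser: Hello\nAssistant:",
    max_tokens=128,
    stop=["User:"],  # assumed: stop before the model writes the next user turn
)
print(response["choices"][0]["text"].strip())  # completion text lives in choices[0]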
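The submit/then chain at the bottom is the standard two-step Gradio pattern: the first callback appends the user turn and clears the textbox immediately, and the chained callback fills in the model's reply afterwards. A self-contained echo-bot sketch of the same wiring (no model download required; the echo reply stands in for the LLM call):

import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg_box = gr.Textbox(label="Message")

    def user_send(message, history):
        # Append the user turn with a placeholder reply; returning "" clears the box.
        return history + [[message, None]], ""

    def bot_reply(history):
        # Fill in the placeholder; a real app would call the LLM here.
        history[-1][1] = f"Echo: {history[-1][0]}"
        return history

    msg_box.submit(user_send, [msg_box, chatbot], [chatbot, msg_box]).then(
        bot_reply, chatbot, chatbot
    )

demo.launch()

Splitting the update in two lets the user's message render as soon as it is submitted, while the slower reply step runs in the chained event.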