Spaces: Update app.py
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from
+from llama_cpp import Llama
 import os
 
 # Download and load the GGUF model
@@ -14,27 +14,15 @@ if not os.path.exists(model_path):
     print("Model downloaded!")
 
 # Load the model
-
-
-
-
-
+print("Loading model...")
+llm = Llama(
+    model_path=model_path,
+    n_ctx=2048,
+    n_threads=4,
+    n_gpu_layers=0,
+    verbose=False
 )
-
-def format_prompt(message, history):
-    """Format the conversation into Llama 3.2 chat format"""
-    prompt = ""
-
-    # Add chat history
-    for user_msg, bot_msg in history:
-        prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
-        prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{bot_msg}<|eot_id|>"
-
-    # Add current message
-    prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|>"
-    prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
-
-    return prompt
+print("Model loaded!")
 
 def chat(message, history):
     """
@@ -44,19 +32,27 @@ def chat(message, history):
         message: Current user message
         history: List of [user_msg, bot_msg] pairs
     """
-    #
-
+    # Build conversation with proper Llama format
+    messages = []
+
+    # Add chat history
+    for user_msg, bot_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": bot_msg})
+
+    # Add current message
+    messages.append({"role": "user", "content": message})
 
     # Generate response
-    response = llm(
-
-
+    response = llm.create_chat_completion(
+        messages=messages,
+        max_tokens=512,
         temperature=0.7,
         top_p=0.9,
-        stop=["<|eot_id|>", "<|start_header_id|>"]
     )
 
-
+    # Extract the assistant's response
+    return response["choices"][0]["message"]["content"]
 
 # Create Gradio interface
 demo = gr.ChatInterface(
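Side note on the change: create_chat_completion lets llama-cpp-python apply the chat template shipped in the model's GGUF metadata, which is why the hand-rolled Llama 3.2 format_prompt and the explicit <|eot_id|> stop strings could be dropped. A minimal sketch of exercising the new code path outside Gradio follows; the model path is a placeholder and the parameters simply mirror the diff above:

from llama_cpp import Llama

# Placeholder path; in the Space this comes from the download step above.
llm = Llama(model_path="model.gguf", n_ctx=2048, n_threads=4,
            n_gpu_layers=0, verbose=False)

# Same message-building logic as the updated chat() function.
history = [("Hi!", "Hello! How can I help you today?")]
messages = [{"role": role, "content": content}
            for user_msg, bot_msg in history
            for role, content in (("user", user_msg), ("assistant", bot_msg))]
messages.append({"role": "user", "content": "What is a GGUF file?"})

response = llm.create_chat_completion(
    messages=messages, max_tokens=512, temperature=0.7, top_p=0.9
)
print(response["choices"][0]["message"]["content"])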