Fu01978 commited on
Commit
98b52f2
·
verified ·
1 Parent(s): a677a92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -1,32 +1,33 @@
1
- import gradio as gr
2
- from koboldcpp import KoboldCpp
3
  from huggingface_hub import hf_hub_download
 
 
4
 
5
- # Download GGUF model
6
  REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
7
  FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
8
 
9
  model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
10
 
11
- # Load KoboldCpp runner
12
- llm = KoboldCpp(
13
  model_path=model_path,
14
- context_length=2048,
15
- threads=4
 
16
  )
17
 
18
  def chat_fn(message, history):
19
- response = llm.generate(
20
- prompt=message,
21
- max_length=256,
22
- temp=0.7,
23
  top_p=0.95,
 
24
  )
25
- return response
26
 
27
  demo = gr.ChatInterface(
28
  fn=chat_fn,
29
- title="GGUF via KoboldCpp ⚡",
30
  )
31
 
32
  demo.launch()
 
1
import os  # NOTE(review): unused in this file — candidate for removal

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Quantized (Q4_K_M) GGUF build of TinyLlama 1.1B Chat from the Hub.
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

# Fetch the weights; hf_hub_download serves from the local cache on reruns.
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

# llama.cpp runtime: 2048-token context, 4 CPU threads, no GPU offload.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    n_gpu_layers=0,
)
17
 
18
def chat_fn(message, history):
    """Return the model's completion for *message*.

    *history* is the Gradio chat transcript; it is accepted to satisfy the
    ChatInterface callback signature but is not used.
    """
    # NOTE(review): the conversation history is not folded into the prompt,
    # so every turn is answered without prior context — confirm intended.
    completion = llm(
        message,
        max_tokens=256,
        temperature=0.7,
        top_p=0.95,
        stop=["</s>"],
    )
    first_choice = completion["choices"][0]
    return first_choice["text"]
27
 
28
# Wire chat_fn into Gradio's chat widget. The variable MUST be named `demo`
# so that Hugging Face Spaces can discover and serve the app.
demo = gr.ChatInterface(
    fn=chat_fn,
    title="llama-cpp-python GGUF Space 🚀",
)

demo.launch()