Spaces:
Runtime error
Runtime error
File size: 568 Bytes
d4e1b88 f37a8c9 d4e1b88 f37a8c9 d4e1b88 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | import gradio as gr
from huggingface_hub import hf_hub_download
import subprocess
import os
# Download our GGUF model
model_path = hf_hub_download(
repo_id="Genie-AI-Lab/Omni-Genie",
filename="Qwen2.5-3B-Instruct.Q4_0.gguf"
)
def chat(message, history):
# Use llama.cpp to run inference on our model
result = subprocess.run([
"python", "-m", "llama_cpp.server",
"--model", model_path,
"--n_ctx", "2048"
], input=message.encode(), capture_output=True)
return result.stdout.decode()
gr.ChatInterface(chat).launch() |