# Genie-AI / app.py  (source: Genie-AI-Lab Space, commit f37a8c9)
# NOTE: Hugging Face file-viewer page chrome ("raw / history / blame /
# contribute / delete", byte count) removed — it is not part of the script.
import gradio as gr
from huggingface_hub import hf_hub_download
import subprocess
import os
# Fetch the quantized GGUF weights from the Hub; hf_hub_download caches the
# file locally, so subsequent startups skip the network transfer.
model_path = hf_hub_download(
    filename="Qwen2.5-3B-Instruct.Q4_0.gguf",
    repo_id="Genie-AI-Lab/Omni-Genie",
)
def chat(message, history):
    """Run one round of inference on the downloaded GGUF model.

    Parameters
    ----------
    message : str
        The user's latest chat message, fed to the subprocess on stdin.
    history : list
        Prior turns supplied by ``gr.ChatInterface``; currently unused,
        so the model sees no conversation context.

    Returns
    -------
    str
        The subprocess's decoded stdout, or a diagnostic message when the
        subprocess fails or exceeds the deadline.
    """
    # NOTE(review): `python -m llama_cpp.server` starts a long-lived HTTP
    # server -- it does not read a prompt from stdin and answer on stdout,
    # and it never exits on its own, so without a timeout this call would
    # block forever. The proper fix is to load the model once with
    # llama_cpp.Llama (or keep one server running and query it over HTTP),
    # but that requires importing the llama_cpp package directly.
    try:
        result = subprocess.run(
            [
                "python", "-m", "llama_cpp.server",
                "--model", model_path,
                "--n_ctx", "2048",
            ],
            input=message.encode("utf-8"),
            capture_output=True,
            timeout=120,  # hard cap so a hung subprocess cannot freeze the UI
        )
    except subprocess.TimeoutExpired as err:
        # Salvage whatever output was produced before the deadline hit.
        partial = (err.stdout or b"").decode("utf-8", errors="replace")
        return partial or "Error: inference timed out."
    if result.returncode != 0:
        # Surface the failure instead of silently returning an empty string.
        return "Error: " + result.stderr.decode("utf-8", errors="replace")
    return result.stdout.decode("utf-8", errors="replace")
gr.ChatInterface(chat).launch()