File size: 568 Bytes
d4e1b88
f37a8c9
 
 
d4e1b88
f37a8c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4e1b88
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import gradio as gr
from huggingface_hub import hf_hub_download
import subprocess
import os

# Fetch the quantized GGUF weights from the Hugging Face Hub and remember
# the local path so the inference subprocess can load them.
_REPO_ID = "Genie-AI-Lab/Omni-Genie"
_WEIGHTS_FILE = "Qwen2.5-3B-Instruct.Q4_0.gguf"
model_path = hf_hub_download(repo_id=_REPO_ID, filename=_WEIGHTS_FILE)

def chat(message, history):
    """Run one inference pass over *message* and return the model's text.

    Parameters
    ----------
    message : str
        The user's latest chat message.
    history : list
        Prior turns supplied by ``gr.ChatInterface`` (currently unused).

    Returns
    -------
    str
        The subprocess's stdout on success, or a bracketed error string
        describing what went wrong.

    NOTE(review): ``python -m llama_cpp.server`` starts an OpenAI-compatible
    HTTP *server* — it does not read a prompt from stdin and never exits on
    its own, so without a timeout this call would hang the UI forever. The
    timeout below stops the hang, but the right fix is to start the server
    once at startup and POST to it (or use the ``llama_cpp.Llama`` API
    directly) — confirm with the deployment owner.
    """
    try:
        result = subprocess.run(
            [
                "python", "-m", "llama_cpp.server",
                "--model", model_path,
                "--n_ctx", "2048",
            ],
            input=message.encode("utf-8"),
            capture_output=True,
            timeout=120,  # guard against the server never terminating
        )
    except subprocess.TimeoutExpired as exc:
        # Surface whatever partial output we got rather than hanging.
        partial = (exc.stdout or b"").decode("utf-8", errors="replace")
        return partial or "[inference error] model process timed out"

    if result.returncode != 0:
        # Don't silently discard stderr — report it to the user.
        err = result.stderr.decode("utf-8", errors="replace")
        return f"[inference error] {err}"

    # errors="replace": model output is not guaranteed to be valid UTF-8.
    return result.stdout.decode("utf-8", errors="replace")

# Wire the chat callback into a Gradio chat UI and start serving it.
gr.ChatInterface(chat).launch()