File size: 401 Bytes
88b5c49
fbe83b1
 
88b5c49
 
fbe83b1
88b5c49
 
 
 
 
 
 
 
 
fbe83b1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from llama_cpp import Llama
import gradio as gr

# Load the quantized GGUF model once at module import time so every
# request reuses the same in-memory weights (Llama() load is expensive).
# NOTE(review): path is hard-coded for the container layout (/app/models);
# confirm it matches the deployment image.
llm = Llama(model_path="/app/models/qwen2.5-1.5B-q4.gguf")

def generate_text(prompt: str, max_tokens: int = 200) -> str:
    """Generate a text completion for *prompt* with the loaded GGUF model.

    Args:
        prompt: The user's input text passed directly to the model.
        max_tokens: Upper bound on tokens to generate (default 200,
            matching the original hard-coded limit).

    Returns:
        The generated completion text from the first choice.
    """
    # llama-cpp-python's __call__ returns an OpenAI-style completion dict:
    # {"choices": [{"text": ...}, ...], ...}
    output = llm(prompt, max_tokens=max_tokens)
    return output['choices'][0]['text']

# Build the UI at module level so tools that import this file (e.g. the
# gradio CLI's hot-reload) can find `demo` without side effects.
demo = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=2, placeholder="Type your prompt here..."),
    outputs="text",
)

# Only start the web server when executed as a script; importing this
# module no longer launches a server as an import side effect.
if __name__ == "__main__":
    demo.launch()