File size: 401 Bytes
88b5c49 fbe83b1 88b5c49 fbe83b1 88b5c49 fbe83b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
from llama_cpp import Llama
import gradio as gr
# Load the quantized GGUF model once at import time so every request reuses it.
# NOTE(review): model path is hard-coded to /app/models — presumably a Docker
# image layout; confirm the file exists at that path in deployment.
llm = Llama(model_path="/app/models/qwen2.5-1.5B-q4.gguf")
def generate_text(prompt, max_tokens=200):
    """Generate a completion for *prompt* using the module-level GGUF model.

    Parameters
    ----------
    prompt : str
        The user prompt to complete.
    max_tokens : int, optional
        Upper bound on the number of generated tokens. Defaults to 200,
        matching the previously hard-coded limit, so existing callers are
        unaffected.

    Returns
    -------
    str
        The generated completion text.
    """
    output = llm(prompt, max_tokens=max_tokens)
    # llama-cpp-python returns an OpenAI-style completion dict; the text
    # of the first (and only) choice is the result.
    return output["choices"][0]["text"]
# Wire the generator into a minimal Gradio UI: one prompt box in, plain text out.
prompt_box = gr.Textbox(lines=2, placeholder="Type your prompt here...")
demo = gr.Interface(fn=generate_text, inputs=prompt_box, outputs="text")

# Start the local Gradio server (blocks until interrupted).
demo.launch()