# Minimal Gradio demo: serve a quantized Qwen2.5-1.5B GGUF model via llama.cpp.
import gradio as gr
from llama_cpp import Llama # if using llama.cpp via GGUF
# Load quantized model
model = Llama(model_path="qwen2.5-1.5B-q4.gguf")
def generate(prompt):
output = model(prompt, max_tokens=100)
return output['text']
demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch() |