ChatBot / app.py
Goated121's picture
Create app.py
fbe83b1 verified
raw
history blame
322 Bytes
import gradio as gr
from llama_cpp import Llama # if using llama.cpp via GGUF
# File name of the quantized GGUF weights (given as a relative path).
_MODEL_FILE = "qwen2.5-1.5B-q4.gguf"

# Build the llama.cpp model once at module level so every call to the
# handler below reuses the same loaded instance.
model = Llama(model_path=_MODEL_FILE)
def generate(prompt: str) -> str:
    """Return the model's completion text for *prompt*.

    Calls the module-level ``model`` with the prompt, limiting generation to
    100 tokens, and returns the text of the first completion choice.
    """
    output = model(prompt, max_tokens=100)
    # llama-cpp-python returns an OpenAI-style completion dict: the generated
    # text is stored under choices[0]["text"]. There is no top-level "text"
    # key, so indexing output['text'] raises KeyError.
    return output["choices"][0]["text"]
# Wrap generate() in a single text-input / text-output Gradio interface,
# then start serving it.
demo = gr.Interface(
    fn=generate,
    inputs="text",
    outputs="text",
)
demo.launch()