from llama_cpp import Llama
import gradio as gr

# Path to the quantized GGUF model weights (baked into the container image).
# Named constant so the path is defined in exactly one place.
MODEL_PATH = "/app/models/qwen2.5-1.5B-q4.gguf"

# Load the quantized GGUF model once at import time so every request
# reuses the same in-memory model instead of reloading it per call.
llm = Llama(model_path=MODEL_PATH)
def generate_text(prompt, max_tokens=200):
    """Generate a text completion for *prompt* with the loaded GGUF model.

    Args:
        prompt: User-supplied prompt text to complete.
        max_tokens: Upper bound on the number of tokens to generate.
            Defaults to 200, preserving the original behavior.

    Returns:
        The generated completion text as a string.
    """
    output = llm(prompt, max_tokens=max_tokens)
    # llama-cpp-python returns an OpenAI-style completion dict;
    # the generated text lives at choices[0]["text"].
    return output['choices'][0]['text']
# Gradio UI: one multi-line textbox in, plain generated text out.
demo = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=2, placeholder="Type your prompt here..."),
    outputs="text",
)

# Guard the server start so the module can be imported (tests, deployment
# wrappers) without immediately launching the Gradio app.
if __name__ == "__main__":
    demo.launch()