| from llama_cpp import Llama | |
| import gradio as gr | |
# Constructor settings for the local GGUF model, kept together so they are
# easy to find and tweak.
_MODEL_SETTINGS = {
    "model_path": "qwen2.5-1.5B-q4.gguf",
    "n_ctx": 4096,
    "n_gpu_layers": 0,
}

# Built once at module level; chat() reuses this single instance for every
# request instead of reloading the model.
model = Llama(**_MODEL_SETTINGS)
def chat(prompt):
    """Run ``prompt`` through the module-level ``model`` and return its text.

    The prompt is forwarded unchanged, together with ``max_tokens=256`` and
    ``temperature=0.7``.

    Returns:
        The ``"text"`` value of the first entry in the result's
        ``"choices"`` list.
    """
    completion = model(prompt, max_tokens=256, temperature=0.7)
    first_choice = completion["choices"][0]
    return first_choice["text"]
# Wire chat() into a text-in / text-out Gradio interface, then start it.
demo = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Qwen2.5-1.5B Q4 Chatbot",
)
demo.launch()