| import gradio as gr | |
| from llama_cpp import Llama | |
# Path to the quantized model weights (GGUF format, 4-bit).
MODEL_PATH = "MKLLM-7B-Instruct-Q4_0.gguf"
# Context window size in tokens for the loaded model.
CONTEXT_SIZE = 2048

# Load the model once at module import; the single instance is reused
# by every chat request.
llm = Llama(model_path=MODEL_PATH, n_ctx=CONTEXT_SIZE)
| def chat(message, history): | |
| response = llm.create_completion( | |
| f"USER: {message}\nASSISTANT:", | |
| max_tokens=512, | |
| temperature=0.7 | |
| ) | |
| return response['choices'][0]['text'] | |
# Build the Gradio chat UI around the `chat` handler.
demo = gr.ChatInterface(
    chat,
    title="MKLLM Chat",
)

# Guard the server start so importing this module (e.g. for tests or
# `gradio deploy`) does not launch the web server as a side effect;
# running the file as a script behaves exactly as before.
if __name__ == "__main__":
    demo.launch()