Spaces:
Running
| import gradio as gr | |
| from koboldcpp import KoboldCpp | |
| from huggingface_hub import hf_hub_download | |
# Fetch the quantized GGUF weights from the Hugging Face Hub.
# hf_hub_download caches the file locally and returns the cached path,
# so repeated Space restarts do not re-download the model.
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
# Instantiate the KoboldCpp runner over the downloaded GGUF weights.
# NOTE(review): context_length=2048 and threads=4 are taken from the
# original config — presumably sized for a small CPU Space; confirm
# against the host hardware before changing.
llm = KoboldCpp(model_path=model_path, context_length=2048, threads=4)
def chat_fn(message, history):
    """Generate a reply to *message*, conditioning on the chat history.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Prior turns as supplied by ``gr.ChatInterface`` — either
        ``[user, assistant]`` pairs (the default) or role/content dicts
        (``type="messages"``). Both shapes are handled defensively.

    Returns
    -------
    str
        The model's generated continuation.
    """
    # Bug fix: the original ignored `history`, so every turn was answered
    # with no conversational context. Rebuild a flat chat transcript from
    # the prior turns, then append the new message.
    parts = []
    for turn in history or []:
        if isinstance(turn, dict):
            # type="messages" style: {"role": ..., "content": ...}
            role = turn.get("role", "user").capitalize()
            parts.append(f"{role}: {turn.get('content', '')}")
        else:
            # default style: [user_message, assistant_message] pair
            user_msg, bot_msg = turn
            if user_msg:
                parts.append(f"User: {user_msg}")
            if bot_msg:
                parts.append(f"Assistant: {bot_msg}")
    parts.append(f"User: {message}")
    parts.append("Assistant:")
    prompt = "\n".join(parts)

    # NOTE(review): plain "User:/Assistant:" framing — TinyLlama-Chat may
    # respond better with its native chat template; confirm if quality
    # matters.
    response = llm.generate(
        prompt=prompt,
        max_length=256,
        temp=0.7,
        top_p=0.95,
    )
    return response
# Expose the handler through a Gradio chat UI. The variable must be
# named `demo` — Hugging Face Spaces looks for that name when serving.
demo = gr.ChatInterface(fn=chat_fn, title="GGUF via KoboldCpp ⚡")
demo.launch()