# Hugging Face Spaces demo: TinyLlama GGUF chat model served via KoboldCpp + Gradio.
import gradio as gr
from koboldcpp import KoboldCpp
from huggingface_hub import hf_hub_download
# Download GGUF model
# Quantized (Q4_K_M) TinyLlama-1.1B chat model fetched from the Hugging Face
# Hub; hf_hub_download caches the file locally and returns its filesystem path.
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
# Load KoboldCpp runner
# NOTE(review): assumes the installed `koboldcpp` package exposes a `KoboldCpp`
# class with this constructor signature — confirm against the installed version.
llm = KoboldCpp(
    model_path=model_path,    # local path returned by hf_hub_download above
    context_length=2048,      # token window passed to the runner
    threads=4                 # CPU threads for inference
)
def chat_fn(message, history):
    """Generate a chat reply for *message*, conditioning on the conversation.

    Fix: the original ignored ``history`` entirely, so the model received only
    the latest message and had no conversational memory. The prompt now
    includes prior turns in a simple ``User:``/``Assistant:`` transcript.

    Args:
        message: The user's latest message (str).
        history: Prior turns as supplied by gr.ChatInterface — either a list
            of ``[user, assistant]`` pairs (tuples format) or a list of
            ``{"role": ..., "content": ...}`` dicts (messages format).

    Returns:
        The model's generated completion (whatever ``llm.generate`` returns).
    """
    parts = []
    for turn in history or []:
        if isinstance(turn, dict):
            # messages format: one dict per utterance
            role = "User" if turn.get("role") == "user" else "Assistant"
            content = turn.get("content", "")
            if content:
                parts.append(f"{role}: {content}")
        else:
            # tuples format: [user_msg, assistant_msg] per turn
            user_msg, bot_msg = turn
            if user_msg:
                parts.append(f"User: {user_msg}")
            if bot_msg:
                parts.append(f"Assistant: {bot_msg}")
    parts.append(f"User: {message}")
    parts.append("Assistant:")  # cue the model to answer as the assistant
    prompt = "\n".join(parts)
    response = llm.generate(
        prompt=prompt,
        max_length=256,  # cap on generated tokens
        temp=0.7,
        top_p=0.95,
    )
    return response
# Gradio chat UI: ChatInterface calls chat_fn with (message, history) on each turn.
demo = gr.ChatInterface(
    fn=chat_fn,
    title="GGUF via KoboldCpp ⚡",
)
# Blocking call — starts the local Gradio web server (default port per Gradio config).
demo.launch()