# VoxAI / app.py
# Provenance: Hugging Face Space by Fu01978, commit 6a89ca3 (verified), "Update app.py".
import gradio as gr
from koboldcpp import KoboldCpp
from huggingface_hub import hf_hub_download
# Download GGUF model
# Fetch the 4-bit (Q4_K_M) quantized TinyLlama chat model from the Hugging
# Face Hub; hf_hub_download caches the file and returns its local path.
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
# Load KoboldCpp runner
# NOTE(review): the upstream `koboldcpp` project is primarily a server binary;
# confirm this environment actually provides a `KoboldCpp` Python class with
# this constructor signature — this import/usage is not part of the standard
# PyPI surface.
llm = KoboldCpp(
model_path=model_path,
context_length=2048,  # prompt + generation budget in tokens (model context window)
threads=4  # CPU inference threads; tune to the Space's vCPU count
)
def _build_prompt(message, history):
    """Flatten prior chat turns plus the new user message into one prompt string.

    Gradio's ChatInterface passes ``history`` either as a list of
    ``[user, assistant]`` pairs (classic mode) or as a list of
    ``{"role": ..., "content": ...}`` dicts (``type="messages"``); both are
    handled. ``None`` or an empty history yields just the current turn.
    """
    lines = []
    for turn in history or []:
        if isinstance(turn, dict):
            # messages-style history entry
            role = str(turn.get("role", "user")).capitalize()
            lines.append(f"{role}: {turn.get('content', '')}")
        else:
            # pair-style history entry: (user_text, assistant_text)
            user_text, assistant_text = turn
            if user_text:
                lines.append(f"User: {user_text}")
            if assistant_text:
                lines.append(f"Assistant: {assistant_text}")
    lines.append(f"User: {message}")
    lines.append("Assistant:")
    return "\n".join(lines)

def chat_fn(message, history):
    """Generate a reply from the local GGUF model for a Gradio chat turn.

    Fix: the original ignored ``history`` entirely, so the model saw only the
    latest message and had no multi-turn context. Prior turns are now folded
    into the prompt. Signature and return type are unchanged.
    """
    response = llm.generate(
        prompt=_build_prompt(message, history),
        max_length=256,   # cap on generated tokens
        temp=0.7,         # moderate sampling temperature
        top_p=0.95,       # nucleus sampling cutoff
    )
    return response
# Wire the generation callback into Gradio's prebuilt chat UI; ChatInterface
# supplies the message box, history state, and streaming-free request loop.
demo = gr.ChatInterface(
fn=chat_fn,
title="GGUF via KoboldCpp ⚡",
)
# Start the web server (on Hugging Face Spaces this binds the app's port).
demo.launch()