Spaces:
Running
| import gradio as gr | |
| from koboldcpp import KoboldCpp | |
| from huggingface_hub import hf_hub_download | |
# Fetch the quantized GGUF weights from the Hugging Face Hub.
# hf_hub_download caches the file locally and returns the cached path,
# so repeated Space restarts do not re-download the model.
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
# Instantiate the KoboldCpp runner over the downloaded GGUF weights.
# NOTE(review): context_length=2048 and threads=4 are taken from the
# original config — presumably sized for a small CPU Space; confirm
# against the host hardware before changing.
llm = KoboldCpp(model_path=model_path, context_length=2048, threads=4)
def chat_fn(message, history):
    """Generate a reply to *message*, conditioning on the chat history.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Prior turns as supplied by ``gr.ChatInterface`` — either
        ``[user, assistant]`` pairs (the default) or role/content dicts
        (``type="messages"``). Both shapes are handled defensively.

    Returns
    -------
    str
        The model's generated continuation.
    """
    # Bug fix: the original ignored `history`, so every turn was answered
    # with no conversational context. Rebuild a flat chat transcript from
    # the prior turns, then append the new message.
    parts = []
    for turn in history or []:
        if isinstance(turn, dict):
            # type="messages" style: {"role": ..., "content": ...}
            role = turn.get("role", "user").capitalize()
            parts.append(f"{role}: {turn.get('content', '')}")
        else:
            # default style: [user_message, assistant_message] pair
            user_msg, bot_msg = turn
            if user_msg:
                parts.append(f"User: {user_msg}")
            if bot_msg:
                parts.append(f"Assistant: {bot_msg}")
    parts.append(f"User: {message}")
    parts.append("Assistant:")
    prompt = "\n".join(parts)

    # NOTE(review): plain "User:/Assistant:" framing — TinyLlama-Chat may
    # respond better with its native chat template; confirm if quality
    # matters.
    response = llm.generate(
        prompt=prompt,
        max_length=256,
        temp=0.7,
        top_p=0.95,
    )
    return response
# Expose the handler through a Gradio chat UI. The variable must be
# named `demo` — Hugging Face Spaces looks for that name when serving.
demo = gr.ChatInterface(fn=chat_fn, title="GGUF via KoboldCpp ⚡")
demo.launch()