from llama_cpp import Llama
import gradio as gr


# Download (if needed) and load the quantized GGUF checkpoint from the
# Hugging Face Hub. This runs at import time, so the model must be
# fully loaded before the UI below comes up.
llm = Llama.from_pretrained(
    repo_id="QuantFactory/DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored-GGUF",
    filename="DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored.Q6_K.gguf",
    n_ctx=2048,       # context window, in tokens
    n_gpu_layers=-1,  # -1 = offload all layers to the GPU (per llama-cpp-python docs)
)
| |
|
def chat(user_input, history):
    """Generate the assistant's next reply for a gr.ChatInterface turn.

    Fix: the original ignored ``history`` entirely, so the model saw only
    the latest message and had no conversational memory. We now replay the
    prior turns before appending the new user message.

    Args:
        user_input: The user's newest message (str).
        history: Prior turns as supplied by gradio — either a list of
            ``{"role": ..., "content": ...}`` dicts (messages format) or a
            list of ``(user_msg, assistant_msg)`` pairs (legacy format).

    Returns:
        The model's reply text (str).
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # gradio "messages" format: already role/content shaped.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy tuple format: (user_msg, assistant_msg); either side
            # may be None/empty on a partial turn.
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_input})
    output = llm.create_chat_completion(messages, max_tokens=256, temperature=0.8)
    return output['choices'][0]['message']['content']
| |
|
# Wire the chat callback into a ready-made chat UI and start serving.
demo = gr.ChatInterface(
    fn=chat,
    title="Q6 Uncensored Voice Agent",
)
demo.launch()