Spaces:

JasonGordon
/

Q6

Paused

JasonGordon committed on Oct 11, 2025

Commit

715abbd

verified ·

1 Parent(s): 61675ea

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+from llama_cpp import Llama
+import gradio as gr
+# Load the model from the repo (downloads on startup, uses runtime storage)
+llm = Llama.from_pretrained(
+    repo_id="QuantFactory/DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored-GGUF",
+    filename="DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored.Q6_K.gguf",  # Use Q6 for good quality; change if needed
+    n_ctx=2048,  # For longer conversations
+    n_gpu_layers=-1  # Offload to GPU if you upgrade hardware
+)
+def chat(user_input, history):
+    messages = [{"role": "user", "content": user_input}]
+    output = llm.create_chat_completion(messages, max_tokens=256, temperature=0.8)
+    return output['choices'][0]['message']['content']
+demo = gr.ChatInterface(fn=chat, title="Q6 Uncensored Voice Agent")
+demo.launch()