JasonGordon committed on
Commit
715abbd
·
verified ·
1 Parent(s): 61675ea

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -0
app.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_cpp import Llama
2
+ import gradio as gr
3
+
4
# Fetch the quantized GGUF weights from the Hugging Face Hub and load them.
# This runs at import time, so the download happens when the app starts and
# the file lands in the runtime's storage.
llm = Llama.from_pretrained(
    repo_id="QuantFactory/DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored-GGUF",
    # Q6_K quantization: chosen for good quality; swap the filename for another quant if needed.
    filename="DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored.Q6_K.gguf",
    # Context window size in tokens (prompt plus generated reply must fit).
    n_ctx=2048,
    # -1 requests offloading all layers to the GPU; only takes effect on GPU hardware.
    n_gpu_layers=-1,
)
11
+
12
def _message_text(content):
    """Return the plain text of one history entry's content, or "" if it has none."""
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # Content given as a list of blocks: keep only the textual parts.
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                parts.append(block["text"])
        return "\n".join(parts)
    return ""


def chat(user_input, history):
    """Generate the model's reply to ``user_input`` using earlier turns as context.

    This is the callback handed to ``gr.ChatInterface``, which passes the new
    user message plus the conversation so far. Previously ``history`` was
    ignored, so every request was answered as if it were the first message.

    ``history`` is accepted in either shape Gradio may supply, depending on
    its version/configuration: a list of ``{"role": ..., "content": ...}``
    dicts, or a list of ``[user_text, assistant_text]`` pairs. Entries without
    usable text (e.g. file attachments, ``None`` placeholders) are skipped.

    Args:
        user_input: The text the user just submitted.
        history: Prior turns of the conversation; may be empty or ``None``.

    Returns:
        The text content of the first completion choice.
    """
    # The model is loaded with a 2048-token context and each reply may use up
    # to 256 tokens, so only the most recent turns are replayed to the model.
    max_prior_messages = 8

    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            text = _message_text(turn.get("content"))
            if role in ("system", "user", "assistant") and text:
                # Forward only role/content; drop any UI-specific extra keys.
                messages.append({"role": role, "content": text})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            for role, content in zip(("user", "assistant"), turn):
                text = _message_text(content)
                if text:
                    messages.append({"role": role, "content": text})

    messages = messages[-max_prior_messages:]
    messages.append({"role": "user", "content": user_input})

    output = llm.create_chat_completion(messages, max_tokens=256, temperature=0.8)
    return output["choices"][0]["message"]["content"]
16
+
17
# Wrap `chat` in Gradio's ready-made chat UI, then start serving it.
demo = gr.ChatInterface(
    fn=chat,
    title="Q6 Uncensored Voice Agent",
)
demo.launch()