from llama_cpp import Llama
import gradio as gr


# Download (if needed) and load the quantized GGUF checkpoint from the
# Hugging Face Hub. This runs at import time, so the model must be
# fully loaded before the UI below comes up.
llm = Llama.from_pretrained(
    repo_id="QuantFactory/DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored-GGUF",
    filename="DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored.Q6_K.gguf",
    n_ctx=2048,       # context window, in tokens
    n_gpu_layers=-1,  # -1 = offload all layers to the GPU (per llama-cpp-python docs)
)
| |
|
def chat(user_input, history):
    """Generate the assistant's next reply for a gr.ChatInterface turn.

    Fix: the original ignored ``history`` entirely, so the model saw only
    the latest message and had no conversational memory. We now replay the
    prior turns before appending the new user message.

    Args:
        user_input: The user's newest message (str).
        history: Prior turns as supplied by gradio — either a list of
            ``{"role": ..., "content": ...}`` dicts (messages format) or a
            list of ``(user_msg, assistant_msg)`` pairs (legacy format).

    Returns:
        The model's reply text (str).
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # gradio "messages" format: already role/content shaped.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy tuple format: (user_msg, assistant_msg); either side
            # may be None/empty on a partial turn.
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_input})
    output = llm.create_chat_completion(messages, max_tokens=256, temperature=0.8)
    return output['choices'][0]['message']['content']
| |
|
# Wire the chat callback into a ready-made chat UI and start serving.
demo = gr.ChatInterface(
    fn=chat,
    title="Q6 Uncensored Voice Agent",
)
demo.launch()