File size: 1,031 Bytes
118ab46
8d2c89a
118ab46
8d2c89a
 
 
 
 
 
 
 
 
118ab46
 
 
 
 
 
 
 
193aa50
8d2c89a
193aa50
 
 
8d2c89a
 
193aa50
8d2c89a
118ab46
 
 
a9ba985
 
118ab46
 
3377d0d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import gradio as gr
from llama_cpp import Llama

# Load the quantized chat model in-process at import time.
# Expect the first startup to take a few minutes.
llm = Llama.from_pretrained(
    filename="*Q4_K_M.gguf",  # pattern selecting the 4-bit (Q4_K_M) quantization, chosen for CPU
    repo_id="bartowski/Llama-3-8B-Lexi-Uncensored-GGUF",
    verbose=False,
    n_gpu_layers=0,  # keep every layer on the CPU
    n_threads=4,
    n_ctx=4096,  # context window size, in tokens
)

def _content_text(content):
    """Return the plain text carried by a history entry's content, or None.

    Plain strings are returned unchanged. A list of content blocks is reduced
    to the text of its ``{"text": ...}`` dict blocks joined by newlines
    (best-effort support for block-style content; other block kinds such as
    files are ignored). Anything else -- ``None``, file tuples, components --
    yields ``None`` so the caller can skip it.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        texts = [
            block["text"]
            for block in content
            if isinstance(block, dict) and isinstance(block.get("text"), str)
        ]
        return "\n".join(texts) if texts else None
    return None


def _history_to_messages(history):
    """Convert a Gradio chat history into a list of role/content dicts.

    Accepts both history layouts: pair-style entries ``[user, assistant]``
    and message-style dicts with ``"role"`` and ``"content"`` keys. Entries
    without usable text are skipped rather than forwarded as ``None``.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            text = _content_text(turn.get("content"))
            if role in ("user", "assistant", "system") and text is not None:
                messages.append({"role": role, "content": text})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            human, assistant = turn
            for role, content in (("user", human), ("assistant", assistant)):
                text = _content_text(content)
                if text is not None:
                    messages.append({"role": role, "content": text})
        # Any other entry shape is ignored instead of crashing the chat.
    return messages


def chat(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    Args:
        message: The user's newest message.
        history: Earlier turns as supplied by the chat UI, either as
            ``[user, assistant]`` pairs or as ``{"role", "content"}`` dicts.
            ``None`` or an empty list means no prior context.

    Returns:
        The model's reply text, or a string of the form ``"Error: ..."`` if
        generation fails.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    try:
        response = llm.create_chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # UI boundary: show the failure in the chat window instead of raising.
        return f"Error: {str(e)}"

# Wrap `chat` in a ready-made chat UI and start serving it.
demo = gr.ChatInterface(
    fn=chat,
    description="Context maintained during session, resets on refresh",
    title="AI Chat",
)

demo.launch()