import json
import os
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# --- Config ---
MODEL_NAME = "DSDUDEd/Cass-Beta1.3"  # or "DSDUDEd/Dave"
MEMORY_FILE = "cass_memory.json"
MAX_MEMORY = 50  # max number of (user, AI) message pairs to keep

# --- Load model and tokenizer ---
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
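# Note: the model loads in full fp32 precision here; on a GPU you could
# optionally pass torch_dtype=torch.float16 to from_pretrained to roughly
# halve memory use.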

# --- Load memory ---
if os.path.exists(MEMORY_FILE):
    with open(MEMORY_FILE, "r") as f:
        memory = json.load(f)
else:
    memory = []

def save_memory():
    # Trim to the most recent MAX_MEMORY pairs, both in memory and on disk,
    # so the prompt context and the JSON file stay bounded
    del memory[:-MAX_MEMORY]
    with open(MEMORY_FILE, "w") as f:
        json.dump(memory, f, indent=2)

# --- Chat function ---
def chat_with_ai(user_input):
    # Build the prompt from the running conversation memory
    context = " ".join(f"User: {u} AI: {c}" for u, c in memory)
    input_text = context + f" User: {user_input} AI:"
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,  # cap the reply length; max_length=150 would count the prompt too and fail once the context grows
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text includes the prompt; keep only what follows the last "AI:"
    new_reply = reply.split("AI:")[-1].strip()
    # Add to memory and save
    memory.append((user_input, new_reply))
    save_memory()
    return "", memory  # clear the textbox, push the updated history to the Chatbot
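
# Note: memory's (user, reply) pairs use gr.Chatbot's classic "tuples" format;
# newer Gradio versions prefer type="messages" and may log a deprecation
# warning for this format.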

def clear_memory():
    # Reset both the in-memory history and the persisted file, so cleared
    # messages do not reappear on the next turn
    memory.clear()
    save_memory()
    return []

# --- Gradio Interface ---
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=memory)
    msg = gr.Textbox(label="You")
    clear = gr.Button("Clear")
    msg.submit(chat_with_ai, [msg], [msg, chatbot])
    clear.click(clear_memory, None, chatbot)

demo.launch()
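
# To run (assuming gradio, transformers, and torch are installed):
#   python app.py   # "app.py" is a placeholder for this file's name
# Gradio serves the UI at http://127.0.0.1:7860 by default.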