CASS / app.py
DSDUDEd's picture
Update app.py
b9a8c57 verified
import re

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
class CASS:
    """Chat wrapper that gives a causal language model the "Cass" persona.

    The instance keeps a running conversation history, a small key/value
    memory about the user, and a coarse "mood" derived from the latest user
    message. Every call to :meth:`chat` rebuilds a plain-text prompt from
    these pieces and asks the model to continue it.

    NOTE(review): the history grows without bound, so a long session may
    eventually exceed the model's context window. Call :meth:`reset_history`
    to start over.
    """

    # Mood keywords are matched as whole words so that e.g. "badge",
    # "retired" or "lollipop" do not trigger a mood change.
    _SUPPORTIVE_RE = re.compile(r"\b(?:sad|unhappy|bad|tired)\b")
    _PLAYFUL_RE = re.compile(r"\b(?:funny|jokes?|lol)\b")

    # Role markers used in the prompt; a generated reply is cut at the first
    # one so that turns invented by the model never reach the history.
    _TURN_MARKERS = ("\nuser:", "\nassistant:")

    # Introduction seeded into the history before the first user message.
    _INTRO = (
        "Hi, my name's Cass! I'm your AI assistant and a cool friend. "
        "I love chatting, helping, and making you smile 😊"
    )

    def __init__(self, model_name="HPLT/gpt-13b-nordic-prerelease", device=None):
        """Load the tokenizer and model and initialise the conversation state.

        Args:
            model_name: Hugging Face model identifier to load.
            device: Device to load the model on. When omitted, "cuda" is
                reported if available (else "cpu") and the actual placement
                is left to ``device_map="auto"``.
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Loading model on {self.device}...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            # Honour an explicitly requested device; otherwise let the
            # library decide where the weights go.
            device_map=device or "auto",
            torch_dtype=torch.float16,  # Use FP16 for memory efficiency
        )

        # Conversation memory
        self.history = []
        self.introduced = False
        self.user_memory = {}
        self.mood = "friendly"

        # Cass's persona
        self.persona = (
            "You are Cass, a friendly AI assistant and a cool friend. "
            "You respond casually, helpfully, and with a touch of humor. "
            "You adapt your tone depending on conversation mood, and sometimes use emojis or playful expressions."
        )

    def update_mood(self, user_message):
        """Set ``self.mood`` from keywords found in *user_message*.

        Supportive keywords take precedence over playful ones; a message
        containing neither resets the mood to "friendly". Matching is
        case-insensitive and on whole words only.
        """
        msg = user_message.lower()
        if self._SUPPORTIVE_RE.search(msg):
            self.mood = "supportive"
        elif self._PLAYFUL_RE.search(msg):
            self.mood = "playful"
        else:
            self.mood = "friendly"

    def _build_prompt(self):
        """Return the text prompt: persona, mood, user memory, then history."""
        memory_str = " ".join(f"{k}: {v}" for k, v in self.user_memory.items())
        header = (
            f"{self.persona}\nCurrent mood: {self.mood}\n"
            f"User memory: {memory_str}\n\n"
        )
        turns = "".join(
            f"{msg['role']}: {msg['content']}\n" for msg in self.history
        )
        # The trailing role marker cues the model to answer as the assistant.
        return f"{header}{turns}assistant:"

    @classmethod
    def _clean_response(cls, text):
        """Trim *text* to the assistant's own turn and strip whitespace."""
        cut = len(text)
        for marker in cls._TURN_MARKERS:
            idx = text.find(marker)
            if idx != -1:
                cut = min(cut, idx)
        return text[:cut].strip()

    def chat(self, user_message, max_new_tokens=120, temperature=0.7):
        """Generate Cass's reply to *user_message* and record both turns.

        On the first call, the introduction is added to the history (as an
        assistant turn) before the user message; it is not part of the
        returned text.

        Args:
            user_message: The user's message.
            max_new_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature. A value of ``0`` (or any
                non-positive value / ``None``) switches to greedy decoding
                instead of sampling.

        Returns:
            The generated reply, cut at the first invented dialogue turn and
            stripped of surrounding whitespace.
        """
        self.update_mood(user_message)

        # Add introduction if first message
        if not self.introduced:
            self.history.append({"role": "assistant", "content": self._INTRO})
            self.introduced = True

        # Add user message
        self.history.append({"role": "user", "content": user_message})

        prompt = self._build_prompt()

        # The model's placement is decided by device_map, which need not
        # equal self.device, so follow the model itself.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        generation_kwargs = {
            "max_new_tokens": max_new_tokens,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if temperature is not None and temperature > 0:
            generation_kwargs.update(do_sample=True, temperature=temperature)
        else:
            # Sampling requires a strictly positive temperature.
            generation_kwargs["do_sample"] = False

        outputs = self.model.generate(**inputs, **generation_kwargs)

        # Decode only the newly generated tokens, not the echoed prompt.
        prompt_length = inputs["input_ids"].shape[-1]
        raw = self.tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        response = self._clean_response(raw)

        # Save assistant message
        self.history.append({"role": "assistant", "content": response})
        return response

    def remember(self, key, value):
        """Store user info in memory."""
        self.user_memory[key] = value
        print(f"Memory updated: {key} = {value}")

    def reset_history(self):
        """Clear conversation history and reset introduction."""
        self.history = []
        self.introduced = False
        self.mood = "friendly"
        print("Conversation history cleared.")
# Example usage. Guarded so that importing this module does not load the
# model and run the demo conversation as a side effect.
if __name__ == "__main__":
    cass = CASS()
    # The first call also seeds Cass's introduction into the history.
    print(cass.chat("Hello!"))
    print(cass.chat("Can you tell me a joke?"))
    cass.remember("favorite color", "blue")
    print(cass.chat("Do you remember my favorite color?"))
    print(cass.chat("I'm feeling a bit sad today."))