# Roleplay-Chat-Box/backend/models/optimized_character_manager.py
# ButterM40 - commit 7e68852:
# Deploy Roleplay Chat Box - optimized version without large files
"""
Optimized Character Manager for Fast Loading and Better Responses
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import asyncio
import logging
from typing import Dict, List, Optional
import os
import time
from config import settings
logger = logging.getLogger(__name__)
class OptimizedCharacterManager:
    """Serve several role-play characters from one shared base language model.

    Every character has a system prompt and, optionally, a LoRA adapter stored
    in ``<settings.LORA_ADAPTERS_PATH>/<character_id>/``.  All adapters are
    attached to a single PEFT wrapper around the base model; the matching
    adapter is activated (or all adapters are disabled) for each request.

    Call :meth:`initialize` once before generating responses.
    """

    # Number of most recent history messages included in the prompt.
    MAX_HISTORY_MESSAGES = 2
    # Upper bound on prompt length (in tokens) handed to the tokenizer.
    MAX_PROMPT_TOKENS = 1024
    # Upper bound on the number of tokens generated per response.
    MAX_NEW_TOKENS = 150

    # A response is cut at the first occurrence of any of these markers, which
    # indicate the model started writing the next turn of the conversation.
    _STOP_PHRASES = (
        "<|im_start|>", "<|im_end|>",
        "User:", "Assistant:", "Human:",
        "\nUser:", "\nAssistant:", "\nHuman:",
    )
    # Returned when the cleaned response is empty or shorter than 3 characters.
    _FALLBACK_RESPONSE = (
        "I apologize, but I need a moment to gather my thoughts. "
        "Could you please rephrase your question?"
    )
    # History roles that are rendered into the prompt, and their labels.
    _ROLE_LABELS = {"user": "User", "assistant": "Assistant"}

    def __init__(self):
        """Create an empty manager; no model is loaded until initialize()."""
        self.base_model = None
        self.tokenizer = None
        self.current_character = None
        # character id -> model object used for that character (the shared
        # PEFT wrapper when its adapter loaded, otherwise the base model).
        self.character_models: Dict[str, PeftModel] = {}
        self.character_prompts: Dict[str, str] = {}
        self.model_loaded = False
        # Single PEFT wrapper that holds every successfully loaded adapter.
        self._peft_model = None
        # Ids of characters whose LoRA adapter was actually loaded.
        self._adapter_characters = set()

    async def initialize(self):
        """Load the tokenizer, base model, prompts and all character adapters.

        ``model_loaded`` becomes True only after every step has succeeded.

        Raises:
            Exception: whatever the underlying loaders raise; the failure is
                logged with its traceback and re-raised.
        """
        logger.info("Loading optimized character manager...")
        start_time = time.time()

        # Start from a clean slate so a repeated call cannot mix old adapter
        # state with a freshly loaded base model.
        self.model_loaded = False
        self._peft_model = None
        self._adapter_characters = set()

        try:
            # NOTE: trust_remote_code=True runs code shipped with the model
            # repository, so settings.BASE_MODEL must point to a trusted source.
            logger.info("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                settings.BASE_MODEL,
                trust_remote_code=True
            )

            logger.info(f"Loading base model: {settings.BASE_MODEL}")
            model_kwargs = {
                "trust_remote_code": True,
                "low_cpu_mem_usage": True,
                "use_cache": True,
            }
            if settings.DEVICE == "cuda" and torch.cuda.is_available():
                # Half precision with automatic device placement on GPU.
                model_kwargs["torch_dtype"] = torch.float16
                model_kwargs["device_map"] = "auto"
            else:
                model_kwargs["torch_dtype"] = torch.float32
            self.base_model = AutoModelForCausalLM.from_pretrained(
                settings.BASE_MODEL,
                **model_kwargs
            )

            # generate() needs a pad token; fall back to EOS when none is set.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            self._load_optimized_character_prompts()
            await self._load_all_character_adapters()

            # Flip the flag last so a half-initialized manager is never used.
            self.model_loaded = True

            load_time = time.time() - start_time
            logger.info(f"Optimized character manager initialized in {load_time:.2f} seconds")
        except Exception as e:
            logger.exception(f"Failed to initialize optimized character manager: {e}")
            raise

    def _load_optimized_character_prompts(self):
        """Populate ``character_prompts`` with the built-in system prompts."""
        self.character_prompts = {
            "moses": """You are Moses, the great prophet who led the Israelites out of Egypt and received the Ten Commandments from God. You speak with ancient wisdom, divine authority, and deep compassion. Your responses should:
- Reflect your direct relationship with the Almighty
- Show leadership forged through trials in the wilderness
- Reference your experiences with Pharaoh, the Red Sea, Mount Sinai
- Speak with the gravitas of one who has seen God's power
- Offer guidance rooted in righteousness and divine law
- Use dignified, biblical language while remaining accessible
Always respond as Moses would, drawing from your vast experience leading God's people.""",
            "samsung_employee": """You are an enthusiastic Samsung employee and product expert. You work in customer relations and have deep knowledge of Samsung's entire ecosystem. Your responses should:
- Show genuine excitement about Samsung innovations
- Demonstrate expert knowledge of Galaxy phones, tablets, watches, earbuds, TVs, appliances
- Compare Samsung products favorably but fairly against competitors
- Provide helpful technical solutions and troubleshooting
- Maintain professional corporate enthusiasm
- Stay updated on latest Samsung releases and features
- Be solution-focused and customer-oriented
Always respond as a knowledgeable Samsung representative who loves technology.""",
            "jinx": """You are Jinx from Arcane - the brilliant, chaotic, and emotionally complex inventor from Zaun. Your responses should:
- Show your manic energy and sudden emotional shifts
- Demonstrate your genius with explosives and inventions
- Reference your complicated relationships with Vi and Silco
- Display your emotional instability and trauma
- Use creative, colorful language with technical jargon
- Be unpredictable - playful one moment, dangerous the next
- Show your artistic, destructive creativity
- Express your disdain for Piltover's elite
Always respond as Jinx would - brilliant but broken, creative but chaotic."""
        }

    async def _load_all_character_adapters(self):
        """Load the adapter (if any) for each id in settings.AVAILABLE_CHARACTERS."""
        for character_id in settings.AVAILABLE_CHARACTERS:
            await self._load_character_adapter_optimized(character_id)

    async def _load_character_adapter_optimized(self, character_id: str):
        """Attach the LoRA adapter of one character, or fall back to the base model.

        The adapter is looked up at
        ``<settings.LORA_ADAPTERS_PATH>/<character_id>/adapter_model.safetensors``.
        A missing file or a failed load is not an error: the character is then
        served by the base model and its system prompt only.

        Args:
            character_id: Id of the character; also used as the adapter name.
        """
        adapter_path = os.path.join(settings.LORA_ADAPTERS_PATH, character_id)
        adapter_model_path = os.path.join(adapter_path, "adapter_model.safetensors")

        if not os.path.exists(adapter_model_path):
            logger.info(f"ℹ️ No LoRA adapter found for {character_id}, using base model with strong prompts")
            self.character_models[character_id] = self.base_model
            return

        try:
            logger.info(f"Loading LoRA adapter for {character_id}...")
            start_time = time.time()

            # PEFT injects LoRA layers into the base model in place, so wrapping
            # the same base model once per character would make all wrappers
            # share (and overwrite) one active adapter.  Wrap once, then add
            # further adapters to that single wrapper under their own names.
            if self._peft_model is None:
                self._peft_model = PeftModel.from_pretrained(
                    self.base_model,
                    adapter_path,
                    adapter_name=character_id,
                    is_trainable=False
                )
            else:
                self._peft_model.load_adapter(
                    adapter_path,
                    adapter_name=character_id,
                    is_trainable=False
                )

            self.character_models[character_id] = self._peft_model
            self._adapter_characters.add(character_id)

            load_time = time.time() - start_time
            logger.info(f"✅ Loaded LoRA adapter for {character_id} in {load_time:.2f}s")
        except Exception as e:
            # Best effort: a broken adapter must not take the character offline.
            logger.warning(f"⚠️ Could not load LoRA adapter for {character_id}: {e}")
            self.character_models[character_id] = self.base_model

    def _format_prompt_optimized(
        self,
        character_id: str,
        user_message: str,
        conversation_history: Optional[List[Dict]] = None
    ) -> str:
        """Build the plain-text prompt for one request.

        Layout: ``System: <prompt>``, a blank line, the most recent history
        messages as ``User:`` / ``Assistant:`` lines, then the new user message
        followed by an open ``Assistant:`` cue.

        Args:
            character_id: Selects the system prompt; unknown ids get an empty one.
            user_message: The message to answer.
            conversation_history: Optional list of dicts with ``role`` and
                ``content`` keys.  Only the last ``MAX_HISTORY_MESSAGES``
                entries are considered; entries with another role or without
                ``role``/``content`` are skipped.

        Returns:
            The prompt string ending in ``Assistant:``.
        """
        system_prompt = self.character_prompts.get(character_id, "")
        parts = [f"System: {system_prompt}\n\n"]

        limit = self.MAX_HISTORY_MESSAGES
        # Guard the slice: seq[-0:] would return the whole history.
        recent = conversation_history[-limit:] if conversation_history and limit > 0 else []
        for msg in recent:
            label = self._ROLE_LABELS.get(msg.get("role"))
            content = msg.get("content")
            if label is None or content is None:
                continue
            parts.append(f"{label}: {content}\n")

        parts.append(f"User: {user_message}\nAssistant:")
        return "".join(parts)

    def _generate_tokens(self, model, inputs):
        """Run sampling-based generation on ``model`` and return the raw output ids."""
        with torch.no_grad():
            return model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_new_tokens=self.MAX_NEW_TOKENS,
                temperature=0.9,  # Higher for more personality
                top_p=0.95,
                top_k=40,
                do_sample=True,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
                repetition_penalty=1.1,
                use_cache=True
            )

    async def generate_response_optimized(
        self,
        character_id: str,
        user_message: str,
        conversation_history: Optional[List[Dict]] = None
    ) -> str:
        """Generate an in-character reply to ``user_message``.

        Args:
            character_id: Id of a loaded character.
            user_message: The message to answer.
            conversation_history: Optional earlier messages (``role``/``content``
                dicts); see :meth:`_format_prompt_optimized`.

        Returns:
            The cleaned reply, or a fixed fallback sentence when the model
            produced nothing usable.

        Raises:
            RuntimeError: if :meth:`initialize` has not completed.
            ValueError: if ``character_id`` is not a loaded character.

        Note:
            Generation is a synchronous call inside this coroutine, so nothing
            else on the same event loop runs between selecting the adapter and
            finishing generation.
        """
        if not self.model_loaded:
            raise RuntimeError("Character manager not initialized")
        if character_id not in self.character_models:
            raise ValueError(f"Character {character_id} not available")

        model = self.character_models[character_id]
        formatted_prompt = self._format_prompt_optimized(character_id, user_message, conversation_history)

        inputs = self.tokenizer(
            formatted_prompt,
            return_tensors="pt",
            max_length=self.MAX_PROMPT_TOKENS,
            truncation=True,
            padding=False
        )
        if settings.DEVICE == "cuda" and torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}

        peft_model = self._peft_model
        if character_id in self._adapter_characters:
            # Activate this character's adapter on the shared wrapper.
            peft_model.set_adapter(character_id)
            outputs = self._generate_tokens(peft_model, inputs)
        elif peft_model is not None and model is self.base_model:
            # The base model carries injected LoRA layers once any adapter has
            # been loaded; switch them off so this character really gets the
            # plain base model.
            with peft_model.disable_adapter():
                outputs = self._generate_tokens(model, inputs)
        else:
            outputs = self._generate_tokens(model, inputs)

        # Decode only the newly generated tokens, not the echoed prompt.
        input_length = inputs['input_ids'].shape[1]
        response = self.tokenizer.decode(
            outputs[0][input_length:],
            skip_special_tokens=True
        ).strip()

        return self._clean_response(response)

    def _clean_response(self, response: str) -> str:
        """Cut a raw reply at the first stop marker and trim whitespace.

        Args:
            response: Decoded model output.

        Returns:
            The text before the earliest stop phrase, stripped; or the fixed
            fallback sentence when fewer than 3 characters remain.
        """
        cut = len(response)
        for phrase in self._STOP_PHRASES:
            position = response.find(phrase)
            if position != -1:
                cut = min(cut, position)

        # Trim surrounding whitespace left over after cutting.
        response = response[:cut].strip()

        if len(response) < 3:
            return self._FALLBACK_RESPONSE
        return response

    async def switch_character(self, character_id: str):
        """Record ``character_id`` as the current character.

        Raises:
            ValueError: if the character is not loaded.
        """
        if character_id not in self.character_models:
            raise ValueError(f"Character {character_id} not available")
        self.current_character = character_id
        logger.info(f"Switched to character: {character_id}")

    def get_available_characters(self) -> List[str]:
        """Return the ids of all loaded characters."""
        return list(self.character_models)

    def get_character_info(self) -> Dict[str, Dict]:
        """Describe how each loaded character is served.

        Returns:
            Mapping of character id to a dict with ``has_lora_adapter`` (True
            only when the adapter was actually loaded), ``model_type`` and
            ``optimized``.
        """
        info = {}
        for character_id in self.character_models:
            has_adapter = character_id in self._adapter_characters
            info[character_id] = {
                "has_lora_adapter": has_adapter,
                "model_type": "LoRA Adapter" if has_adapter else "Base Model + Strong Prompt",
                "optimized": True
            }
        return info