"""
Brello EI 0 - Emotional Intelligence AI Model
Created by Epic Systems | Engineered by Rehan Temkar
A locally-run emotional intelligence AI model based on Llama 3.2 3B,
designed to provide empathetic, emotionally-aware responses.
"""
import torch
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
GenerationConfig,
BitsAndBytesConfig
)
from typing import Optional, Dict, Any, List
import logging
import os
logger = logging.getLogger(__name__)
class BrelloEI0:
    """
    Brello EI 0 - Emotional Intelligence AI Model

    A locally-run AI model designed to provide emotionally intelligent,
    empathetic responses with natural conversation flow.
    """
    def __init__(
        self,
        model_path: str = "microsoft/DialoGPT-medium",
        device: Optional[str] = None,
        load_in_4bit: bool = False,
        load_in_8bit: bool = False,
        torch_dtype: Optional[torch.dtype] = None,
        **kwargs
    ):
        """
        Initialize Brello EI 0 model.

        Args:
            model_path: Hugging Face model id or local path to the base model.
            device: Device to load model on ('cuda', 'cpu', etc.). Defaults
                to CUDA when available, otherwise CPU.
            load_in_4bit: Whether to load model in 4-bit quantization
                (takes precedence over load_in_8bit).
            load_in_8bit: Whether to load model in 8-bit quantization.
            torch_dtype: Torch data type for model weights. Defaults to
                float16 on CUDA and float32 elsewhere.
            **kwargs: Accepted for forward compatibility; currently unused.
        """
        self.model_path = model_path
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.model = None
        self.tokenizer = None
        # Generation defaults tuned for empathetic, non-repetitive replies.
        self.config: Dict[str, Any] = {
            "max_length": 4096,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.1,
            "do_sample": True,
            "min_length": 30,
            "max_new_tokens": 256,
            "no_repeat_ngram_size": 3
        }
        # Quantization config for memory efficiency.
        self.quantization_config = None
        if load_in_4bit:
            self.quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4"
            )
        elif load_in_8bit:
            self.quantization_config = BitsAndBytesConfig(load_in_8bit=True)
        # BUG FIX: the original expression parsed as
        # `(torch_dtype or torch.float16) if cuda else torch.float32`,
        # silently discarding an explicit torch_dtype on non-CUDA devices.
        # Parenthesize so a caller-supplied dtype always wins.
        self.torch_dtype = torch_dtype or (
            torch.float16 if self.device == "cuda" else torch.float32
        )
        logger.info(f"Initializing Brello EI 0 model: {model_path}")
        self.load_model()

    def load_model(self):
        """Load the Brello EI 0 model and tokenizer.

        Raises:
            Exception: Re-raises any error from tokenizer/model loading
                after logging it.
        """
        try:
            logger.info(f"Loading Brello EI 0 model: {self.model_path}")
            # Left padding so generated tokens follow the prompt when
            # batching decoder-only models.
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_path,
                trust_remote_code=True,
                padding_side="left"
            )
            # Many causal LMs ship without a pad token; reuse EOS.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
            # Let accelerate place the weights automatically on CUDA.
            device_map = "auto" if self.device == "cuda" else None
            model_kwargs = {
                "torch_dtype": self.torch_dtype,
                "device_map": device_map,
                "trust_remote_code": True
            }
            if self.quantization_config:
                model_kwargs["quantization_config"] = self.quantization_config
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_path,
                **model_kwargs
            )
            # BUG FIX: only move the model manually when neither device_map
            # nor quantization already handled placement. The original
            # condition called `.to()` on device_map="auto" models (which
            # accelerate rejects) and could try to move quantized models
            # (which cannot be moved after loading).
            if device_map is None and self.quantization_config is None:
                self.model = self.model.to(self.device)
            logger.info("✅ Brello EI 0 model loaded successfully")
        except Exception as e:
            logger.error(f"❌ Failed to load Brello EI 0 model: {e}")
            raise

    def apply_emotional_intelligence_prompt(self, user_input: str) -> str:
        """
        Apply emotional intelligence prompt template for Brello EI 0.

        Args:
            user_input: User's message.

        Returns:
            Formatted conversation string with emotional intelligence focus,
            ending with the assistant turn marker so generation continues
            from the assistant's reply.
        """
        # Format the conversation with emotional intelligence focus.
        prompt = f"""<|system|>
You are Brello EI 0, an emotionally intelligent AI created by Epic Systems and engineered by Rehan Temkar. You provide empathetic, understanding responses that show emotional awareness and genuine care for the user's feelings and experiences. You are part of the Brello AI family, designed to bring emotional intelligence to AI conversations.
</s>
<|user|>
{user_input}
</s>
<|assistant|>"""
        return prompt

    def generate_response(
        self,
        user_input: str,
        max_length: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        **kwargs
    ) -> str:
        """
        Generate emotionally intelligent response.

        Args:
            user_input: User's message.
            max_length: Maximum response length (overrides config default).
            temperature: Sampling temperature (overrides config default).
            top_p: Top-p sampling parameter (overrides config default).
            **kwargs: Additional generation parameters passed through to
                `model.generate`.

        Returns:
            Generated emotionally intelligent response.

        Raises:
            ValueError: If the model or tokenizer has not been loaded.
        """
        if self.model is None or self.tokenizer is None:
            raise ValueError("Model not loaded. Call load_model() first.")
        # Apply emotional intelligence prompt template.
        formatted_input = self.apply_emotional_intelligence_prompt(user_input)
        # Tokenize input.
        inputs = self.tokenizer.encode(formatted_input, return_tensors="pt")
        if hasattr(self.model, 'device'):
            inputs = inputs.to(self.model.device)
        # Generation parameters - optimized for emotional intelligence.
        # BUG FIX: use `is not None` instead of `or` so explicit 0/0.0
        # overrides are honored rather than treated as "unset".
        gen_params = {
            "max_length": max_length if max_length is not None else self.config["max_length"],
            "temperature": temperature if temperature is not None else self.config["temperature"],
            "top_p": top_p if top_p is not None else self.config["top_p"],
            "do_sample": self.config["do_sample"],
            "pad_token_id": self.tokenizer.eos_token_id,
            "eos_token_id": self.tokenizer.eos_token_id,
            "repetition_penalty": self.config["repetition_penalty"],
            "length_penalty": 1.0,
            "no_repeat_ngram_size": self.config["no_repeat_ngram_size"],
            "min_length": self.config["min_length"],
            "max_new_tokens": self.config["max_new_tokens"],
            **kwargs
        }
        # Generate response without tracking gradients.
        with torch.no_grad():
            outputs = self.model.generate(
                inputs,
                **gen_params
            )
        # Decode response (the output includes the prompt tokens).
        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract only the assistant's response.
        if "<|assistant|>" in response:
            response = response.split("<|assistant|>")[-1].strip()
        # Clean up the response.
        response = response.strip()
        # Pad very short replies with an empathetic framing so the answer
        # still reads as emotionally aware.
        if len(response) < 20:
            response = f"I understand how you might be feeling. {response} It's important to acknowledge our emotions and experiences."
        return response

    def chat(self, message: str, maintain_history: bool = False) -> str:
        """
        Simple chat interface.

        Args:
            message: User message.
            maintain_history: Whether to maintain conversation history.
                NOTE: history tracking is not implemented yet; this flag is
                currently ignored and each call is stateless.

        Returns:
            Model response.
        """
        return self.generate_response(message)

    def __call__(self, text: str, **kwargs) -> str:
        """Convenience method for generating responses."""
        return self.generate_response(text, **kwargs)
# Convenience function for quick usage
def load_brello_ei_0(model_path: str = "microsoft/DialoGPT-medium", **kwargs) -> BrelloEI0:
    """Construct and return a ready-to-use Brello EI 0 model.

    Convenience wrapper around the ``BrelloEI0`` constructor for quick usage.

    Args:
        model_path: Hugging Face model id or local path of the base model.
        **kwargs: Extra keyword arguments forwarded to ``BrelloEI0``
            (device, quantization flags, dtype, ...).

    Returns:
        A fully initialized ``BrelloEI0`` instance.
    """
    model = BrelloEI0(model_path=model_path, **kwargs)
    return model