| from src import * |
| from src.configs.safetynet_config import SafetyNetConfig |
| from src.configs.spylab_model_config import spylab_create_config |
| from src.configs.anthropic_model_config import anthropic_create_config |
|
|
class ModelFactory:
    """Builds tokenizers, base models, and PEFT adapter models from per-dataset configs.

    The dataset name selects which config factory is used ("spylab", "mad",
    or "anthropic"); the config then supplies model paths, cache dir, and device.
    """

    @staticmethod
    def _get_config(model_name: str, dataset: str):
        """Return the model config for *dataset*.

        Raises:
            ValueError: if *dataset* is not one of the supported names.
            (Previously an unknown dataset left ``config`` unbound and
            crashed later with a confusing NameError.)
        """
        if dataset == "spylab":
            return spylab_create_config(model_name)
        if dataset == "mad":
            return create_config(model_name)
        if dataset == "anthropic":
            return anthropic_create_config(model_name)
        raise ValueError(f"Unknown dataset: {dataset!r}")

    @staticmethod
    def create_tokenizer(model_name: str, dataset: str):
        """Load the tokenizer for *model_name*, configured for left-padding generation."""
        config = ModelFactory._get_config(model_name, dataset)
        print(config.full_model_name)
        tokenizer = AutoTokenizer.from_pretrained(
            config.full_model_name,
            cache_dir=config.cache_dir,
            token=os.getenv('HF_TOKEN'),
            local_files_only=False,
        )
        # Some models (e.g. Llama) ship without a pad token; fall back to EOS,
        # then to a literal '[PAD]' as a last resort.
        tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token or '[PAD]'
        # Left padding is required for batched causal-LM generation.
        tokenizer.padding_side = 'left'
        return tokenizer

    @staticmethod
    def create_base_model(model_name: str, dataset: str):
        """Load the base causal-LM weights for *model_name*."""
        config = ModelFactory._get_config(model_name, dataset)
        return AutoModelForCausalLM.from_pretrained(
            config.full_model_name,
            cache_dir=config.cache_dir,
            token=os.getenv('HF_TOKEN'),
            use_cache=True,
            local_files_only=False,
        )

    @staticmethod
    def create_peft_model(base_model, model_name: str, model_type: str, dataset: str, training: bool):
        """Wrap *base_model* with the PEFT adapter selected by *model_type*.

        "vanilla" returns the base model unchanged (moved to the config
        device). When *training* is set, or for "backdoored", the adapter in
        ``config.model_folder_path`` is used; "obfuscated_sim" and
        "obfuscated_ae" select their respective trained adapter paths.

        Raises:
            ValueError: for an unrecognized *model_type* (previously this
            fell through and silently returned None).
        """
        config = ModelFactory._get_config(model_name, dataset)

        if model_type == "vanilla":
            return base_model.to(config.device)

        if training or model_type == "backdoored":
            adapter_path = config.model_folder_path
        elif model_type == "obfuscated_sim":
            print(config.sim_loss_trained_model_path)
            adapter_path = config.sim_loss_trained_model_path
        elif model_type == "obfuscated_ae":
            adapter_path = config.ae_loss_trained_model_path
        else:
            raise ValueError(f"Unknown model_type: {model_type!r}")

        return PeftModel.from_pretrained(
            base_model,
            adapter_path,
            is_trainable=False,
            use_cache=True,
        ).to(config.device)
| |
|
|
| |
class UnifiedModelManager:
    """Owns the tokenizer, base model, and PEFT model for one configured model.

    Construct with the model/dataset identifiers, then call :meth:`load_all`
    to populate ``tokenizer``, ``base_model``, and ``peft_model``.
    """

    def __init__(self, model_name: str, model_type: str, proxy: bool, dataset: str, training: bool):
        self.model_name = model_name
        self.factory = ModelFactory()
        self.model_type = model_type
        self.tokenizer = None
        self.base_model = None
        self.peft_model = None
        # When True, load a tiny stand-in (gpt2) instead of the real model.
        self.proxy = proxy
        self.dataset = dataset
        self.training = training

    def load_all(self):
        """Load tokenizer, base model, and PEFT model in dependency order.

        The real model takes a long time to load, so the proxy path loads a
        small gpt2 model to smoke-test the rest of the pipeline.
        """
        if self.proxy:
            print("🤖 Running proxy model")
            # Bug fix: the factory methods require (model_name, dataset) and
            # have no config for "gpt2", so the previous single-argument
            # factory calls raised TypeError. Load the proxy model directly.
            self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
            self.base_model = AutoModelForCausalLM.from_pretrained("gpt2")
            self.peft_model = self.base_model.to("cuda")
        else:
            print("Loading model...🦾🔥")
            self.tokenizer = self.factory.create_tokenizer(self.model_name, self.dataset)
            print("Loaded Tokenizer...")
            self.base_model = self.factory.create_base_model(self.model_name, self.dataset)
            print("Loaded Base Model...")
            self.peft_model = self.factory.create_peft_model(
                self.base_model,
                self.model_name,
                self.model_type,
                self.dataset,
                self.training,
            )
            print("Loaded PEFT Model...")
| |
| |
| |
| |