import wandb
import yaml
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

from data.datasets import load_and_tokenize_data
from utils.monitor import measure_resources

# Load the configuration
with open('config/config.yaml', 'r') as f:
    config = yaml.safe_load(f)

# Initialize wandb
wandb.init(project=config['wandb']['project'], entity=config['wandb']['entity'])

# Load the data (raw text with a 'text' column; each model is re-tokenized
# with its own tokenizer inside evaluate_model)
train_dataset, test_dataset = load_and_tokenize_data(config)


def evaluate_model(model_name):
    # Load the model and its tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Tokenize the data with this model's tokenizer. Use local names instead of
    # reassigning the module-level datasets: rebinding `train_dataset` inside the
    # function would raise UnboundLocalError, and it would also leak one model's
    # tokenization into the next model's run.
    tokenized_train = train_dataset.map(
        lambda x: tokenizer(x['text'], padding='max_length', truncation=True),
        batched=True,
    )
    tokenized_test = test_dataset.map(
        lambda x: tokenizer(x['text'], padding='max_length', truncation=True),
        batched=True,
    )

    # Define the training arguments
    training_args = TrainingArguments(
        output_dir=f'./results/{model_name}',
        num_train_epochs=config['training']['num_epochs'],
        per_device_train_batch_size=config['training']['batch_size'],
        per_device_eval_batch_size=config['training']['batch_size'],
        evaluation_strategy='epoch',
        save_steps=10_000,
        save_total_limit=2,
        logging_dir='./logs',
        logging_steps=10,
    )

    # Create the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
    )

    # Run training while measuring peak memory and wall-clock time
    # (measure_resources is expected to invoke trainer.train() internally)
    peak_memory, training_time = measure_resources(trainer, model_name)

    # Evaluate the model and log everything to wandb
    metrics = trainer.evaluate()
    wandb.log({
        'model_name': model_name,
        'peak_memory_MB': peak_memory,
        'training_time_seconds': training_time,
        **metrics,
    })


# Evaluate each model listed in the config
for model_name in config['evaluation']['models']:
    evaluate_model(model_name)
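
# ---------------------------------------------------------------------------
# Illustrative sketch of the config/config.yaml layout this script assumes.
# The keys are inferred from the lookups above; the values are placeholders,
# not taken from the actual project config.
#
#   wandb:
#     project: model-benchmark        # hypothetical project name
#     entity: my-team                 # hypothetical wandb entity
#   training:
#     num_epochs: 3
#     batch_size: 16
#   evaluation:
#     models:
#       - bert-base-uncased
#       - distilbert-base-uncased
# ---------------------------------------------------------------------------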