# NOTE(review): the following lines are page-scrape residue from the hosting
# UI ("Spaces: Sleeping"), not part of the program — kept as comments only.
# Spaces:
# Sleeping
# Sleeping
import wandb
import yaml
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments

from data.datasets import load_and_tokenize_data
from utils.monitor import measure_resources

# Load the experiment configuration (wandb project/entity, training
# hyper-parameters, and the list of models to evaluate).
with open('config/config.yaml', 'r') as f:
    config = yaml.safe_load(f)

# Initialise the Weights & Biases run for experiment tracking.
wandb.init(project=config['wandb']['project'], entity=config['wandb']['entity'])

# Load the train/test splits (presumably already tokenised, per the helper's
# name — TODO confirm, since evaluate_model tokenises again per model).
train_dataset, test_dataset = load_and_tokenize_data(config)
def evaluate_model(model_name):
    """Fine-tune and evaluate *model_name*, logging resources and metrics to wandb.

    Reads the module-level ``train_dataset``/``test_dataset`` splits and
    ``config``; the globals are not mutated (tokenised copies are bound to
    local names).

    Args:
        model_name: Hugging Face model identifier passed to ``from_pretrained``.
    """
    # Load the model and its matching tokenizer.
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Tokenize the data for this model's vocabulary.
    # BUGFIX: the original rebound the module-level names
    # (``train_dataset = train_dataset.map(...)``), which makes them function
    # locals and raises UnboundLocalError on first use — tokenised copies are
    # now bound to fresh local names instead.
    train_tok = train_dataset.map(
        lambda x: tokenizer(x['text'], padding='max_length', truncation=True),
        batched=True,
    )
    test_tok = test_dataset.map(
        lambda x: tokenizer(x['text'], padding='max_length', truncation=True),
        batched=True,
    )

    # Define the training arguments (one output dir per evaluated model).
    training_args = TrainingArguments(
        output_dir=f'./results/{model_name}',
        num_train_epochs=config['training']['num_epochs'],
        per_device_train_batch_size=config['training']['batch_size'],
        per_device_eval_batch_size=config['training']['batch_size'],
        evaluation_strategy='epoch',
        save_steps=10_000,
        save_total_limit=2,
        logging_dir='./logs',
        logging_steps=10,
    )

    # Create the Trainer.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_tok,
        eval_dataset=test_tok,
    )

    # Measure resources while running the model; measure_resources is assumed
    # to drive trainer training and return (peak memory, wall time) — TODO confirm.
    peak_memory, training_time = measure_resources(trainer, model_name)

    # Evaluate and log performance plus resource usage to wandb.
    metrics = trainer.evaluate()
    wandb.log({
        'model_name': model_name,
        'peak_memory_MB': peak_memory,
        'training_time_seconds': training_time,
        **metrics,
    })
# Evaluate each model listed in the configuration.
for model_name in config['evaluation']['models']:
    evaluate_model(model_name)