| {"output_dir": "output", "run_name": "training_crossingguard_l", "num_epochs": 2, "batch_size": 64, "eval_batch_size": 64, "gradient_accumulation_steps": 4, "optim": "adamw_torch_fused", "learning_rate": 3e-06, "lr_scheduler_type": "cosine_with_min_lr", "lr_scheduler_kwargs": {"min_lr": 1e-06}, "warmup_ratio": 0.1, "max_grad_norm": 5.0, "weight_decay": 0.01, "adam_beta2": 0.998, "adam_epsilon": 1e-08, "label_smoothing_factor": 0.0, "bf16": true, "fp16": false, "eval_strategy": "steps", "evaluation_steps": 125, "load_best_model_at_end": true, "metric_for_best_model": "eval_loss", "save_strategy": "steps", "save_steps": 125, "save_total_limit": 2, "logging_steps": 10, "enable_llrd": false, "llrd_decay": 0.97, "use_tensorboard": true, "use_wandb": false, "wandb_project": "", "wandb_entity": "", "push_to_hub": false, "hub_model_id": ""} |