Input data filename: project-30-at-2024-08-27-13-17-2b8a675a_ind_theme_split_27_aug_andtitledescr.json Training date and time: 2024-08-29 02:34:02.540186 Training parameters: output_dir: ./results/Operational_Theme overwrite_output_dir: False do_train: False do_eval: True do_predict: False eval_strategy: epoch prediction_loss_only: False per_device_train_batch_size: 8 per_device_eval_batch_size: 8 per_gpu_train_batch_size: None per_gpu_eval_batch_size: None gradient_accumulation_steps: 1 eval_accumulation_steps: None eval_delay: 0 learning_rate: 2.6273363081029872e-05 weight_decay: 0.10476604556438612 adam_beta1: 0.9 adam_beta2: 0.999 adam_epsilon: 1e-08 max_grad_norm: 1.0 num_train_epochs: 5 max_steps: -1 lr_scheduler_type: linear lr_scheduler_kwargs: {} warmup_ratio: 0.0 warmup_steps: 0 log_level: passive log_level_replica: warning log_on_each_node: True logging_dir: ./logs logging_strategy: epoch logging_first_step: False logging_steps: 10 logging_nan_inf_filter: True save_strategy: epoch save_steps: 500 save_total_limit: None save_safetensors: True save_on_each_node: False save_only_model: False restore_callback_states_from_checkpoint: False no_cuda: False use_cpu: False use_mps_device: False seed: 42 data_seed: None jit_mode_eval: False use_ipex: False bf16: False fp16: False fp16_opt_level: O1 half_precision_backend: auto bf16_full_eval: False fp16_full_eval: False tf32: None local_rank: 0 ddp_backend: None tpu_num_cores: None tpu_metrics_debug: False debug: [] dataloader_drop_last: False eval_steps: None dataloader_num_workers: 0 dataloader_prefetch_factor: None past_index: -1 run_name: ./results/Operational_Theme disable_tqdm: False remove_unused_columns: False label_names: None load_best_model_at_end: True metric_for_best_model: eval_accuracy greater_is_better: True ignore_data_skip: False fsdp: [] fsdp_min_num_params: 0 fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} fsdp_transformer_layer_cls_to_wrap: None accelerator_config: AcceleratorConfig(split_batches=False, dispatch_batches=None, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False) deepspeed: None label_smoothing_factor: 0.0 optim: adamw_torch optim_args: None adafactor: False group_by_length: False length_column_name: length report_to: ['tensorboard'] ddp_find_unused_parameters: None ddp_bucket_cap_mb: None ddp_broadcast_buffers: None dataloader_pin_memory: True dataloader_persistent_workers: False skip_memory_metrics: True use_legacy_prediction_loop: False push_to_hub: False resume_from_checkpoint: None hub_model_id: None hub_strategy: every_save hub_token: None hub_private_repo: False hub_always_push: False gradient_checkpointing: False gradient_checkpointing_kwargs: None include_inputs_for_metrics: False eval_do_concat_batches: True fp16_backend: auto evaluation_strategy: None push_to_hub_model_id: None push_to_hub_organization: None push_to_hub_token: None mp_parameters: auto_find_batch_size: False full_determinism: False torchdynamo: None ray_scope: last ddp_timeout: 1800 torch_compile: False torch_compile_backend: None torch_compile_mode: None dispatch_batches: None split_batches: None include_tokens_per_second: False include_num_input_tokens_seen: False neftune_noise_alpha: None optim_target_modules: None batch_eval_metrics: False eval_on_start: False distributed_state: Distributed environment: NO Num processes: 1 Process index: 0 Local process index: 0 Device: cuda _n_gpu: 1 __cached__setup_devices: cuda:0 deepspeed_plugin: None Best Hyperparameters: learning_rate: 2.6273363081029872e-05 num_train_epochs: 5 weight_decay: 0.10476604556438612 Training metrics: train_runtime: 666.1029 train_samples_per_second: 18.586 train_steps_per_second: 2.327 total_flos: 1640004811530240.0 train_loss: 0.15333285177907635 epoch: 5.0 All Epoch Metrics: Epoch Training Loss Validation Loss Accuracy Precision Recall F1 Bce Loss Runtime Samples Per Second Steps Per Second 1.0 0.2849 0.22171258926391602 0.9483870967741935 0.8837209302325582 0.7755102040816326 0.826086956521739 0.22171259220801903 666.1029 18.586 2.327 2.0 0.1706 0.2003793716430664 0.9548387096774194 0.8571428571428571 0.8571428571428571 0.8571428571428571 0.20037939044557235 666.1029 18.586 2.327 3.0 0.1365 0.21005958318710327 0.9516129032258065 0.9146341463414634 0.7653061224489796 0.8333333333333334 0.2100595604488477 666.1029 18.586 2.327 4.0 0.0997 0.18039998412132263 0.9661290322580646 0.9230769230769231 0.8571428571428571 0.888888888888889 0.1803999622571323 666.1029 18.586 2.327 5.0 0.0751 0.1746075302362442 0.9709677419354839 0.925531914893617 0.8877551020408163 0.90625 0.1746075075244379 666.1029 18.586 2.327