Input data filename: project-30-at-2024-08-27-13-17-2b8a675a_ind_theme_split_27_aug_andtitledescr.json Training date and time: 2024-08-27 14:57:34.590724 Training parameters: output_dir: ./results/Entity_Action overwrite_output_dir: False do_train: False do_eval: True do_predict: False eval_strategy: epoch prediction_loss_only: False per_device_train_batch_size: 8 per_device_eval_batch_size: 8 per_gpu_train_batch_size: None per_gpu_eval_batch_size: None gradient_accumulation_steps: 1 eval_accumulation_steps: None eval_delay: 0 learning_rate: 4.270726644478662e-05 weight_decay: 0.21946446535298075 adam_beta1: 0.9 adam_beta2: 0.999 adam_epsilon: 1e-08 max_grad_norm: 1.0 num_train_epochs: 5 max_steps: -1 lr_scheduler_type: linear lr_scheduler_kwargs: {} warmup_ratio: 0.0 warmup_steps: 0 log_level: passive log_level_replica: warning log_on_each_node: True logging_dir: ./logs logging_strategy: epoch logging_first_step: False logging_steps: 10 logging_nan_inf_filter: True save_strategy: epoch save_steps: 500 save_total_limit: None save_safetensors: True save_on_each_node: False save_only_model: False restore_callback_states_from_checkpoint: False no_cuda: False use_cpu: False use_mps_device: False seed: 42 data_seed: None jit_mode_eval: False use_ipex: False bf16: False fp16: False fp16_opt_level: O1 half_precision_backend: auto bf16_full_eval: False fp16_full_eval: False tf32: None local_rank: 0 ddp_backend: None tpu_num_cores: None tpu_metrics_debug: False debug: [] dataloader_drop_last: False eval_steps: None dataloader_num_workers: 0 dataloader_prefetch_factor: None past_index: -1 run_name: ./results/Entity_Action disable_tqdm: False remove_unused_columns: False label_names: None load_best_model_at_end: True metric_for_best_model: eval_accuracy greater_is_better: True ignore_data_skip: False fsdp: [] fsdp_min_num_params: 0 fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} fsdp_transformer_layer_cls_to_wrap: None accelerator_config: AcceleratorConfig(split_batches=False, dispatch_batches=None, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False) deepspeed: None label_smoothing_factor: 0.0 optim: adamw_torch optim_args: None adafactor: False group_by_length: False length_column_name: length report_to: ['tensorboard'] ddp_find_unused_parameters: None ddp_bucket_cap_mb: None ddp_broadcast_buffers: None dataloader_pin_memory: True dataloader_persistent_workers: False skip_memory_metrics: True use_legacy_prediction_loop: False push_to_hub: False resume_from_checkpoint: None hub_model_id: None hub_strategy: every_save hub_token: None hub_private_repo: False hub_always_push: False gradient_checkpointing: False gradient_checkpointing_kwargs: None include_inputs_for_metrics: False eval_do_concat_batches: True fp16_backend: auto evaluation_strategy: None push_to_hub_model_id: None push_to_hub_organization: None push_to_hub_token: None mp_parameters: auto_find_batch_size: False full_determinism: False torchdynamo: None ray_scope: last ddp_timeout: 1800 torch_compile: False torch_compile_backend: None torch_compile_mode: None dispatch_batches: None split_batches: None include_tokens_per_second: False include_num_input_tokens_seen: False neftune_noise_alpha: None optim_target_modules: None batch_eval_metrics: False eval_on_start: False distributed_state: Distributed environment: NO Num processes: 1 Process index: 0 Local process index: 0 Device: cuda _n_gpu: 1 __cached__setup_devices: cuda:0 deepspeed_plugin: None Best Hyperparameters: learning_rate: 4.270726644478662e-05 num_train_epochs: 5 weight_decay: 0.21946446535298075 Training metrics: train_runtime: 672.3301 train_samples_per_second: 18.414 train_steps_per_second: 2.305 total_flos: 1640004811530240.0 train_loss: 0.12886674880981444 epoch: 5.0 All Epoch Metrics: Epoch Training Loss Validation Loss Accuracy Precision Recall F1 Bce Loss Runtime Samples Per Second Steps Per Second 1.0 0.3189 0.1607036143541336 0.9532258064516129 0.9599056603773585 0.9713603818615751 0.9655990510083036 0.16070359953727179 672.3301 18.414 2.305 2.0 0.1528 0.11495514214038849 0.9725806451612903 0.9808612440191388 0.9785202863961814 0.9796893667861409 0.1149551347974206 672.3301 18.414 2.305 3.0 0.0751 0.10520578175783157 0.9790322580645161 0.981042654028436 0.9880668257756563 0.9845422116527943 0.10520577938253993 672.3301 18.414 2.305 4.0 0.0623 0.08832962810993195 0.9854838709677419 0.9904306220095693 0.9880668257756563 0.9892473118279569 0.08832962441818693 672.3301 18.414 2.305 5.0 0.0353 0.08785796910524368 0.9838709677419355 0.9904076738609112 0.9856801909307876 0.9880382775119617 0.08785795159062294 672.3301 18.414 2.305