Input data filename: project-30-at-2024-08-27-13-17-2b8a675a_ind_theme_split_27_aug_andtitledescr.json
Training date and time: 2024-08-28 20:33:23.939895

Training parameters:
output_dir: ./results/General_Theme
overwrite_output_dir: False
do_train: False
do_eval: True
do_predict: False
eval_strategy: epoch
prediction_loss_only: False
per_device_train_batch_size: 8
per_device_eval_batch_size: 8
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: 1
eval_accumulation_steps: None
eval_delay: 0
learning_rate: 4.4085428408543316e-05
weight_decay: 0.24134342790846802
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1e-08
max_grad_norm: 1.0
num_train_epochs: 5
max_steps: -1
lr_scheduler_type: linear
lr_scheduler_kwargs: {}
warmup_ratio: 0.0
warmup_steps: 0
log_level: passive
log_level_replica: warning
log_on_each_node: True
logging_dir: ./logs
logging_strategy: epoch
logging_first_step: False
logging_steps: 10
logging_nan_inf_filter: True
save_strategy: epoch
save_steps: 500
save_total_limit: None
save_safetensors: True
save_on_each_node: False
save_only_model: False
restore_callback_states_from_checkpoint: False
no_cuda: False
use_cpu: False
use_mps_device: False
seed: 42
data_seed: None
jit_mode_eval: False
use_ipex: False
bf16: False
fp16: False
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: False
fp16_full_eval: False
tf32: None
local_rank: 0
ddp_backend: None
tpu_num_cores: None
tpu_metrics_debug: False
debug: []
dataloader_drop_last: False
eval_steps: None
dataloader_num_workers: 0
dataloader_prefetch_factor: None
past_index: -1
run_name: ./results/General_Theme
disable_tqdm: False
remove_unused_columns: False
label_names: None
load_best_model_at_end: True
metric_for_best_model: eval_accuracy
greater_is_better: True
ignore_data_skip: False
fsdp: []
fsdp_min_num_params: 0
fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
fsdp_transformer_layer_cls_to_wrap: None
accelerator_config: AcceleratorConfig(split_batches=False, dispatch_batches=None, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False)
deepspeed: None
label_smoothing_factor: 0.0
optim: adamw_torch
optim_args: None
adafactor: False
group_by_length: False
length_column_name: length
report_to: ['tensorboard']
ddp_find_unused_parameters: None
ddp_bucket_cap_mb: None
ddp_broadcast_buffers: None
dataloader_pin_memory: True
dataloader_persistent_workers: False
skip_memory_metrics: True
use_legacy_prediction_loop: False
push_to_hub: False
resume_from_checkpoint: None
hub_model_id: None
hub_strategy: every_save
hub_token: None
hub_private_repo: False
hub_always_push: False
gradient_checkpointing: False
gradient_checkpointing_kwargs: None
include_inputs_for_metrics: False
eval_do_concat_batches: True
fp16_backend: auto
evaluation_strategy: None
push_to_hub_model_id: None
push_to_hub_organization: None
push_to_hub_token: None
mp_parameters:
auto_find_batch_size: False
full_determinism: False
torchdynamo: None
ray_scope: last
ddp_timeout: 1800
torch_compile: False
torch_compile_backend: None
torch_compile_mode: None
dispatch_batches: None
split_batches: None
include_tokens_per_second: False
include_num_input_tokens_seen: False
neftune_noise_alpha: None
optim_target_modules: None
batch_eval_metrics: False
eval_on_start: False
distributed_state: Distributed environment: NO
    Num processes: 1
    Process index: 0
    Local process index: 0
    Device: cuda
_n_gpu: 1
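These parameters are the field dump of a Hugging Face transformers.TrainingArguments object. As a reading aid, here is a minimal sketch of how the values that differ from the library defaults would be constructed in code; this is reconstructed from the dump above, not taken from the original training script:

    from transformers import TrainingArguments

    # Sketch reconstructed from the parameter dump; every field not
    # listed here keeps its transformers default.
    training_args = TrainingArguments(
        output_dir="./results/General_Theme",
        logging_dir="./logs",
        eval_strategy="epoch",      # evaluate, log, and checkpoint once per epoch
        logging_strategy="epoch",
        save_strategy="epoch",
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        learning_rate=4.4085428408543316e-05,
        weight_decay=0.24134342790846802,
        num_train_epochs=5,
        lr_scheduler_type="linear",
        seed=42,
        load_best_model_at_end=True,            # reload the best checkpoint when training ends
        metric_for_best_model="eval_accuracy",  # "best" = highest eval accuracy
        greater_is_better=True,
        remove_unused_columns=False,
        report_to=["tensorboard"],
    )

With the eval, logging, and save strategies all set to "epoch" and load_best_model_at_end enabled, the Trainer checkpoints after every epoch and finally restores the one with the highest eval_accuracy (epoch 5 in the table below).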
__cached__setup_devices: cuda:0
deepspeed_plugin: None

Best Hyperparameters:
learning_rate: 4.4085428408543316e-05
num_train_epochs: 5
weight_decay: 0.24134342790846802

Training metrics:
train_runtime: 663.6031
train_samples_per_second: 18.656
train_steps_per_second: 2.336
total_flos: 1640004811530240.0
train_loss: 0.1455306170063634
epoch: 5.0

All Epoch Metrics:
Epoch | Training Loss | Validation Loss     | Accuracy           | Precision          | Recall | F1                 | BCE Loss            | Runtime  | Samples Per Second | Steps Per Second
1.0   | 0.267         | 0.2946789860725403  | 0.9354838709677419 | 0.0                | 0.0    | 0.0                | 0.2946789638014845  | 663.6031 | 18.656             | 2.336
2.0   | 0.1941        | 0.206285759806633   | 0.9580645161290322 | 0.85               | 0.425  | 0.5666666666666667 | 0.20628575579050104 | 663.6031 | 18.656             | 2.336
3.0   | 0.1162        | 0.14209377765655518 | 0.9693548387096774 | 0.8888888888888888 | 0.6    | 0.7164179104477612 | 0.14209377192670633 | 663.6031 | 18.656             | 2.336
4.0   | 0.0857        | 0.15750133991241455 | 0.9612903225806452 | 0.7105263157894737 | 0.675  | 0.6923076923076923 | 0.15750131864997874 | 663.6031 | 18.656             | 2.336
5.0   | 0.0647        | 0.15003962814807892 | 0.9741935483870968 | 0.9285714285714286 | 0.65   | 0.7647058823529412 | 0.15003961238067395 | 663.6031 | 18.656             | 2.336
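The per-epoch Accuracy, Precision, Recall, F1, and BCE Loss columns come from a custom compute_metrics callback (the Trainer reports only loss on its own). Below is a minimal sketch of a function that would produce these columns, assuming a single-logit binary head trained with binary cross-entropy; that assumption is suggested by the Validation Loss and BCE Loss columns agreeing to seven decimal places, but the actual metrics code is not part of this log:

    import torch
    from sklearn.metrics import accuracy_score, precision_recall_fscore_support

    def compute_metrics(eval_pred):
        # eval_pred is the (logits, labels) pair handed over by transformers.Trainer.
        logits, labels = eval_pred
        probs = torch.sigmoid(torch.tensor(logits).squeeze(-1).float())
        preds = (probs > 0.5).long().numpy()
        # zero_division=0 reproduces the epoch-1 row, where the model predicted
        # no positives and precision/recall/F1 all come out as 0.0.
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average="binary", zero_division=0
        )
        # The "BCE Loss" column: binary cross-entropy over the sigmoid outputs.
        bce = torch.nn.functional.binary_cross_entropy(
            probs, torch.tensor(labels, dtype=torch.float32)
        ).item()
        return {
            "accuracy": accuracy_score(labels, preds),
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "bce_loss": bce,
        }

The Trainer prefixes these keys with "eval_", which is how metric_for_best_model: eval_accuracy in the parameters above resolves to the Accuracy column. The unrounded learning rate and weight decay under Best Hyperparameters are characteristic of a sampled hyperparameter-search trial, though the log does not record which search tool was used.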