davelotito committed on
Commit
c11bbe2
·
verified ·
1 Parent(s): 6c8ff4a

Training in progress, epoch 2

Browse files
donut_experiment_bayesian_trial_0/.ipynb_checkpoints/hyperparameters-checkpoint.yaml ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __cached__setup_devices: !!python/object/apply:torch.device
2
+ - cuda
3
+ - 0
4
+ _n_gpu: 1
5
+ accelerator_config: !!python/object:transformers.trainer_pt_utils.AcceleratorConfig
6
+ dispatch_batches: null
7
+ even_batches: true
8
+ gradient_accumulation_kwargs: null
9
+ split_batches: false
10
+ use_seedable_sampler: true
11
+ adafactor: false
12
+ adam_beta1: 0.9
13
+ adam_beta2: 0.999
14
+ adam_epsilon: 1.0e-08
15
+ auto_find_batch_size: false
16
+ bf16: false
17
+ bf16_full_eval: false
18
+ data_seed: null
19
+ dataloader_drop_last: false
20
+ dataloader_num_workers: 0
21
+ dataloader_persistent_workers: false
22
+ dataloader_pin_memory: true
23
+ dataloader_prefetch_factor: null
24
+ ddp_backend: null
25
+ ddp_broadcast_buffers: null
26
+ ddp_bucket_cap_mb: null
27
+ ddp_find_unused_parameters: null
28
+ ddp_timeout: 1800
29
+ debug: []
30
+ deepspeed: null
31
+ deepspeed_plugin: null
32
+ disable_tqdm: false
33
+ dispatch_batches: null
34
+ distributed_state: !!python/object:accelerate.state.PartialState
35
+ _cpu: false
36
+ backend: null
37
+ debug: false
38
+ device: !!python/object/apply:torch.device
39
+ - cuda
40
+ distributed_type: !!python/object/apply:accelerate.utils.dataclasses.DistributedType
41
+ - 'NO'
42
+ fork_launched: false
43
+ local_process_index: 0
44
+ num_processes: 1
45
+ process_index: 0
46
+ do_eval: true
47
+ do_predict: false
48
+ do_train: false
49
+ eval_accumulation_steps: null
50
+ eval_delay: 0
51
+ eval_do_concat_batches: true
52
+ eval_steps: null
53
+ evaluation_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
54
+ - epoch
55
+ fp16: true
56
+ fp16_backend: auto
57
+ fp16_full_eval: false
58
+ fp16_opt_level: O1
59
+ fsdp: []
60
+ fsdp_config:
61
+ min_num_params: 0
62
+ xla: false
63
+ xla_fsdp_grad_ckpt: false
64
+ xla_fsdp_v2: false
65
+ fsdp_min_num_params: 0
66
+ fsdp_transformer_layer_cls_to_wrap: null
67
+ full_determinism: false
68
+ generation_config: null
69
+ generation_max_length: null
70
+ generation_num_beams: null
71
+ gradient_accumulation_steps: 2
72
+ gradient_checkpointing: false
73
+ gradient_checkpointing_kwargs: null
74
+ greater_is_better: false
75
+ group_by_length: false
76
+ half_precision_backend: auto
77
+ hub_always_push: false
78
+ hub_model_id: donut_experiment_bayesian_1
79
+ hub_private_repo: false
80
+ hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy
81
+ - every_save
82
+ hub_token: null
83
+ ignore_data_skip: false
84
+ include_inputs_for_metrics: false
85
+ include_num_input_tokens_seen: false
86
+ include_tokens_per_second: false
87
+ jit_mode_eval: false
88
+ label_names: null
89
+ label_smoothing_factor: 0.0
90
+ learning_rate: 3.476033118766737e-05
91
+ length_column_name: length
92
+ load_best_model_at_end: true
93
+ local_rank: 0
94
+ log_level: passive
95
+ log_level_replica: warning
96
+ log_on_each_node: true
97
+ logging_dir: model_runs/donut_experiment_bayesian_1/runs/May20_16-34-26_ip-172-16-167-107.ec2.internal
98
+ logging_first_step: false
99
+ logging_nan_inf_filter: true
100
+ logging_steps: 100
101
+ logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
102
+ - steps
103
+ lr_scheduler_kwargs: {}
104
+ lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
105
+ - linear
106
+ max_grad_norm: 1.0
107
+ max_steps: -1
108
+ metric_for_best_model: loss
109
+ mp_parameters: ''
110
+ neftune_noise_alpha: null
111
+ no_cuda: false
112
+ num_train_epochs: 2
113
+ optim: !!python/object/apply:transformers.training_args.OptimizerNames
114
+ - adamw_torch
115
+ optim_args: null
116
+ optim_target_modules: null
117
+ output_dir: model_runs/donut_experiment_bayesian_1
118
+ overwrite_output_dir: false
119
+ past_index: -1
120
+ per_device_eval_batch_size: 1
121
+ per_device_train_batch_size: 1
122
+ per_gpu_eval_batch_size: null
123
+ per_gpu_train_batch_size: null
124
+ predict_with_generate: true
125
+ prediction_loss_only: false
126
+ push_to_hub: true
127
+ push_to_hub_model_id: null
128
+ push_to_hub_organization: null
129
+ push_to_hub_token: null
130
+ ray_scope: last
131
+ remove_unused_columns: true
132
+ report_to:
133
+ - tensorboard
134
+ resume_from_checkpoint: null
135
+ run_name: model_runs/donut_experiment_bayesian_1
136
+ save_on_each_node: false
137
+ save_only_model: false
138
+ save_safetensors: true
139
+ save_steps: 500
140
+ save_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
141
+ - epoch
142
+ save_total_limit: 2
143
+ seed: 42
144
+ skip_memory_metrics: true
145
+ sortish_sampler: false
146
+ split_batches: null
147
+ tf32: null
148
+ torch_compile: false
149
+ torch_compile_backend: null
150
+ torch_compile_mode: null
151
+ torchdynamo: null
152
+ tpu_metrics_debug: false
153
+ tpu_num_cores: null
154
+ use_cpu: false
155
+ use_ipex: false
156
+ use_legacy_prediction_loop: false
157
+ use_mps_device: false
158
+ warmup_ratio: 0.0
159
+ warmup_steps: 0
160
+ weight_decay: 0.001930584169502962
donut_experiment_bayesian_trial_0/.ipynb_checkpoints/metrics-checkpoint.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_loss": 0.43816685676574707, "eval_bleu": 0.06408670006728434, "eval_precisions": [0.8294243070362474, 0.7742718446601942, 0.7352112676056338, 0.7013422818791947], "eval_brevity_penalty": 0.0844821210838816, "eval_length_ratio": 0.2880835380835381, "eval_translation_length": 469, "eval_reference_length": 1628, "eval_cer": 0.7588007367632248, "eval_wer": 0.8260088637359255, "eval_runtime": 72.3994, "eval_samples_per_second": 0.787, "eval_steps_per_second": 0.787, "epoch": 1.0}
donut_experiment_bayesian_trial_0/metrics.jsonl CHANGED
@@ -1 +1,2 @@
1
  {"eval_loss": 0.43816685676574707, "eval_bleu": 0.06408670006728434, "eval_precisions": [0.8294243070362474, 0.7742718446601942, 0.7352112676056338, 0.7013422818791947], "eval_brevity_penalty": 0.0844821210838816, "eval_length_ratio": 0.2880835380835381, "eval_translation_length": 469, "eval_reference_length": 1628, "eval_cer": 0.7588007367632248, "eval_wer": 0.8260088637359255, "eval_runtime": 72.3994, "eval_samples_per_second": 0.787, "eval_steps_per_second": 0.787, "epoch": 1.0}
 
 
1
  {"eval_loss": 0.43816685676574707, "eval_bleu": 0.06408670006728434, "eval_precisions": [0.8294243070362474, 0.7742718446601942, 0.7352112676056338, 0.7013422818791947], "eval_brevity_penalty": 0.0844821210838816, "eval_length_ratio": 0.2880835380835381, "eval_translation_length": 469, "eval_reference_length": 1628, "eval_cer": 0.7588007367632248, "eval_wer": 0.8260088637359255, "eval_runtime": 72.3994, "eval_samples_per_second": 0.787, "eval_steps_per_second": 0.787, "epoch": 1.0}
2
+ {"eval_loss": 0.4537677764892578, "eval_bleu": 0.06656125232590335, "eval_precisions": [0.8336842105263158, 0.7751196172248804, 0.7313019390581718, 0.6842105263157895], "eval_brevity_penalty": 0.08826881356184386, "eval_length_ratio": 0.2917690417690418, "eval_translation_length": 475, "eval_reference_length": 1628, "eval_cer": 0.7502191876462136, "eval_wer": 0.8199087588657978, "eval_runtime": 71.9659, "eval_samples_per_second": 0.792, "eval_steps_per_second": 0.792, "epoch": 2.0}
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e0f156c3750a8cfde01721752878c36cc36127c43ad7ed590a15b6e5bb0a67d
3
  size 809103512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a1c11c560b2d37406f92ae4ac062575bdea9eeb145205dc3b486d7684c251fd
3
  size 809103512
runs/May20_16-34-26_ip-172-16-167-107.ec2.internal/events.out.tfevents.1716222867.ip-172-16-167-107.ec2.internal.15611.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44d8365c727c579b2e90538a00196cd2a23ff9cec2ac53071871b61fff21e660
3
- size 10274
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:374a85d7394806b709b0c20575f7371ec81cba471d17cab038d7668bcaa7b6c1
3
+ size 11911