Zohaib002 committed on
Commit
f1f2967
·
verified ·
1 Parent(s): 52f3139

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -2
  2. pytorch_model.bin +3 -0
  3. trainer_state.json +50 -0
  4. training_args.bin +1 -1
  5. training_args.json +151 -0
README.md CHANGED
@@ -5,14 +5,14 @@ base_model: gavin124/gpt2-finetuned-cnn-summarization-v2
5
  tags:
6
  - generated_from_trainer
7
  model-index:
8
- - name: GPT2-Fixed-Train
9
  results: []
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
  should probably proofread and complete it, then remove this comment. -->
14
 
15
- # GPT2-Fixed-Train
16
 
17
  This model is a fine-tuned version of [gavin124/gpt2-finetuned-cnn-summarization-v2](https://huggingface.co/gavin124/gpt2-finetuned-cnn-summarization-v2) on the None dataset.
18
 
 
5
  tags:
6
  - generated_from_trainer
7
  model-index:
8
+ - name: GPT2-Fixed-Train-final
9
  results: []
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
  should probably proofread and complete it, then remove this comment. -->
14
 
15
+ # GPT2-Fixed-Train-final
16
 
17
  This model is a fine-tuned version of [gavin124/gpt2-finetuned-cnn-summarization-v2](https://huggingface.co/gavin124/gpt2-finetuned-cnn-summarization-v2) on the None dataset.
18
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa0e044d332484899ce1ab11e6d9a2638d90bbd95e05494dfb27dc6d35990c35
3
+ size 497825203
trainer_state.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 548,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.9124087591240876,
14
+ "grad_norm": 2.495621681213379,
15
+ "learning_rate": 1.788321167883212e-06,
16
+ "loss": 2.2768,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "step": 548,
22
+ "total_flos": 572229550080000.0,
23
+ "train_loss": 2.2857559371168596,
24
+ "train_runtime": 316.5122,
25
+ "train_samples_per_second": 13.838,
26
+ "train_steps_per_second": 1.731
27
+ }
28
+ ],
29
+ "logging_steps": 500,
30
+ "max_steps": 548,
31
+ "num_input_tokens_seen": 0,
32
+ "num_train_epochs": 1,
33
+ "save_steps": 500,
34
+ "stateful_callbacks": {
35
+ "TrainerControl": {
36
+ "args": {
37
+ "should_epoch_stop": false,
38
+ "should_evaluate": false,
39
+ "should_log": false,
40
+ "should_save": true,
41
+ "should_training_stop": true
42
+ },
43
+ "attributes": {}
44
+ }
45
+ },
46
+ "total_flos": 572229550080000.0,
47
+ "train_batch_size": 8,
48
+ "trial_name": null,
49
+ "trial_params": null
50
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3c6a8e6f4846f51429261ef023fb15bcc1b38b273f79bd0c771a502e4dd0f21
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab824ad7b4edba3a150ff095c4192a3f3125714756b3aef8e136f9b3a57c11f9
3
  size 5969
training_args.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_dir": "/content/drive/MyDrive/GPT2-Fixed-Train-final",
3
+ "overwrite_output_dir": false,
4
+ "do_train": false,
5
+ "do_eval": false,
6
+ "do_predict": false,
7
+ "eval_strategy": "no",
8
+ "prediction_loss_only": false,
9
+ "per_device_train_batch_size": 8,
10
+ "per_device_eval_batch_size": 1,
11
+ "per_gpu_train_batch_size": null,
12
+ "per_gpu_eval_batch_size": null,
13
+ "gradient_accumulation_steps": 1,
14
+ "eval_accumulation_steps": null,
15
+ "eval_delay": 0,
16
+ "torch_empty_cache_steps": null,
17
+ "learning_rate": 2e-05,
18
+ "weight_decay": 0.01,
19
+ "adam_beta1": 0.9,
20
+ "adam_beta2": 0.999,
21
+ "adam_epsilon": 1e-08,
22
+ "max_grad_norm": 1.0,
23
+ "num_train_epochs": 1,
24
+ "max_steps": -1,
25
+ "lr_scheduler_type": "linear",
26
+ "lr_scheduler_kwargs": {},
27
+ "warmup_ratio": 0.0,
28
+ "warmup_steps": 0,
29
+ "log_level": "passive",
30
+ "log_level_replica": "warning",
31
+ "log_on_each_node": true,
32
+ "logging_dir": "GPT2-Fixed-Train/runs/Dec01_17-11-13_0693999823d4",
33
+ "logging_strategy": "steps",
34
+ "logging_first_step": false,
35
+ "logging_steps": 500,
36
+ "logging_nan_inf_filter": true,
37
+ "save_strategy": "steps",
38
+ "save_steps": 500,
39
+ "save_total_limit": null,
40
+ "save_safetensors": true,
41
+ "save_on_each_node": false,
42
+ "save_only_model": false,
43
+ "restore_callback_states_from_checkpoint": false,
44
+ "no_cuda": false,
45
+ "use_cpu": false,
46
+ "use_mps_device": false,
47
+ "seed": 42,
48
+ "data_seed": null,
49
+ "jit_mode_eval": false,
50
+ "bf16": false,
51
+ "fp16": true,
52
+ "fp16_opt_level": "O1",
53
+ "half_precision_backend": "auto",
54
+ "bf16_full_eval": false,
55
+ "fp16_full_eval": false,
56
+ "tf32": null,
57
+ "local_rank": 0,
58
+ "ddp_backend": null,
59
+ "tpu_num_cores": null,
60
+ "tpu_metrics_debug": false,
61
+ "debug": [],
62
+ "dataloader_drop_last": false,
63
+ "eval_steps": null,
64
+ "dataloader_num_workers": 0,
65
+ "dataloader_prefetch_factor": null,
66
+ "past_index": -1,
67
+ "run_name": null,
68
+ "disable_tqdm": false,
69
+ "remove_unused_columns": true,
70
+ "label_names": null,
71
+ "load_best_model_at_end": false,
72
+ "metric_for_best_model": null,
73
+ "greater_is_better": null,
74
+ "ignore_data_skip": false,
75
+ "fsdp": [],
76
+ "fsdp_min_num_params": 0,
77
+ "fsdp_config": {
78
+ "min_num_params": 0,
79
+ "xla": false,
80
+ "xla_fsdp_v2": false,
81
+ "xla_fsdp_grad_ckpt": false
82
+ },
83
+ "fsdp_transformer_layer_cls_to_wrap": null,
84
+ "accelerator_config": {
85
+ "split_batches": false,
86
+ "dispatch_batches": null,
87
+ "even_batches": true,
88
+ "use_seedable_sampler": true,
89
+ "non_blocking": false,
90
+ "gradient_accumulation_kwargs": null
91
+ },
92
+ "parallelism_config": null,
93
+ "deepspeed": null,
94
+ "label_smoothing_factor": 0.0,
95
+ "optim": "adamw_torch_fused",
96
+ "optim_args": null,
97
+ "adafactor": false,
98
+ "group_by_length": false,
99
+ "length_column_name": "length",
100
+ "report_to": [],
101
+ "project": "huggingface",
102
+ "trackio_space_id": "trackio",
103
+ "ddp_find_unused_parameters": null,
104
+ "ddp_bucket_cap_mb": null,
105
+ "ddp_broadcast_buffers": null,
106
+ "dataloader_pin_memory": true,
107
+ "dataloader_persistent_workers": false,
108
+ "skip_memory_metrics": true,
109
+ "use_legacy_prediction_loop": false,
110
+ "push_to_hub": false,
111
+ "resume_from_checkpoint": null,
112
+ "hub_model_id": null,
113
+ "hub_strategy": "every_save",
114
+ "hub_token": "<HUB_TOKEN>",
115
+ "hub_private_repo": null,
116
+ "hub_always_push": false,
117
+ "hub_revision": null,
118
+ "gradient_checkpointing": false,
119
+ "gradient_checkpointing_kwargs": null,
120
+ "include_inputs_for_metrics": false,
121
+ "include_for_metrics": [],
122
+ "eval_do_concat_batches": true,
123
+ "fp16_backend": "auto",
124
+ "push_to_hub_model_id": null,
125
+ "push_to_hub_organization": null,
126
+ "push_to_hub_token": "<PUSH_TO_HUB_TOKEN>",
127
+ "mp_parameters": "",
128
+ "auto_find_batch_size": false,
129
+ "full_determinism": false,
130
+ "torchdynamo": null,
131
+ "ray_scope": "last",
132
+ "ddp_timeout": 1800,
133
+ "torch_compile": false,
134
+ "torch_compile_backend": null,
135
+ "torch_compile_mode": null,
136
+ "include_tokens_per_second": false,
137
+ "include_num_input_tokens_seen": "no",
138
+ "neftune_noise_alpha": null,
139
+ "optim_target_modules": null,
140
+ "batch_eval_metrics": false,
141
+ "eval_on_start": false,
142
+ "use_liger_kernel": false,
143
+ "liger_kernel_config": null,
144
+ "eval_use_gather_object": false,
145
+ "average_tokens_across_devices": true,
146
+ "sortish_sampler": false,
147
+ "predict_with_generate": false,
148
+ "generation_max_length": null,
149
+ "generation_num_beams": null,
150
+ "generation_config": null
151
+ }