devvanshhh commited on
Commit
12c9df9
·
1 Parent(s): 9330ec3

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e97d1601b2e3f59fcff76a066bd87f8bca7ca2f80a0c253e165a2cde4b1766e
3
  size 615849402
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abcf9278bf80f871bbda4ffa9be883d3b9d1c52089b9d33c2ca021af12bc00f8
3
  size 615849402
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c7464e96410b0803829fcaf50c827a2df990e4d630ae154ac0f3d16961ae70f
3
  size 307910594
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6da080e8fde1ff422407acfc1b0714d8f6aa1266c97fc465e5d731386708176d
3
  size 307910594
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7dac90b8b7aae7204dbe1f1064bf8fa4437e2d9a0b4c17eaf969810bf8a5a83
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15552249b9c446195b97b2b0abd07e68ddc68dd1792bd9d1e769855d57c06f5e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee3a1f39b0649bc03eb60d23712514a784906b72a24cf2557a6e6d26f75c9b70
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8387f825847a9b8a2c8387203584ce1062acc3caff7113d13f9f01240448459b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.009255478158593178,
3
- "best_model_checkpoint": "flan-search-terms/checkpoint-4500",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -100,13 +100,44 @@
100
  "eval_samples_per_second": 11.896,
101
  "eval_steps_per_second": 1.487,
102
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  }
104
  ],
105
  "logging_steps": 500,
106
  "max_steps": 7500,
107
  "num_train_epochs": 5,
108
  "save_steps": 500,
109
- "total_flos": 6692058169344000.0,
110
  "trial_name": null,
111
  "trial_params": null
112
  }
 
1
  {
2
+ "best_metric": 0.007984320633113384,
3
+ "best_model_checkpoint": "flan-search-terms/checkpoint-6000",
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
100
  "eval_samples_per_second": 11.896,
101
  "eval_steps_per_second": 1.487,
102
  "step": 4500
103
+ },
104
+ {
105
+ "epoch": 3.33,
106
+ "learning_rate": 1.6666666666666667e-05,
107
+ "loss": 0.0127,
108
+ "step": 5000
109
+ },
110
+ {
111
+ "epoch": 3.67,
112
+ "learning_rate": 1.3333333333333333e-05,
113
+ "loss": 0.0119,
114
+ "step": 5500
115
+ },
116
+ {
117
+ "epoch": 4.0,
118
+ "learning_rate": 1e-05,
119
+ "loss": 0.0114,
120
+ "step": 6000
121
+ },
122
+ {
123
+ "epoch": 4.0,
124
+ "eval_gen_len": 8.544666666666666,
125
+ "eval_loss": 0.007984320633113384,
126
+ "eval_rouge1": 5.3013,
127
+ "eval_rouge2": 4.448,
128
+ "eval_rougeL": 4.9117,
129
+ "eval_rougeLsum": 4.9183,
130
+ "eval_runtime": 255.3664,
131
+ "eval_samples_per_second": 11.748,
132
+ "eval_steps_per_second": 1.468,
133
+ "step": 6000
134
  }
135
  ],
136
  "logging_steps": 500,
137
  "max_steps": 7500,
138
  "num_train_epochs": 5,
139
  "save_steps": 500,
140
+ "total_flos": 8922744225792000.0,
141
  "trial_name": null,
142
  "trial_params": null
143
  }