FormlessAI commited on
Commit
018c2e9
·
verified ·
1 Parent(s): 98b1394

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49846034f50a2a9f5eb42a9e8a6055d6a73c94640135462513558c9a87f7d885
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386a6380325bc3dff1a7a5f881832a0696cbe9be2672febd8c95a996479adb3e
3
  size 1037269336
last-checkpoint/global_step9700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d96dfe1d3b0bce855880e2d23009bef0264fec55853ad94d2e36720de87856c
3
+ size 781993445
last-checkpoint/global_step9700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd77cdbaa722f4b8912db62f39f33a77ebcb2c4b56e744f47b25c61d4f150680
3
+ size 781993509
last-checkpoint/global_step9700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06050d6663481f1a7ff845243d9881b454d43f213a7fb01187ac4f95e030533e
3
+ size 781993509
last-checkpoint/global_step9700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61080b35f30c80f43393735f9295771ec864832f9102797e3e593e019c3378d5
3
+ size 781993509
last-checkpoint/global_step9700/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7499f1c06e56dcadbbf5d0fd1a13a4f469aefe348f76c2b2bff829e5697961f1
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step9600
 
1
+ global_step9700
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a0f72dfbb72f16af6accdc19e004dfec99288ac9c898ec61ccdf4b7c0b05a4b
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b56969535e9a8e88cd3829c988a0a37451d46c9a48a232e2bf2ff895e958e53f
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:535e8f9e9aae202c1444e7d23d7a70248d88b0bea04c6a46c5eac28644caca91
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac5d72eb18852fad4db4fcc6f4250d07f49de688916884e0bd15cf332644e3c4
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bee6b4179c69666f23a2f89ab3e0a78ee0257014c2542355e87d560d6ae8937d
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e093e8dca30af25bb4868596fab940bd5b96385b2a5252906d4fb7506ec6e3c
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3562322dcece4573117ccf7e4cc4e19277c183387ba7f8b2446065a5008e7c67
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a02a2ce27f65153b8be850fa84fb66458319a4fbe52b6b4116118eb9d4b7ccda
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11d855a891c5a4fdc5d95bdbc0aa687ea07b9ae51067bbfd00dce8a66404c36a
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5c6f3cc57d69dd40ef86ebd5faf9e78cc6a0d89512a7f5fd9a4c13cda1f059a
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.5219709873199463,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.395551679023114,
6
  "eval_steps": 50,
7
- "global_step": 9600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -14984,6 +14984,162 @@
14984
  "eval_samples_per_second": 174.882,
14985
  "eval_steps_per_second": 10.967,
14986
  "step": 9600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14987
  }
14988
  ],
14989
  "logging_steps": 5,
@@ -15012,7 +15168,7 @@
15012
  "attributes": {}
15013
  }
15014
  },
15015
- "total_flos": 2.506228892243591e+18,
15016
  "train_batch_size": 4,
15017
  "trial_name": null,
15018
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.521620512008667,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.4100886756796047,
6
  "eval_steps": 50,
7
+ "global_step": 9700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
14984
  "eval_samples_per_second": 174.882,
14985
  "eval_steps_per_second": 10.967,
14986
  "step": 9600
14987
+ },
14988
+ {
14989
+ "epoch": 1.3962785288559383,
14990
+ "grad_norm": 2.4075584411621094,
14991
+ "learning_rate": 3.9504059213659793e-07,
14992
+ "loss": 1.5897,
14993
+ "step": 9605
14994
+ },
14995
+ {
14996
+ "epoch": 1.3970053786887628,
14997
+ "grad_norm": 2.440012216567993,
14998
+ "learning_rate": 3.8514051886811723e-07,
14999
+ "loss": 1.6766,
15000
+ "step": 9610
15001
+ },
15002
+ {
15003
+ "epoch": 1.3977322285215874,
15004
+ "grad_norm": 2.658358335494995,
15005
+ "learning_rate": 3.7536559858959155e-07,
15006
+ "loss": 1.6694,
15007
+ "step": 9615
15008
+ },
15009
+ {
15010
+ "epoch": 1.398459078354412,
15011
+ "grad_norm": 2.324554443359375,
15012
+ "learning_rate": 3.657158559093597e-07,
15013
+ "loss": 1.7643,
15014
+ "step": 9620
15015
+ },
15016
+ {
15017
+ "epoch": 1.3991859281872365,
15018
+ "grad_norm": 2.6129276752471924,
15019
+ "learning_rate": 3.56191315120649e-07,
15020
+ "loss": 1.5292,
15021
+ "step": 9625
15022
+ },
15023
+ {
15024
+ "epoch": 1.399912778020061,
15025
+ "grad_norm": 2.4178617000579834,
15026
+ "learning_rate": 3.467920002014695e-07,
15027
+ "loss": 1.556,
15028
+ "step": 9630
15029
+ },
15030
+ {
15031
+ "epoch": 1.4006396278528856,
15032
+ "grad_norm": 2.558295726776123,
15033
+ "learning_rate": 3.375179348145972e-07,
15034
+ "loss": 1.5579,
15035
+ "step": 9635
15036
+ },
15037
+ {
15038
+ "epoch": 1.4013664776857102,
15039
+ "grad_norm": 2.540734052658081,
15040
+ "learning_rate": 3.283691423074685e-07,
15041
+ "loss": 1.6343,
15042
+ "step": 9640
15043
+ },
15044
+ {
15045
+ "epoch": 1.4020933275185348,
15046
+ "grad_norm": 2.0778424739837646,
15047
+ "learning_rate": 3.193456457121636e-07,
15048
+ "loss": 1.5255,
15049
+ "step": 9645
15050
+ },
15051
+ {
15052
+ "epoch": 1.402820177351359,
15053
+ "grad_norm": 2.8635857105255127,
15054
+ "learning_rate": 3.1044746774532277e-07,
15055
+ "loss": 1.604,
15056
+ "step": 9650
15057
+ },
15058
+ {
15059
+ "epoch": 1.402820177351359,
15060
+ "eval_loss": 1.5223361253738403,
15061
+ "eval_runtime": 20.5763,
15062
+ "eval_samples_per_second": 160.428,
15063
+ "eval_steps_per_second": 10.06,
15064
+ "step": 9650
15065
+ },
15066
+ {
15067
+ "epoch": 1.4035470271841837,
15068
+ "grad_norm": 2.443467617034912,
15069
+ "learning_rate": 3.0167463080810214e-07,
15070
+ "loss": 1.6844,
15071
+ "step": 9655
15072
+ },
15073
+ {
15074
+ "epoch": 1.4042738770170082,
15075
+ "grad_norm": 2.570190906524658,
15076
+ "learning_rate": 2.9302715698610123e-07,
15077
+ "loss": 1.6661,
15078
+ "step": 9660
15079
+ },
15080
+ {
15081
+ "epoch": 1.4050007268498328,
15082
+ "grad_norm": 2.4715726375579834,
15083
+ "learning_rate": 2.845050680493296e-07,
15084
+ "loss": 1.579,
15085
+ "step": 9665
15086
+ },
15087
+ {
15088
+ "epoch": 1.4057275766826574,
15089
+ "grad_norm": 2.529876947402954,
15090
+ "learning_rate": 2.761083854521403e-07,
15091
+ "loss": 1.7274,
15092
+ "step": 9670
15093
+ },
15094
+ {
15095
+ "epoch": 1.406454426515482,
15096
+ "grad_norm": 2.4188828468322754,
15097
+ "learning_rate": 2.678371303331627e-07,
15098
+ "loss": 1.5238,
15099
+ "step": 9675
15100
+ },
15101
+ {
15102
+ "epoch": 1.4071812763483065,
15103
+ "grad_norm": 2.511361598968506,
15104
+ "learning_rate": 2.5969132351527523e-07,
15105
+ "loss": 1.5761,
15106
+ "step": 9680
15107
+ },
15108
+ {
15109
+ "epoch": 1.407908126181131,
15110
+ "grad_norm": 2.82676362991333,
15111
+ "learning_rate": 2.5167098550553806e-07,
15112
+ "loss": 1.6957,
15113
+ "step": 9685
15114
+ },
15115
+ {
15116
+ "epoch": 1.4086349760139556,
15117
+ "grad_norm": 2.6926026344299316,
15118
+ "learning_rate": 2.437761364951492e-07,
15119
+ "loss": 1.6426,
15120
+ "step": 9690
15121
+ },
15122
+ {
15123
+ "epoch": 1.40936182584678,
15124
+ "grad_norm": 2.8157596588134766,
15125
+ "learning_rate": 2.36006796359366e-07,
15126
+ "loss": 1.6126,
15127
+ "step": 9695
15128
+ },
15129
+ {
15130
+ "epoch": 1.4100886756796047,
15131
+ "grad_norm": 2.3840818405151367,
15132
+ "learning_rate": 2.2836298465750569e-07,
15133
+ "loss": 1.551,
15134
+ "step": 9700
15135
+ },
15136
+ {
15137
+ "epoch": 1.4100886756796047,
15138
+ "eval_loss": 1.521620512008667,
15139
+ "eval_runtime": 19.0149,
15140
+ "eval_samples_per_second": 173.601,
15141
+ "eval_steps_per_second": 10.886,
15142
+ "step": 9700
15143
  }
15144
  ],
15145
  "logging_steps": 5,
 
15168
  "attributes": {}
15169
  }
15170
  },
15171
+ "total_flos": 2.5325929674917806e+18,
15172
  "train_batch_size": 4,
15173
  "trial_name": null,
15174
  "trial_params": null