FormlessAI committed on
Commit
430b881
·
verified ·
1 Parent(s): 789225a

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5b523eb134094d0fe3ed4dfef81ffed7224784825c6f64b8661fe04d195a546
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ec12a67f9dfa7e82f7d1fa27e46947cfa5b2e70dc641605dca0f15edc26ac5b
3
  size 1037269336
last-checkpoint/global_step5200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:376d8867ef6a010707f6d5ffde3b70f1fc37fbb25fc4c67986ce621672102162
3
+ size 781993445
last-checkpoint/global_step5200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94681bee6eff66c9426bb36f1f8b5f82871ec40e34245855236063b0768f353d
3
+ size 781993509
last-checkpoint/global_step5200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43446a308e9f0fe1c37260f8f2fc16128b95eec4bef7121becf3b52c6cd5e5ae
3
+ size 781993509
last-checkpoint/global_step5200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccdadbf4b397a4566ebe26955a4e21a4bdbf0380614debe2fa34aa6ddd4a065f
3
+ size 781993509
last-checkpoint/global_step5200/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c48adfda58d5714ac6a30167ebed520731034b75a271625bea166e61975b263e
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step5100
 
1
+ global_step5200
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56865be07eff6f66c69791fc2b9b609f0e20d2a4499e1c484d2daf5499c42b5c
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c05264822189d459f4c1c5b27ebdb9b6b9e8dcee1a009b4f2e28ecf49dc4f5b
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcc331c1dd0e2fd6a26f5faf857be1fe7603138c25d38c533d290076fd5c63d2
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f645a6078c88ee4b1185c0d7c1ae791e9bd6d926fdbc01aebef5ee84d1159b5
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68160d4ba6654984de0d46bc96a7fe87a66866d7126298837a820322efc5e287
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c59312f152825042f7e3a29466b0959cd08f51130fdfed991ab5bf960815a6dc
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66e51d3128d1b9d77da6840ea0cc45f49e7d431d13998e4e4edcf5f6460d262d
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1de6530daf248f1aed878ddb32856047ec142beb86757afc3c303de79cdabc8
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfd32ddbd680624dcd914b61c50d077bc8f0cb703973d6bb57f048563ab5de57
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:096748cf67d302b535a039b43df4da991eba6c27882d0848f447a99c87428013
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.8430671691894531,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7413868294810292,
6
  "eval_steps": 50,
7
- "global_step": 5100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -7964,6 +7964,162 @@
7964
  "eval_samples_per_second": 172.648,
7965
  "eval_steps_per_second": 10.826,
7966
  "step": 5100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7967
  }
7968
  ],
7969
  "logging_steps": 5,
@@ -7992,7 +8148,7 @@
7992
  "attributes": {}
7993
  }
7994
  },
7995
- "total_flos": 1.3296709309594337e+18,
7996
  "train_batch_size": 4,
7997
  "trial_name": null,
7998
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.8277243375778198,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.75592382613752,
6
  "eval_steps": 50,
7
+ "global_step": 5200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
7964
  "eval_samples_per_second": 172.648,
7965
  "eval_steps_per_second": 10.826,
7966
  "step": 5100
7967
+ },
7968
+ {
7969
+ "epoch": 0.7421136793138537,
7970
+ "grad_norm": 2.1775574684143066,
7971
+ "learning_rate": 4.923992598825168e-05,
7972
+ "loss": 1.8894,
7973
+ "step": 5105
7974
+ },
7975
+ {
7976
+ "epoch": 0.7428405291466783,
7977
+ "grad_norm": 2.293163299560547,
7978
+ "learning_rate": 4.9160449548493304e-05,
7979
+ "loss": 2.0062,
7980
+ "step": 5110
7981
+ },
7982
+ {
7983
+ "epoch": 0.7435673789795029,
7984
+ "grad_norm": 2.4334089756011963,
7985
+ "learning_rate": 4.908097546994249e-05,
7986
+ "loss": 1.8894,
7987
+ "step": 5115
7988
+ },
7989
+ {
7990
+ "epoch": 0.7442942288123273,
7991
+ "grad_norm": 2.508547782897949,
7992
+ "learning_rate": 4.9001503952675144e-05,
7993
+ "loss": 2.1935,
7994
+ "step": 5120
7995
+ },
7996
+ {
7997
+ "epoch": 0.7450210786451519,
7998
+ "grad_norm": 2.257105588912964,
7999
+ "learning_rate": 4.89220351967607e-05,
8000
+ "loss": 1.9659,
8001
+ "step": 5125
8002
+ },
8003
+ {
8004
+ "epoch": 0.7457479284779764,
8005
+ "grad_norm": 2.537111520767212,
8006
+ "learning_rate": 4.884256940226167e-05,
8007
+ "loss": 1.9314,
8008
+ "step": 5130
8009
+ },
8010
+ {
8011
+ "epoch": 0.746474778310801,
8012
+ "grad_norm": 2.178720474243164,
8013
+ "learning_rate": 4.876310676923307e-05,
8014
+ "loss": 1.9614,
8015
+ "step": 5135
8016
+ },
8017
+ {
8018
+ "epoch": 0.7472016281436256,
8019
+ "grad_norm": 2.6238718032836914,
8020
+ "learning_rate": 4.868364749772204e-05,
8021
+ "loss": 1.8404,
8022
+ "step": 5140
8023
+ },
8024
+ {
8025
+ "epoch": 0.74792847797645,
8026
+ "grad_norm": 2.7192604541778564,
8027
+ "learning_rate": 4.860419178776716e-05,
8028
+ "loss": 1.965,
8029
+ "step": 5145
8030
+ },
8031
+ {
8032
+ "epoch": 0.7486553278092746,
8033
+ "grad_norm": 2.0032546520233154,
8034
+ "learning_rate": 4.852473983939808e-05,
8035
+ "loss": 1.9087,
8036
+ "step": 5150
8037
+ },
8038
+ {
8039
+ "epoch": 0.7486553278092746,
8040
+ "eval_loss": 1.838592529296875,
8041
+ "eval_runtime": 21.3886,
8042
+ "eval_samples_per_second": 154.335,
8043
+ "eval_steps_per_second": 9.678,
8044
+ "step": 5150
8045
+ },
8046
+ {
8047
+ "epoch": 0.7493821776420991,
8048
+ "grad_norm": 1.9931970834732056,
8049
+ "learning_rate": 4.844529185263501e-05,
8050
+ "loss": 2.1584,
8051
+ "step": 5155
8052
+ },
8053
+ {
8054
+ "epoch": 0.7501090274749237,
8055
+ "grad_norm": 2.349775791168213,
8056
+ "learning_rate": 4.836584802748814e-05,
8057
+ "loss": 2.0698,
8058
+ "step": 5160
8059
+ },
8060
+ {
8061
+ "epoch": 0.7508358773077483,
8062
+ "grad_norm": 4.791730880737305,
8063
+ "learning_rate": 4.828640856395723e-05,
8064
+ "loss": 2.1494,
8065
+ "step": 5165
8066
+ },
8067
+ {
8068
+ "epoch": 0.7515627271405727,
8069
+ "grad_norm": 2.025981903076172,
8070
+ "learning_rate": 4.8206973662030984e-05,
8071
+ "loss": 2.0689,
8072
+ "step": 5170
8073
+ },
8074
+ {
8075
+ "epoch": 0.7522895769733973,
8076
+ "grad_norm": 2.32045841217041,
8077
+ "learning_rate": 4.8127543521686746e-05,
8078
+ "loss": 2.0441,
8079
+ "step": 5175
8080
+ },
8081
+ {
8082
+ "epoch": 0.7530164268062218,
8083
+ "grad_norm": 2.6872143745422363,
8084
+ "learning_rate": 4.8048118342889746e-05,
8085
+ "loss": 1.863,
8086
+ "step": 5180
8087
+ },
8088
+ {
8089
+ "epoch": 0.7537432766390464,
8090
+ "grad_norm": 2.622974395751953,
8091
+ "learning_rate": 4.7968698325592805e-05,
8092
+ "loss": 2.0201,
8093
+ "step": 5185
8094
+ },
8095
+ {
8096
+ "epoch": 0.754470126471871,
8097
+ "grad_norm": 2.663489818572998,
8098
+ "learning_rate": 4.7889283669735706e-05,
8099
+ "loss": 2.0436,
8100
+ "step": 5190
8101
+ },
8102
+ {
8103
+ "epoch": 0.7551969763046954,
8104
+ "grad_norm": 2.5928540229797363,
8105
+ "learning_rate": 4.780987457524476e-05,
8106
+ "loss": 2.0155,
8107
+ "step": 5195
8108
+ },
8109
+ {
8110
+ "epoch": 0.75592382613752,
8111
+ "grad_norm": 2.380448579788208,
8112
+ "learning_rate": 4.7730471242032245e-05,
8113
+ "loss": 2.0713,
8114
+ "step": 5200
8115
+ },
8116
+ {
8117
+ "epoch": 0.75592382613752,
8118
+ "eval_loss": 1.8277243375778198,
8119
+ "eval_runtime": 19.2213,
8120
+ "eval_samples_per_second": 171.736,
8121
+ "eval_steps_per_second": 10.769,
8122
+ "step": 5200
8123
  }
8124
  ],
8125
  "logging_steps": 5,
 
8148
  "attributes": {}
8149
  }
8150
  },
8151
+ "total_flos": 1.3550775535088435e+18,
8152
  "train_batch_size": 4,
8153
  "trial_name": null,
8154
  "trial_params": null