saracandu commited on
Commit
e573a52
·
verified ·
1 Parent(s): 284e9db

Training in progress, step 40250, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:346b6ba8a0e7815947f20628de9f3d3af6ab8139a91118cf143cd3dc8eb146ad
3
  size 611073224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf20454bb920b5229a72d2241aded5af56bf1d40cafdc205ad2ed71ddb51ce2
3
  size 611073224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b8bc63fd2b8baa76ecd7886a045dc375cb875b522a59fa37376c6075b7214c5
3
  size 1222241675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b51bbb3052c6336fdd306188aefaf0df9e191813737b03dc83638919d7f1fd5
3
  size 1222241675
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57c1faacff2ef52c66c645c610107c0e55aa7d56e45f05da40e4b290dd1ba1d0
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c63672711fc07bad7a68a3b4c4d72bc53b6bdbd46717a715e436ced2146084e0
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:977a470143aa24e72cee7a02d2bbb1562c97dd70f900cdb3987072bece7c2dec
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42e39dfc5ecd7ab1c066d7ba59fe38c030dfd049fdd393d56e8651cbc6e2f98e
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 39250,
3
  "best_metric": 2.157838821411133,
4
  "best_model_checkpoint": "/share/ai-lab/scandussio/stlenc_tuning_0.2/results-final/checkpoint-39250",
5
- "epoch": 10.41124414367517,
6
  "eval_steps": 250,
7
- "global_step": 40000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6888,6 +6888,49 @@
6888
  "eval_samples_per_second": 219.795,
6889
  "eval_steps_per_second": 1.758,
6890
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6891
  }
6892
  ],
6893
  "logging_steps": 50,
@@ -6907,7 +6950,7 @@
6907
  "attributes": {}
6908
  }
6909
  },
6910
- "total_flos": 2.393798126642135e+18,
6911
  "train_batch_size": 128,
6912
  "trial_name": null,
6913
  "trial_params": null
 
2
  "best_global_step": 39250,
3
  "best_metric": 2.157838821411133,
4
  "best_model_checkpoint": "/share/ai-lab/scandussio/stlenc_tuning_0.2/results-final/checkpoint-39250",
5
+ "epoch": 10.47631441957314,
6
  "eval_steps": 250,
7
+ "global_step": 40250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6888
  "eval_samples_per_second": 219.795,
6889
  "eval_steps_per_second": 1.758,
6890
  "step": 40000
6891
+ },
6892
+ {
6893
+ "epoch": 10.424258198854764,
6894
+ "grad_norm": 4.224571704864502,
6895
+ "learning_rate": 1.5253340274162764e-06,
6896
+ "loss": 1.4713,
6897
+ "step": 40050
6898
+ },
6899
+ {
6900
+ "epoch": 10.437272254034356,
6901
+ "grad_norm": 10.799654960632324,
6902
+ "learning_rate": 1.5209960090230782e-06,
6903
+ "loss": 1.4412,
6904
+ "step": 40100
6905
+ },
6906
+ {
6907
+ "epoch": 10.45028630921395,
6908
+ "grad_norm": 8.12926959991455,
6909
+ "learning_rate": 1.5166579906298803e-06,
6910
+ "loss": 1.4705,
6911
+ "step": 40150
6912
+ },
6913
+ {
6914
+ "epoch": 10.463300364393545,
6915
+ "grad_norm": 4.9367756843566895,
6916
+ "learning_rate": 1.5123199722366823e-06,
6917
+ "loss": 1.447,
6918
+ "step": 40200
6919
+ },
6920
+ {
6921
+ "epoch": 10.47631441957314,
6922
+ "grad_norm": 6.339715480804443,
6923
+ "learning_rate": 1.5079819538434846e-06,
6924
+ "loss": 1.4478,
6925
+ "step": 40250
6926
+ },
6927
+ {
6928
+ "epoch": 10.47631441957314,
6929
+ "eval_loss": 2.1831064224243164,
6930
+ "eval_runtime": 13.7128,
6931
+ "eval_samples_per_second": 218.774,
6932
+ "eval_steps_per_second": 1.75,
6933
+ "step": 40250
6934
  }
6935
  ],
6936
  "logging_steps": 50,
 
6950
  "attributes": {}
6951
  }
6952
  },
6953
+ "total_flos": 2.408760416969687e+18,
6954
  "train_batch_size": 128,
6955
  "trial_name": null,
6956
  "trial_params": null