irishprancer committed on
Commit
3b308b7
·
verified ·
1 Parent(s): 4c2bc5e

Training in progress, step 5250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b8241d490cf7c6d73daa6e58d1e953d37a78b29a116a5819c07ede09f2a18a6
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce8cb0c1636a06b1e76c546c4b0282f02b71ccad43c283d3d33d43185c64edec
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a28d22fdc04db2c35d665a1ac5cb8cd349b54d05fc00d81df4548b08481678c
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70afac3958025bed818f692a236ab6bf6b28db45140796294f475309149762a5
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67c6f49cb50d8b09b0f2e9704dcb4986f8fc63f53d3b695322fdb8756b868c02
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7de188e422eb0da886da3c865f1df00995a0a219ebff0d43a41d74c3b9d38d5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:736f4102e80c412e5bd9ae55e7c4ee4195aa9541999b56cf808f798e57d982a7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cc08ef7615af7896731786745ca416272561837649d6bc1ff644d72a48c9b0d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 221.7391304347826,
5
  "eval_steps": 150,
6
- "global_step": 5100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4937,6 +4937,151 @@
4937
  "EMA_steps_per_second": 22.619,
4938
  "epoch": 221.7391304347826,
4939
  "step": 5100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4940
  }
4941
  ],
4942
  "logging_steps": 10,
@@ -4956,7 +5101,7 @@
4956
  "attributes": {}
4957
  }
4958
  },
4959
- "total_flos": 1.3106424730691174e+17,
4960
  "train_batch_size": 4,
4961
  "trial_name": null,
4962
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 228.2608695652174,
5
  "eval_steps": 150,
6
+ "global_step": 5250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4937
  "EMA_steps_per_second": 22.619,
4938
  "epoch": 221.7391304347826,
4939
  "step": 5100
4940
+ },
4941
+ {
4942
+ "epoch": 222.17391304347825,
4943
+ "grad_norm": 2.351990222930908,
4944
+ "learning_rate": 1.528980386974772e-06,
4945
+ "loss": 0.2432,
4946
+ "step": 5110
4947
+ },
4948
+ {
4949
+ "epoch": 222.6086956521739,
4950
+ "grad_norm": 2.001265287399292,
4951
+ "learning_rate": 1.5289556856653922e-06,
4952
+ "loss": 0.2269,
4953
+ "step": 5120
4954
+ },
4955
+ {
4956
+ "epoch": 223.04347826086956,
4957
+ "grad_norm": 2.3239283561706543,
4958
+ "learning_rate": 1.5289306815434077e-06,
4959
+ "loss": 0.2179,
4960
+ "step": 5130
4961
+ },
4962
+ {
4963
+ "epoch": 223.47826086956522,
4964
+ "grad_norm": 2.3948585987091064,
4965
+ "learning_rate": 1.528905374618729e-06,
4966
+ "loss": 0.2354,
4967
+ "step": 5140
4968
+ },
4969
+ {
4970
+ "epoch": 223.91304347826087,
4971
+ "grad_norm": 1.9040113687515259,
4972
+ "learning_rate": 1.5288797649013872e-06,
4973
+ "loss": 0.2289,
4974
+ "step": 5150
4975
+ },
4976
+ {
4977
+ "epoch": 224.34782608695653,
4978
+ "grad_norm": 2.0223822593688965,
4979
+ "learning_rate": 1.5288538524015332e-06,
4980
+ "loss": 0.2192,
4981
+ "step": 5160
4982
+ },
4983
+ {
4984
+ "epoch": 224.7826086956522,
4985
+ "grad_norm": 1.7613234519958496,
4986
+ "learning_rate": 1.5288276371294373e-06,
4987
+ "loss": 0.2122,
4988
+ "step": 5170
4989
+ },
4990
+ {
4991
+ "epoch": 225.2173913043478,
4992
+ "grad_norm": 1.8648544549942017,
4993
+ "learning_rate": 1.5288011190954913e-06,
4994
+ "loss": 0.2038,
4995
+ "step": 5180
4996
+ },
4997
+ {
4998
+ "epoch": 225.65217391304347,
4999
+ "grad_norm": 1.9111838340759277,
5000
+ "learning_rate": 1.5287742983102055e-06,
5001
+ "loss": 0.1973,
5002
+ "step": 5190
5003
+ },
5004
+ {
5005
+ "epoch": 226.08695652173913,
5006
+ "grad_norm": 2.3617441654205322,
5007
+ "learning_rate": 1.5287471747842112e-06,
5008
+ "loss": 0.2629,
5009
+ "step": 5200
5010
+ },
5011
+ {
5012
+ "epoch": 226.52173913043478,
5013
+ "grad_norm": 1.7532709836959839,
5014
+ "learning_rate": 1.5287197485282586e-06,
5015
+ "loss": 0.2078,
5016
+ "step": 5210
5017
+ },
5018
+ {
5019
+ "epoch": 226.95652173913044,
5020
+ "grad_norm": 2.7147057056427,
5021
+ "learning_rate": 1.5286920195532195e-06,
5022
+ "loss": 0.2379,
5023
+ "step": 5220
5024
+ },
5025
+ {
5026
+ "epoch": 227.3913043478261,
5027
+ "grad_norm": 1.9578685760498047,
5028
+ "learning_rate": 1.528663987870084e-06,
5029
+ "loss": 0.2432,
5030
+ "step": 5230
5031
+ },
5032
+ {
5033
+ "epoch": 227.82608695652175,
5034
+ "grad_norm": 3.356194019317627,
5035
+ "learning_rate": 1.5286356534899634e-06,
5036
+ "loss": 0.2264,
5037
+ "step": 5240
5038
+ },
5039
+ {
5040
+ "epoch": 228.2608695652174,
5041
+ "grad_norm": 1.8712825775146484,
5042
+ "learning_rate": 1.5286070164240883e-06,
5043
+ "loss": 0.2185,
5044
+ "step": 5250
5045
+ },
5046
+ {
5047
+ "epoch": 228.2608695652174,
5048
+ "eval_loss": 0.9829781651496887,
5049
+ "eval_runtime": 0.3975,
5050
+ "eval_samples_per_second": 25.156,
5051
+ "eval_steps_per_second": 25.156,
5052
+ "step": 5250
5053
+ },
5054
+ {
5055
+ "Start_State_loss": 0.8609819412231445,
5056
+ "Start_State_runtime": 0.3894,
5057
+ "Start_State_samples_per_second": 25.682,
5058
+ "Start_State_steps_per_second": 25.682,
5059
+ "epoch": 228.2608695652174,
5060
+ "step": 5250
5061
+ },
5062
+ {
5063
+ "Raw_Model_loss": 0.9829781651496887,
5064
+ "Raw_Model_runtime": 0.391,
5065
+ "Raw_Model_samples_per_second": 25.574,
5066
+ "Raw_Model_steps_per_second": 25.574,
5067
+ "epoch": 228.2608695652174,
5068
+ "step": 5250
5069
+ },
5070
+ {
5071
+ "SWA_loss": 0.8260501027107239,
5072
+ "SWA_runtime": 0.4158,
5073
+ "SWA_samples_per_second": 24.051,
5074
+ "SWA_steps_per_second": 24.051,
5075
+ "epoch": 228.2608695652174,
5076
+ "step": 5250
5077
+ },
5078
+ {
5079
+ "EMA_loss": 0.8600662350654602,
5080
+ "EMA_runtime": 0.3878,
5081
+ "EMA_samples_per_second": 25.783,
5082
+ "EMA_steps_per_second": 25.783,
5083
+ "epoch": 228.2608695652174,
5084
+ "step": 5250
5085
  }
5086
  ],
5087
  "logging_steps": 10,
 
5101
  "attributes": {}
5102
  }
5103
  },
5104
+ "total_flos": 1.3495580841170534e+17,
5105
  "train_batch_size": 4,
5106
  "trial_name": null,
5107
  "trial_params": null