TweedleDeepLearnings commited on
Commit
3cbf5a2
·
verified ·
1 Parent(s): 02566cf

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c0f1846b2eca9ece14ca419dc900cc69794d0a86a70292e3bc1ed7ec5237704
3
  size 1474661312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7df61c683eb310ecb8d849314ea3d16e0ebb1dd999709bbc636356ed1746d614
3
  size 1474661312
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55c27f4d0598adba4c648399258f4e05c836933f6246139847a43d93e0e3caa4
3
- size 2881708922
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4433c2f8bbbc12a2544c63ff3b1614dc61f0bf7c45ea85ef2db83f0a2cc6534
3
+ size 2881708858
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c06dc6e1421384e2c500c31f024c1f7c824663f681d921062bd75d6142146be1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3a0a21d7a4e0dc7af1cb57b47bc980d3c876420d73fe129ed224cf22685abe6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d87192dfad02856a78d2515aef5449813a7a3d2c3161d182469ba9a023f23658
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c72ff83b81e4d5ca4579296bd0d644ce6acb0bdb379683b8988ce492e8fcf1fd
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.00011071480548707768,
3
  "best_model_checkpoint": "./output/checkpoint-600",
4
- "epoch": 0.7550335570469798,
5
  "eval_steps": 150,
6
- "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1024,6 +1024,119 @@
1024
  "eval_samples_per_second": 9.903,
1025
  "eval_steps_per_second": 9.903,
1026
  "step": 1350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1027
  }
1028
  ],
1029
  "logging_steps": 10,
@@ -1043,7 +1156,7 @@
1043
  "attributes": {}
1044
  }
1045
  },
1046
- "total_flos": 8.343847373262029e+16,
1047
  "train_batch_size": 4,
1048
  "trial_name": null,
1049
  "trial_params": null
 
1
  {
2
  "best_metric": 0.00011071480548707768,
3
  "best_model_checkpoint": "./output/checkpoint-600",
4
+ "epoch": 0.8389261744966443,
5
  "eval_steps": 150,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1024
  "eval_samples_per_second": 9.903,
1025
  "eval_steps_per_second": 9.903,
1026
  "step": 1350
1027
+ },
1028
+ {
1029
+ "epoch": 0.7606263982102909,
1030
+ "grad_norm": 6.651922012679279e-05,
1031
+ "learning_rate": 3.7203378277711024e-05,
1032
+ "loss": 0.0004,
1033
+ "step": 1360
1034
+ },
1035
+ {
1036
+ "epoch": 0.7662192393736018,
1037
+ "grad_norm": 0.018086520954966545,
1038
+ "learning_rate": 3.710111549081191e-05,
1039
+ "loss": 0.0,
1040
+ "step": 1370
1041
+ },
1042
+ {
1043
+ "epoch": 0.7718120805369127,
1044
+ "grad_norm": 0.4451378285884857,
1045
+ "learning_rate": 3.699823195620199e-05,
1046
+ "loss": 0.0002,
1047
+ "step": 1380
1048
+ },
1049
+ {
1050
+ "epoch": 0.7774049217002237,
1051
+ "grad_norm": 0.0013188536977395415,
1052
+ "learning_rate": 3.689473190302041e-05,
1053
+ "loss": 0.0,
1054
+ "step": 1390
1055
+ },
1056
+ {
1057
+ "epoch": 0.7829977628635347,
1058
+ "grad_norm": 0.015242448076605797,
1059
+ "learning_rate": 3.679061958574897e-05,
1060
+ "loss": 0.0,
1061
+ "step": 1400
1062
+ },
1063
+ {
1064
+ "epoch": 0.7885906040268457,
1065
+ "grad_norm": 0.0007480831118300557,
1066
+ "learning_rate": 3.668589928403726e-05,
1067
+ "loss": 0.001,
1068
+ "step": 1410
1069
+ },
1070
+ {
1071
+ "epoch": 0.7941834451901566,
1072
+ "grad_norm": 0.0003548146632965654,
1073
+ "learning_rate": 3.6580575302526706e-05,
1074
+ "loss": 0.0001,
1075
+ "step": 1420
1076
+ },
1077
+ {
1078
+ "epoch": 0.7997762863534675,
1079
+ "grad_norm": 4.4911037548445165e-05,
1080
+ "learning_rate": 3.647465197067368e-05,
1081
+ "loss": 0.0002,
1082
+ "step": 1430
1083
+ },
1084
+ {
1085
+ "epoch": 0.8053691275167785,
1086
+ "grad_norm": 6.35408578091301e-05,
1087
+ "learning_rate": 3.6368133642571464e-05,
1088
+ "loss": 0.0,
1089
+ "step": 1440
1090
+ },
1091
+ {
1092
+ "epoch": 0.8109619686800895,
1093
+ "grad_norm": 0.0015594850992783904,
1094
+ "learning_rate": 3.6261024696771345e-05,
1095
+ "loss": 0.0,
1096
+ "step": 1450
1097
+ },
1098
+ {
1099
+ "epoch": 0.8165548098434005,
1100
+ "grad_norm": 0.00036429730243980885,
1101
+ "learning_rate": 3.615332953610255e-05,
1102
+ "loss": 0.0059,
1103
+ "step": 1460
1104
+ },
1105
+ {
1106
+ "epoch": 0.8221476510067114,
1107
+ "grad_norm": 1.828911542892456,
1108
+ "learning_rate": 3.604505258749132e-05,
1109
+ "loss": 0.0006,
1110
+ "step": 1470
1111
+ },
1112
+ {
1113
+ "epoch": 0.8277404921700223,
1114
+ "grad_norm": 0.01230633445084095,
1115
+ "learning_rate": 3.5936198301778945e-05,
1116
+ "loss": 0.0012,
1117
+ "step": 1480
1118
+ },
1119
+ {
1120
+ "epoch": 0.8333333333333334,
1121
+ "grad_norm": 0.0066687436774373055,
1122
+ "learning_rate": 3.5826771153538716e-05,
1123
+ "loss": 0.0006,
1124
+ "step": 1490
1125
+ },
1126
+ {
1127
+ "epoch": 0.8389261744966443,
1128
+ "grad_norm": 5.592922752839513e-05,
1129
+ "learning_rate": 3.571677564089214e-05,
1130
+ "loss": 0.0002,
1131
+ "step": 1500
1132
+ },
1133
+ {
1134
+ "epoch": 0.8389261744966443,
1135
+ "eval_loss": 0.0009469892247579992,
1136
+ "eval_runtime": 50.2402,
1137
+ "eval_samples_per_second": 9.952,
1138
+ "eval_steps_per_second": 9.952,
1139
+ "step": 1500
1140
  }
1141
  ],
1142
  "logging_steps": 10,
 
1156
  "attributes": {}
1157
  }
1158
  },
1159
+ "total_flos": 9.355502107695514e+16,
1160
  "train_batch_size": 4,
1161
  "trial_name": null,
1162
  "trial_params": null