alicegoesdown commited on
Commit
9978625
·
verified ·
1 Parent(s): da5b168

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/lora_top/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc3a67f4de1685ce6a0fced5c481b644514ade913e29a302672e9588e575aaad
3
  size 6299784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4836b545a1e7dda6e59a106bd77a5c159f54e80e28ff1bcbf794b726c912e64f
3
  size 6299784
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c3361d74875a43ac74d1859dc2aa87429b4f648ae5c9304cdab88e987b01c49
3
  size 12623930
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dae80422e7904b9ec35f17973a2f911c29752998c955d59a093db85783e326c5
3
  size 12623930
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d242322c5678470cae524c621709ef41118946651d32e327740afb650f163702
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89d743de80481e3c9d0faa475c7d9b8f3460af5726c846f23b5880691f9ad6d5
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d682cfc5aa181fdf75f0f7c385234b0db148db5e71a3fbb7d749d518ba02734f
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88686af0f5ff2cda3047db0918a626e92cc1249b4aba5b7793fa657eb12e8226
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 4.089999198913574,
3
- "best_model_checkpoint": "./output/checkpoint-1350",
4
- "epoch": 2.0833333333333335,
5
  "eval_steps": 150,
6
- "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1024,6 +1024,119 @@
1024
  "eval_samples_per_second": 37.248,
1025
  "eval_steps_per_second": 37.248,
1026
  "step": 1350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1027
  }
1028
  ],
1029
  "logging_steps": 10,
@@ -1043,7 +1156,7 @@
1043
  "attributes": {}
1044
  }
1045
  },
1046
- "total_flos": 5153548116885504.0,
1047
  "train_batch_size": 16,
1048
  "trial_name": null,
1049
  "trial_params": null
 
1
  {
2
+ "best_metric": 4.045746803283691,
3
+ "best_model_checkpoint": "./output/checkpoint-1500",
4
+ "epoch": 2.314814814814815,
5
  "eval_steps": 150,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1024
  "eval_samples_per_second": 37.248,
1025
  "eval_steps_per_second": 37.248,
1026
  "step": 1350
1027
+ },
1028
+ {
1029
+ "epoch": 2.0987654320987654,
1030
+ "grad_norm": 3.1429619789123535,
1031
+ "learning_rate": 8.455313244934322e-05,
1032
+ "loss": 4.0394,
1033
+ "step": 1360
1034
+ },
1035
+ {
1036
+ "epoch": 2.1141975308641974,
1037
+ "grad_norm": 3.6172502040863037,
1038
+ "learning_rate": 8.432071702457251e-05,
1039
+ "loss": 3.8239,
1040
+ "step": 1370
1041
+ },
1042
+ {
1043
+ "epoch": 2.1296296296296298,
1044
+ "grad_norm": 4.0605573654174805,
1045
+ "learning_rate": 8.408689080954995e-05,
1046
+ "loss": 3.9563,
1047
+ "step": 1380
1048
+ },
1049
+ {
1050
+ "epoch": 2.1450617283950617,
1051
+ "grad_norm": 2.4950709342956543,
1052
+ "learning_rate": 8.385166341595547e-05,
1053
+ "loss": 3.8381,
1054
+ "step": 1390
1055
+ },
1056
+ {
1057
+ "epoch": 2.1604938271604937,
1058
+ "grad_norm": 4.008175849914551,
1059
+ "learning_rate": 8.361504451306582e-05,
1060
+ "loss": 3.7534,
1061
+ "step": 1400
1062
+ },
1063
+ {
1064
+ "epoch": 2.175925925925926,
1065
+ "grad_norm": 3.574023723602295,
1066
+ "learning_rate": 8.337704382735738e-05,
1067
+ "loss": 3.8082,
1068
+ "step": 1410
1069
+ },
1070
+ {
1071
+ "epoch": 2.191358024691358,
1072
+ "grad_norm": 3.903048276901245,
1073
+ "learning_rate": 8.313767114210614e-05,
1074
+ "loss": 3.9183,
1075
+ "step": 1420
1076
+ },
1077
+ {
1078
+ "epoch": 2.20679012345679,
1079
+ "grad_norm": 3.0344271659851074,
1080
+ "learning_rate": 8.289693629698562e-05,
1081
+ "loss": 3.9951,
1082
+ "step": 1430
1083
+ },
1084
+ {
1085
+ "epoch": 2.2222222222222223,
1086
+ "grad_norm": 4.642614364624023,
1087
+ "learning_rate": 8.265484918766241e-05,
1088
+ "loss": 3.8322,
1089
+ "step": 1440
1090
+ },
1091
+ {
1092
+ "epoch": 2.2376543209876543,
1093
+ "grad_norm": 2.3817903995513916,
1094
+ "learning_rate": 8.241141976538941e-05,
1095
+ "loss": 3.7852,
1096
+ "step": 1450
1097
+ },
1098
+ {
1099
+ "epoch": 2.253086419753086,
1100
+ "grad_norm": 3.2333765029907227,
1101
+ "learning_rate": 8.216665803659669e-05,
1102
+ "loss": 3.8552,
1103
+ "step": 1460
1104
+ },
1105
+ {
1106
+ "epoch": 2.2685185185185186,
1107
+ "grad_norm": 3.2392756938934326,
1108
+ "learning_rate": 8.192057406248027e-05,
1109
+ "loss": 3.9154,
1110
+ "step": 1470
1111
+ },
1112
+ {
1113
+ "epoch": 2.2839506172839505,
1114
+ "grad_norm": 3.9679174423217773,
1115
+ "learning_rate": 8.167317795858849e-05,
1116
+ "loss": 3.834,
1117
+ "step": 1480
1118
+ },
1119
+ {
1120
+ "epoch": 2.299382716049383,
1121
+ "grad_norm": 4.692811489105225,
1122
+ "learning_rate": 8.142447989440615e-05,
1123
+ "loss": 3.7553,
1124
+ "step": 1490
1125
+ },
1126
+ {
1127
+ "epoch": 2.314814814814815,
1128
+ "grad_norm": 3.6301052570343018,
1129
+ "learning_rate": 8.117449009293666e-05,
1130
+ "loss": 3.8343,
1131
+ "step": 1500
1132
+ },
1133
+ {
1134
+ "epoch": 2.314814814814815,
1135
+ "eval_loss": 4.045746803283691,
1136
+ "eval_runtime": 13.3599,
1137
+ "eval_samples_per_second": 37.425,
1138
+ "eval_steps_per_second": 37.425,
1139
+ "step": 1500
1140
  }
1141
  ],
1142
  "logging_steps": 10,
 
1156
  "attributes": {}
1157
  }
1158
  },
1159
+ "total_flos": 5736850563072000.0,
1160
  "train_batch_size": 16,
1161
  "trial_name": null,
1162
  "trial_params": null