Training in progress, epoch 1
Browse files- all_results.json +8 -0
- config.json +4 -174
- model.safetensors +2 -2
- runs/May13_09-06-56_MSI/events.out.tfevents.1715593801.MSI.20512.1 +2 -2
- runs/May13_13-50-11_MSI/events.out.tfevents.1715601018.MSI.20512.2 +3 -0
- runs/May13_14-01-15_MSI/events.out.tfevents.1715601677.MSI.20512.3 +3 -0
- runs/May13_19-31-28_MSI/events.out.tfevents.1715621490.MSI.14116.0 +3 -0
- test_results.json +8 -0
- trainer_state.json +182 -0
- training_args.bin +1 -1
all_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 3.22972972972973,
|
| 3 |
+
"eval_accuracy": 0.5942028985507246,
|
| 4 |
+
"eval_loss": 0.7709811925888062,
|
| 5 |
+
"eval_runtime": 203.3655,
|
| 6 |
+
"eval_samples_per_second": 0.339,
|
| 7 |
+
"eval_steps_per_second": 0.089
|
| 8 |
+
}
|
config.json
CHANGED
|
@@ -12,185 +12,15 @@
|
|
| 12 |
"hidden_dropout_prob": 0.0,
|
| 13 |
"hidden_size": 768,
|
| 14 |
"id2label": {
|
| 15 |
-
"0": "
|
| 16 |
-
"1": "
|
| 17 |
-
"2": "video_11.mp4",
|
| 18 |
-
"3": "video_12.mp4",
|
| 19 |
-
"4": "video_13.mp4",
|
| 20 |
-
"5": "video_14.mp4",
|
| 21 |
-
"6": "video_15.mp4",
|
| 22 |
-
"7": "video_16.mp4",
|
| 23 |
-
"8": "video_17.mp4",
|
| 24 |
-
"9": "video_18.mp4",
|
| 25 |
-
"10": "video_19.mp4",
|
| 26 |
-
"11": "video_2.mp4",
|
| 27 |
-
"12": "video_20.mp4",
|
| 28 |
-
"13": "video_21.mp4",
|
| 29 |
-
"14": "video_22.mp4",
|
| 30 |
-
"15": "video_23.mp4",
|
| 31 |
-
"16": "video_24.mp4",
|
| 32 |
-
"17": "video_25.mp4",
|
| 33 |
-
"18": "video_26.mp4",
|
| 34 |
-
"19": "video_27.mp4",
|
| 35 |
-
"20": "video_28.mp4",
|
| 36 |
-
"21": "video_29.mp4",
|
| 37 |
-
"22": "video_3.mp4",
|
| 38 |
-
"23": "video_30.mp4",
|
| 39 |
-
"24": "video_31.mp4",
|
| 40 |
-
"25": "video_32.mp4",
|
| 41 |
-
"26": "video_33.mp4",
|
| 42 |
-
"27": "video_34.mp4",
|
| 43 |
-
"28": "video_35.mp4",
|
| 44 |
-
"29": "video_36.mp4",
|
| 45 |
-
"30": "video_37.mp4",
|
| 46 |
-
"31": "video_38.mp4",
|
| 47 |
-
"32": "video_39.mp4",
|
| 48 |
-
"33": "video_4.mp4",
|
| 49 |
-
"34": "video_40.mp4",
|
| 50 |
-
"35": "video_41.mp4",
|
| 51 |
-
"36": "video_42.mp4",
|
| 52 |
-
"37": "video_43.mp4",
|
| 53 |
-
"38": "video_44.mp4",
|
| 54 |
-
"39": "video_45.mp4",
|
| 55 |
-
"40": "video_46.mp4",
|
| 56 |
-
"41": "video_47.mp4",
|
| 57 |
-
"42": "video_48.mp4",
|
| 58 |
-
"43": "video_49.mp4",
|
| 59 |
-
"44": "video_5.mp4",
|
| 60 |
-
"45": "video_50.mp4",
|
| 61 |
-
"46": "video_51.mp4",
|
| 62 |
-
"47": "video_52.mp4",
|
| 63 |
-
"48": "video_53.mp4",
|
| 64 |
-
"49": "video_54.mp4",
|
| 65 |
-
"50": "video_55.mp4",
|
| 66 |
-
"51": "video_56.mp4",
|
| 67 |
-
"52": "video_57.mp4",
|
| 68 |
-
"53": "video_58.mp4",
|
| 69 |
-
"54": "video_59.mp4",
|
| 70 |
-
"55": "video_6.mp4",
|
| 71 |
-
"56": "video_60.mp4",
|
| 72 |
-
"57": "video_61.mp4",
|
| 73 |
-
"58": "video_62.mp4",
|
| 74 |
-
"59": "video_63.mp4",
|
| 75 |
-
"60": "video_64.mp4",
|
| 76 |
-
"61": "video_65.mp4",
|
| 77 |
-
"62": "video_66.mp4",
|
| 78 |
-
"63": "video_67.mp4",
|
| 79 |
-
"64": "video_68.mp4",
|
| 80 |
-
"65": "video_69.mp4",
|
| 81 |
-
"66": "video_7.mp4",
|
| 82 |
-
"67": "video_70.mp4",
|
| 83 |
-
"68": "video_71.mp4",
|
| 84 |
-
"69": "video_72.mp4",
|
| 85 |
-
"70": "video_73.mp4",
|
| 86 |
-
"71": "video_74.mp4",
|
| 87 |
-
"72": "video_75.mp4",
|
| 88 |
-
"73": "video_76.mp4",
|
| 89 |
-
"74": "video_77.mp4",
|
| 90 |
-
"75": "video_78.mp4",
|
| 91 |
-
"76": "video_79.mp4",
|
| 92 |
-
"77": "video_8.mp4",
|
| 93 |
-
"78": "video_80.mp4",
|
| 94 |
-
"79": "video_81.mp4",
|
| 95 |
-
"80": "video_82.mp4",
|
| 96 |
-
"81": "video_83.mp4",
|
| 97 |
-
"82": "video_84.mp4",
|
| 98 |
-
"83": "video_85.mp4",
|
| 99 |
-
"84": "video_86.mp4",
|
| 100 |
-
"85": "video_87.mp4",
|
| 101 |
-
"86": "video_9.mp4"
|
| 102 |
},
|
| 103 |
"image_size": 224,
|
| 104 |
"initializer_range": 0.02,
|
| 105 |
"intermediate_size": 3072,
|
| 106 |
"label2id": {
|
| 107 |
-
"
|
| 108 |
-
"
|
| 109 |
-
"video_11.mp4": 2,
|
| 110 |
-
"video_12.mp4": 3,
|
| 111 |
-
"video_13.mp4": 4,
|
| 112 |
-
"video_14.mp4": 5,
|
| 113 |
-
"video_15.mp4": 6,
|
| 114 |
-
"video_16.mp4": 7,
|
| 115 |
-
"video_17.mp4": 8,
|
| 116 |
-
"video_18.mp4": 9,
|
| 117 |
-
"video_19.mp4": 10,
|
| 118 |
-
"video_2.mp4": 11,
|
| 119 |
-
"video_20.mp4": 12,
|
| 120 |
-
"video_21.mp4": 13,
|
| 121 |
-
"video_22.mp4": 14,
|
| 122 |
-
"video_23.mp4": 15,
|
| 123 |
-
"video_24.mp4": 16,
|
| 124 |
-
"video_25.mp4": 17,
|
| 125 |
-
"video_26.mp4": 18,
|
| 126 |
-
"video_27.mp4": 19,
|
| 127 |
-
"video_28.mp4": 20,
|
| 128 |
-
"video_29.mp4": 21,
|
| 129 |
-
"video_3.mp4": 22,
|
| 130 |
-
"video_30.mp4": 23,
|
| 131 |
-
"video_31.mp4": 24,
|
| 132 |
-
"video_32.mp4": 25,
|
| 133 |
-
"video_33.mp4": 26,
|
| 134 |
-
"video_34.mp4": 27,
|
| 135 |
-
"video_35.mp4": 28,
|
| 136 |
-
"video_36.mp4": 29,
|
| 137 |
-
"video_37.mp4": 30,
|
| 138 |
-
"video_38.mp4": 31,
|
| 139 |
-
"video_39.mp4": 32,
|
| 140 |
-
"video_4.mp4": 33,
|
| 141 |
-
"video_40.mp4": 34,
|
| 142 |
-
"video_41.mp4": 35,
|
| 143 |
-
"video_42.mp4": 36,
|
| 144 |
-
"video_43.mp4": 37,
|
| 145 |
-
"video_44.mp4": 38,
|
| 146 |
-
"video_45.mp4": 39,
|
| 147 |
-
"video_46.mp4": 40,
|
| 148 |
-
"video_47.mp4": 41,
|
| 149 |
-
"video_48.mp4": 42,
|
| 150 |
-
"video_49.mp4": 43,
|
| 151 |
-
"video_5.mp4": 44,
|
| 152 |
-
"video_50.mp4": 45,
|
| 153 |
-
"video_51.mp4": 46,
|
| 154 |
-
"video_52.mp4": 47,
|
| 155 |
-
"video_53.mp4": 48,
|
| 156 |
-
"video_54.mp4": 49,
|
| 157 |
-
"video_55.mp4": 50,
|
| 158 |
-
"video_56.mp4": 51,
|
| 159 |
-
"video_57.mp4": 52,
|
| 160 |
-
"video_58.mp4": 53,
|
| 161 |
-
"video_59.mp4": 54,
|
| 162 |
-
"video_6.mp4": 55,
|
| 163 |
-
"video_60.mp4": 56,
|
| 164 |
-
"video_61.mp4": 57,
|
| 165 |
-
"video_62.mp4": 58,
|
| 166 |
-
"video_63.mp4": 59,
|
| 167 |
-
"video_64.mp4": 60,
|
| 168 |
-
"video_65.mp4": 61,
|
| 169 |
-
"video_66.mp4": 62,
|
| 170 |
-
"video_67.mp4": 63,
|
| 171 |
-
"video_68.mp4": 64,
|
| 172 |
-
"video_69.mp4": 65,
|
| 173 |
-
"video_7.mp4": 66,
|
| 174 |
-
"video_70.mp4": 67,
|
| 175 |
-
"video_71.mp4": 68,
|
| 176 |
-
"video_72.mp4": 69,
|
| 177 |
-
"video_73.mp4": 70,
|
| 178 |
-
"video_74.mp4": 71,
|
| 179 |
-
"video_75.mp4": 72,
|
| 180 |
-
"video_76.mp4": 73,
|
| 181 |
-
"video_77.mp4": 74,
|
| 182 |
-
"video_78.mp4": 75,
|
| 183 |
-
"video_79.mp4": 76,
|
| 184 |
-
"video_8.mp4": 77,
|
| 185 |
-
"video_80.mp4": 78,
|
| 186 |
-
"video_81.mp4": 79,
|
| 187 |
-
"video_82.mp4": 80,
|
| 188 |
-
"video_83.mp4": 81,
|
| 189 |
-
"video_84.mp4": 82,
|
| 190 |
-
"video_85.mp4": 83,
|
| 191 |
-
"video_86.mp4": 84,
|
| 192 |
-
"video_87.mp4": 85,
|
| 193 |
-
"video_9.mp4": 86
|
| 194 |
},
|
| 195 |
"layer_norm_eps": 1e-12,
|
| 196 |
"model_type": "videomae",
|
|
|
|
| 12 |
"hidden_dropout_prob": 0.0,
|
| 13 |
"hidden_size": 768,
|
| 14 |
"id2label": {
|
| 15 |
+
"0": "carve-frontside",
|
| 16 |
+
"1": "roller"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
},
|
| 18 |
"image_size": 224,
|
| 19 |
"initializer_range": 0.02,
|
| 20 |
"intermediate_size": 3072,
|
| 21 |
"label2id": {
|
| 22 |
+
"carve-frontside": 0,
|
| 23 |
+
"roller": 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
},
|
| 25 |
"layer_norm_eps": 1e-12,
|
| 26 |
"model_type": "videomae",
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d02c3727ec357ac7a08d17342b670e9b4591f6b59968ba47c782a9158e256aed
|
| 3 |
+
size 344937368
|
runs/May13_09-06-56_MSI/events.out.tfevents.1715593801.MSI.20512.1
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db669cb12490441a28fe0e91292f7014ab51fecb55559ac8bc67f5bba8c72244
|
| 3 |
+
size 734
|
runs/May13_13-50-11_MSI/events.out.tfevents.1715601018.MSI.20512.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99381a7035c02e00dcd92a3f8954cbdb84e5614a06edb8eeda0a96be452efd3b
|
| 3 |
+
size 4946
|
runs/May13_14-01-15_MSI/events.out.tfevents.1715601677.MSI.20512.3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:696582a77b08d005b5af7894a79130f4769fa08dfad3b22b691e8866d20025d4
|
| 3 |
+
size 4946
|
runs/May13_19-31-28_MSI/events.out.tfevents.1715621490.MSI.14116.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:664b5ddad17480598fbe88e84965c845f473faa72a743d95c16d6e118b670669
|
| 3 |
+
size 7029
|
test_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 3.22972972972973,
|
| 3 |
+
"eval_accuracy": 0.5942028985507246,
|
| 4 |
+
"eval_loss": 0.7709811925888062,
|
| 5 |
+
"eval_runtime": 203.3655,
|
| 6 |
+
"eval_samples_per_second": 0.339,
|
| 7 |
+
"eval_steps_per_second": 0.089
|
| 8 |
+
}
|
trainer_state.json
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 0.6181818181818182,
|
| 3 |
+
"best_model_checkpoint": "videomae-base-finetuned-2\\checkpoint-76",
|
| 4 |
+
"epoch": 3.22972972972973,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 148,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.06756756756756757,
|
| 13 |
+
"grad_norm": 16.52984046936035,
|
| 14 |
+
"learning_rate": 3.3333333333333335e-05,
|
| 15 |
+
"loss": 4.2404,
|
| 16 |
+
"step": 10
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.13513513513513514,
|
| 20 |
+
"grad_norm": 10.275276184082031,
|
| 21 |
+
"learning_rate": 4.81203007518797e-05,
|
| 22 |
+
"loss": 2.1932,
|
| 23 |
+
"step": 20
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.20270270270270271,
|
| 27 |
+
"grad_norm": 6.553492069244385,
|
| 28 |
+
"learning_rate": 4.43609022556391e-05,
|
| 29 |
+
"loss": 1.0024,
|
| 30 |
+
"step": 30
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.25675675675675674,
|
| 34 |
+
"eval_accuracy": 0.6,
|
| 35 |
+
"eval_loss": 0.8335393667221069,
|
| 36 |
+
"eval_runtime": 202.228,
|
| 37 |
+
"eval_samples_per_second": 0.272,
|
| 38 |
+
"eval_steps_per_second": 0.069,
|
| 39 |
+
"step": 38
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"epoch": 1.0135135135135136,
|
| 43 |
+
"grad_norm": 5.992822170257568,
|
| 44 |
+
"learning_rate": 4.0601503759398494e-05,
|
| 45 |
+
"loss": 0.9484,
|
| 46 |
+
"step": 40
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"epoch": 1.0810810810810811,
|
| 50 |
+
"grad_norm": 4.172947406768799,
|
| 51 |
+
"learning_rate": 3.6842105263157895e-05,
|
| 52 |
+
"loss": 0.8423,
|
| 53 |
+
"step": 50
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"epoch": 1.1486486486486487,
|
| 57 |
+
"grad_norm": 8.419867515563965,
|
| 58 |
+
"learning_rate": 3.3082706766917295e-05,
|
| 59 |
+
"loss": 0.7835,
|
| 60 |
+
"step": 60
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"epoch": 1.2162162162162162,
|
| 64 |
+
"grad_norm": 6.0131940841674805,
|
| 65 |
+
"learning_rate": 2.9323308270676693e-05,
|
| 66 |
+
"loss": 0.733,
|
| 67 |
+
"step": 70
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"epoch": 1.2567567567567568,
|
| 71 |
+
"eval_accuracy": 0.6181818181818182,
|
| 72 |
+
"eval_loss": 0.7877511978149414,
|
| 73 |
+
"eval_runtime": 181.8111,
|
| 74 |
+
"eval_samples_per_second": 0.303,
|
| 75 |
+
"eval_steps_per_second": 0.077,
|
| 76 |
+
"step": 76
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"epoch": 2.027027027027027,
|
| 80 |
+
"grad_norm": 11.416802406311035,
|
| 81 |
+
"learning_rate": 2.556390977443609e-05,
|
| 82 |
+
"loss": 0.8287,
|
| 83 |
+
"step": 80
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"epoch": 2.0945945945945947,
|
| 87 |
+
"grad_norm": 8.970609664916992,
|
| 88 |
+
"learning_rate": 2.1804511278195487e-05,
|
| 89 |
+
"loss": 0.6822,
|
| 90 |
+
"step": 90
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"epoch": 2.1621621621621623,
|
| 94 |
+
"grad_norm": 20.559356689453125,
|
| 95 |
+
"learning_rate": 1.8045112781954888e-05,
|
| 96 |
+
"loss": 0.8237,
|
| 97 |
+
"step": 100
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"epoch": 2.22972972972973,
|
| 101 |
+
"grad_norm": 9.306185722351074,
|
| 102 |
+
"learning_rate": 1.4285714285714285e-05,
|
| 103 |
+
"loss": 0.704,
|
| 104 |
+
"step": 110
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"epoch": 2.2567567567567566,
|
| 108 |
+
"eval_accuracy": 0.6181818181818182,
|
| 109 |
+
"eval_loss": 0.8107306957244873,
|
| 110 |
+
"eval_runtime": 178.6821,
|
| 111 |
+
"eval_samples_per_second": 0.308,
|
| 112 |
+
"eval_steps_per_second": 0.078,
|
| 113 |
+
"step": 114
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"epoch": 3.0405405405405403,
|
| 117 |
+
"grad_norm": 10.595173835754395,
|
| 118 |
+
"learning_rate": 1.0526315789473684e-05,
|
| 119 |
+
"loss": 0.8146,
|
| 120 |
+
"step": 120
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"epoch": 3.108108108108108,
|
| 124 |
+
"grad_norm": 13.952937126159668,
|
| 125 |
+
"learning_rate": 6.766917293233083e-06,
|
| 126 |
+
"loss": 0.6075,
|
| 127 |
+
"step": 130
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"epoch": 3.175675675675676,
|
| 131 |
+
"grad_norm": 14.456306457519531,
|
| 132 |
+
"learning_rate": 3.007518796992481e-06,
|
| 133 |
+
"loss": 0.5661,
|
| 134 |
+
"step": 140
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"epoch": 3.22972972972973,
|
| 138 |
+
"eval_accuracy": 0.6181818181818182,
|
| 139 |
+
"eval_loss": 0.7859958410263062,
|
| 140 |
+
"eval_runtime": 168.8621,
|
| 141 |
+
"eval_samples_per_second": 0.326,
|
| 142 |
+
"eval_steps_per_second": 0.083,
|
| 143 |
+
"step": 148
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 3.22972972972973,
|
| 147 |
+
"step": 148,
|
| 148 |
+
"total_flos": 7.270063395179397e+17,
|
| 149 |
+
"train_loss": 1.1026684142447807,
|
| 150 |
+
"train_runtime": 5645.231,
|
| 151 |
+
"train_samples_per_second": 0.105,
|
| 152 |
+
"train_steps_per_second": 0.026
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"epoch": 3.22972972972973,
|
| 156 |
+
"eval_accuracy": 0.5942028985507246,
|
| 157 |
+
"eval_loss": 0.7709812521934509,
|
| 158 |
+
"eval_runtime": 248.4709,
|
| 159 |
+
"eval_samples_per_second": 0.278,
|
| 160 |
+
"eval_steps_per_second": 0.072,
|
| 161 |
+
"step": 148
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 3.22972972972973,
|
| 165 |
+
"eval_accuracy": 0.5942028985507246,
|
| 166 |
+
"eval_loss": 0.7709811925888062,
|
| 167 |
+
"eval_runtime": 203.3655,
|
| 168 |
+
"eval_samples_per_second": 0.339,
|
| 169 |
+
"eval_steps_per_second": 0.089,
|
| 170 |
+
"step": 148
|
| 171 |
+
}
|
| 172 |
+
],
|
| 173 |
+
"logging_steps": 10,
|
| 174 |
+
"max_steps": 148,
|
| 175 |
+
"num_input_tokens_seen": 0,
|
| 176 |
+
"num_train_epochs": 9223372036854775807,
|
| 177 |
+
"save_steps": 500,
|
| 178 |
+
"total_flos": 7.270063395179397e+17,
|
| 179 |
+
"train_batch_size": 4,
|
| 180 |
+
"trial_name": null,
|
| 181 |
+
"trial_params": null
|
| 182 |
+
}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43654f961aef1e5dd1068a7d3b8fa50ac16eb42e41c0e7feac21eb0f47247ad3
|
| 3 |
size 4984
|