jialicheng committed on
Commit
3baafdc
·
verified ·
1 Parent(s): ba284b2

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ base_model: MCG-NJU/videomae-large
4
+ tags:
5
+ - video-classification
6
+ - generated_from_trainer
7
+ metrics:
8
+ - accuracy
9
+ model-index:
10
+ - name: ucf101_42
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # ucf101_42
18
+
19
+ This model is a fine-tuned version of [MCG-NJU/videomae-large](https://huggingface.co/MCG-NJU/videomae-large) on the UCF101 dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.3185
22
+ - Accuracy: 0.9345
23
+
24
+ ## Model description
25
+
26
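+ A `VideoMAEForVideoClassification` model initialised from `MCG-NJU/videomae-large` (24 hidden layers, hidden size 1024, 16 attention heads) with a classification head over the 101 UCF101 action classes listed in `config.json`. It takes clips of 16 frames at 224×224 resolution. More information needed.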
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
+ More information needed
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 5e-05
42
+ - train_batch_size: 4
43
+ - eval_batch_size: 32
44
+ - seed: 42
45
+ - gradient_accumulation_steps: 8
46
+ - total_train_batch_size: 32
47
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
+ - lr_scheduler_type: linear
49
+ - num_epochs: 20
50
+
51
+ ### Training results
52
+
53
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
54
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
55
+ | No log | 1.0 | 298 | 0.5806 | 0.8423 |
56
+ | No log | 2.0 | 596 | 0.5192 | 0.8653 |
57
+ | No log | 3.0 | 894 | 0.4903 | 0.8814 |
58
+ | 0.5923 | 4.0 | 1192 | 0.4623 | 0.8901 |
59
+ | 0.5923 | 5.0 | 1490 | 0.3949 | 0.9005 |
60
+ | 0.5923 | 6.0 | 1788 | 0.4748 | 0.8844 |
61
+ | 0.0581 | 7.0 | 2086 | 0.4877 | 0.8820 |
62
+ | 0.0581 | 8.0 | 2385 | 0.3976 | 0.9131 |
63
+ | 0.0581 | 9.0 | 2683 | 0.3824 | 0.9116 |
64
+ | 0.0581 | 10.0 | 2981 | 0.3553 | 0.9171 |
65
+ | 0.0221 | 11.0 | 3279 | 0.3557 | 0.9229 |
66
+ | 0.0221 | 12.0 | 3577 | 0.3619 | 0.9258 |
67
+ | 0.0221 | 13.0 | 3875 | 0.3941 | 0.9214 |
68
+ | 0.0112 | 14.0 | 4173 | 0.3989 | 0.9145 |
69
+ | 0.0112 | 15.0 | 4471 | 0.3635 | 0.9236 |
70
+ | 0.0112 | 16.0 | 4770 | 0.3418 | 0.9285 |
71
+ | 0.005 | 17.0 | 5068 | 0.3374 | 0.9261 |
72
+ | 0.005 | 18.0 | 5366 | 0.3340 | 0.9333 |
73
+ | 0.005 | 19.0 | 5664 | 0.3294 | 0.9338 |
74
+ | 0.005 | 19.99 | 5960 | 0.3185 | 0.9345 |
75
+
76
+
77
+ ### Framework versions
78
+
79
+ - Transformers 4.39.3
80
+ - Pytorch 2.2.2+cu118
81
+ - Datasets 2.18.0
82
+ - Tokenizers 0.15.2
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 19.99,
3
+ "eval_accuracy": 0.9345116279069767,
4
+ "eval_loss": 0.3185047507286072,
5
+ "eval_runtime": 1108.1051,
6
+ "eval_samples_per_second": 3.414,
7
+ "eval_steps_per_second": 0.107,
8
+ "train_accuracy": 0.9997902904477299,
9
+ "train_loss": 0.0,
10
+ "train_runtime": 64.1643,
11
+ "train_samples_per_second": 743.17,
12
+ "train_steps_per_second": 23.222
13
+ }
config.json ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "MCG-NJU/videomae-large",
3
+ "architectures": [
4
+ "VideoMAEForVideoClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "decoder_hidden_size": 512,
8
+ "decoder_intermediate_size": 2048,
9
+ "decoder_num_attention_heads": 8,
10
+ "decoder_num_hidden_layers": 12,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.0,
13
+ "hidden_size": 1024,
14
+ "id2label": {
15
+ "0": "ApplyEyeMakeup",
16
+ "1": "ApplyLipstick",
17
+ "2": "Archery",
18
+ "3": "BabyCrawling",
19
+ "4": "BalanceBeam",
20
+ "5": "BandMarching",
21
+ "6": "BaseballPitch",
22
+ "7": "Basketball",
23
+ "8": "BasketballDunk",
24
+ "9": "BenchPress",
25
+ "10": "Biking",
26
+ "11": "Billiards",
27
+ "12": "BlowDryHair",
28
+ "13": "BlowingCandles",
29
+ "14": "BodyWeightSquats",
30
+ "15": "Bowling",
31
+ "16": "BoxingPunchingBag",
32
+ "17": "BoxingSpeedBag",
33
+ "18": "BreastStroke",
34
+ "19": "BrushingTeeth",
35
+ "20": "CleanAndJerk",
36
+ "21": "CliffDiving",
37
+ "22": "CricketBowling",
38
+ "23": "CricketShot",
39
+ "24": "CuttingInKitchen",
40
+ "25": "Diving",
41
+ "26": "Drumming",
42
+ "27": "Fencing",
43
+ "28": "FieldHockeyPenalty",
44
+ "29": "FloorGymnastics",
45
+ "30": "FrisbeeCatch",
46
+ "31": "FrontCrawl",
47
+ "32": "GolfSwing",
48
+ "33": "Haircut",
49
+ "34": "Hammering",
50
+ "35": "HammerThrow",
51
+ "36": "HandstandPushups",
52
+ "37": "HandstandWalking",
53
+ "38": "HeadMassage",
54
+ "39": "HighJump",
55
+ "40": "HorseRace",
56
+ "41": "HorseRiding",
57
+ "42": "HulaHoop",
58
+ "43": "IceDancing",
59
+ "44": "JavelinThrow",
60
+ "45": "JugglingBalls",
61
+ "46": "JumpingJack",
62
+ "47": "JumpRope",
63
+ "48": "Kayaking",
64
+ "49": "Knitting",
65
+ "50": "LongJump",
66
+ "51": "Lunges",
67
+ "52": "MilitaryParade",
68
+ "53": "Mixing",
69
+ "54": "MoppingFloor",
70
+ "55": "Nunchucks",
71
+ "56": "ParallelBars",
72
+ "57": "PizzaTossing",
73
+ "58": "PlayingCello",
74
+ "59": "PlayingDaf",
75
+ "60": "PlayingDhol",
76
+ "61": "PlayingFlute",
77
+ "62": "PlayingGuitar",
78
+ "63": "PlayingPiano",
79
+ "64": "PlayingSitar",
80
+ "65": "PlayingTabla",
81
+ "66": "PlayingViolin",
82
+ "67": "PoleVault",
83
+ "68": "PommelHorse",
84
+ "69": "PullUps",
85
+ "70": "Punch",
86
+ "71": "PushUps",
87
+ "72": "Rafting",
88
+ "73": "RockClimbingIndoor",
89
+ "74": "RopeClimbing",
90
+ "75": "Rowing",
91
+ "76": "SalsaSpin",
92
+ "77": "ShavingBeard",
93
+ "78": "Shotput",
94
+ "79": "SkateBoarding",
95
+ "80": "Skiing",
96
+ "81": "Skijet",
97
+ "82": "SkyDiving",
98
+ "83": "SoccerJuggling",
99
+ "84": "SoccerPenalty",
100
+ "85": "StillRings",
101
+ "86": "SumoWrestling",
102
+ "87": "Surfing",
103
+ "88": "Swing",
104
+ "89": "TableTennisShot",
105
+ "90": "TaiChi",
106
+ "91": "TennisSwing",
107
+ "92": "ThrowDiscus",
108
+ "93": "TrampolineJumping",
109
+ "94": "Typing",
110
+ "95": "UnevenBars",
111
+ "96": "VolleyballSpiking",
112
+ "97": "WalkingWithDog",
113
+ "98": "WallPushups",
114
+ "99": "WritingOnBoard",
115
+ "100": "YoYo"
116
+ },
117
+ "image_size": 224,
118
+ "initializer_range": 0.02,
119
+ "intermediate_size": 4096,
120
+ "label2id": {
121
+ "ApplyEyeMakeup": 0,
122
+ "ApplyLipstick": 1,
123
+ "Archery": 2,
124
+ "BabyCrawling": 3,
125
+ "BalanceBeam": 4,
126
+ "BandMarching": 5,
127
+ "BaseballPitch": 6,
128
+ "Basketball": 7,
129
+ "BasketballDunk": 8,
130
+ "BenchPress": 9,
131
+ "Biking": 10,
132
+ "Billiards": 11,
133
+ "BlowDryHair": 12,
134
+ "BlowingCandles": 13,
135
+ "BodyWeightSquats": 14,
136
+ "Bowling": 15,
137
+ "BoxingPunchingBag": 16,
138
+ "BoxingSpeedBag": 17,
139
+ "BreastStroke": 18,
140
+ "BrushingTeeth": 19,
141
+ "CleanAndJerk": 20,
142
+ "CliffDiving": 21,
143
+ "CricketBowling": 22,
144
+ "CricketShot": 23,
145
+ "CuttingInKitchen": 24,
146
+ "Diving": 25,
147
+ "Drumming": 26,
148
+ "Fencing": 27,
149
+ "FieldHockeyPenalty": 28,
150
+ "FloorGymnastics": 29,
151
+ "FrisbeeCatch": 30,
152
+ "FrontCrawl": 31,
153
+ "GolfSwing": 32,
154
+ "Haircut": 33,
155
+ "HammerThrow": 35,
156
+ "Hammering": 34,
157
+ "HandstandPushups": 36,
158
+ "HandstandWalking": 37,
159
+ "HeadMassage": 38,
160
+ "HighJump": 39,
161
+ "HorseRace": 40,
162
+ "HorseRiding": 41,
163
+ "HulaHoop": 42,
164
+ "IceDancing": 43,
165
+ "JavelinThrow": 44,
166
+ "JugglingBalls": 45,
167
+ "JumpRope": 47,
168
+ "JumpingJack": 46,
169
+ "Kayaking": 48,
170
+ "Knitting": 49,
171
+ "LongJump": 50,
172
+ "Lunges": 51,
173
+ "MilitaryParade": 52,
174
+ "Mixing": 53,
175
+ "MoppingFloor": 54,
176
+ "Nunchucks": 55,
177
+ "ParallelBars": 56,
178
+ "PizzaTossing": 57,
179
+ "PlayingCello": 58,
180
+ "PlayingDaf": 59,
181
+ "PlayingDhol": 60,
182
+ "PlayingFlute": 61,
183
+ "PlayingGuitar": 62,
184
+ "PlayingPiano": 63,
185
+ "PlayingSitar": 64,
186
+ "PlayingTabla": 65,
187
+ "PlayingViolin": 66,
188
+ "PoleVault": 67,
189
+ "PommelHorse": 68,
190
+ "PullUps": 69,
191
+ "Punch": 70,
192
+ "PushUps": 71,
193
+ "Rafting": 72,
194
+ "RockClimbingIndoor": 73,
195
+ "RopeClimbing": 74,
196
+ "Rowing": 75,
197
+ "SalsaSpin": 76,
198
+ "ShavingBeard": 77,
199
+ "Shotput": 78,
200
+ "SkateBoarding": 79,
201
+ "Skiing": 80,
202
+ "Skijet": 81,
203
+ "SkyDiving": 82,
204
+ "SoccerJuggling": 83,
205
+ "SoccerPenalty": 84,
206
+ "StillRings": 85,
207
+ "SumoWrestling": 86,
208
+ "Surfing": 87,
209
+ "Swing": 88,
210
+ "TableTennisShot": 89,
211
+ "TaiChi": 90,
212
+ "TennisSwing": 91,
213
+ "ThrowDiscus": 92,
214
+ "TrampolineJumping": 93,
215
+ "Typing": 94,
216
+ "UnevenBars": 95,
217
+ "VolleyballSpiking": 96,
218
+ "WalkingWithDog": 97,
219
+ "WallPushups": 98,
220
+ "WritingOnBoard": 99,
221
+ "YoYo": 100
222
+ },
223
+ "layer_norm_eps": 1e-12,
224
+ "model_type": "videomae",
225
+ "norm_pix_loss": true,
226
+ "num_attention_heads": 16,
227
+ "num_channels": 3,
228
+ "num_frames": 16,
229
+ "num_hidden_layers": 24,
230
+ "patch_size": 16,
231
+ "qkv_bias": true,
232
+ "torch_dtype": "float32",
233
+ "transformers_version": "4.39.3",
234
+ "tubelet_size": 2,
235
+ "use_mean_pooling": false
236
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 19.99,
3
+ "eval_accuracy": 0.9345116279069767,
4
+ "eval_loss": 0.3185047507286072,
5
+ "eval_runtime": 1108.1051,
6
+ "eval_samples_per_second": 3.414,
7
+ "eval_steps_per_second": 0.107
8
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21b20ff82edd13b0f0d503182adf86dc0dc9b77e12ea9656960469f818bd93ae
3
+ size 1215902164
pred_logit_eval.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36c98fe8aaf86136b0061b4d9f89aea2b3d81502ae1574992eb97a670aec73fa
3
+ size 4343128
pred_logit_train.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2745ba8dc0667853d5ac96c8cea5bd0dc24b3105029c1c6b064b21d0990cff1a
3
+ size 3853076
preprocessor_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "videos",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_center_crop",
8
+ "crop_size",
9
+ "do_rescale",
10
+ "rescale_factor",
11
+ "do_normalize",
12
+ "image_mean",
13
+ "image_std",
14
+ "return_tensors",
15
+ "data_format",
16
+ "input_data_format"
17
+ ],
18
+ "crop_size": {
19
+ "height": 224,
20
+ "width": 224
21
+ },
22
+ "do_center_crop": true,
23
+ "do_normalize": true,
24
+ "do_rescale": true,
25
+ "do_resize": true,
26
+ "image_mean": [
27
+ 0.485,
28
+ 0.456,
29
+ 0.406
30
+ ],
31
+ "image_processor_type": "VideoMAEImageProcessor",
32
+ "image_std": [
33
+ 0.229,
34
+ 0.224,
35
+ 0.225
36
+ ],
37
+ "resample": 2,
38
+ "rescale_factor": 0.00392156862745098,
39
+ "size": {
40
+ "shortest_edge": 224
41
+ }
42
+ }
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 19.99,
3
+ "eval_accuracy": 0.9345116279069767,
4
+ "eval_loss": 0.3185047507286072,
5
+ "eval_runtime": 1108.1051,
6
+ "eval_samples_per_second": 3.414,
7
+ "eval_steps_per_second": 0.107
8
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 19.99,
3
+ "train_loss": 0.0,
4
+ "train_runtime": 64.1643,
5
+ "train_samples_per_second": 743.17,
6
+ "train_steps_per_second": 23.222
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9345116279069767,
3
+ "best_model_checkpoint": "video/train/checkpoint/videomae-large/ucf101_42/checkpoint-5960",
4
+ "epoch": 19.9916142557652,
5
+ "eval_steps": 500,
6
+ "global_step": 5960,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.8423255813953489,
14
+ "eval_loss": 0.5805811285972595,
15
+ "eval_runtime": 1087.3833,
16
+ "eval_samples_per_second": 3.479,
17
+ "eval_steps_per_second": 0.109,
18
+ "step": 298
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.8653023255813953,
23
+ "eval_loss": 0.5191652178764343,
24
+ "eval_runtime": 1079.1551,
25
+ "eval_samples_per_second": 3.506,
26
+ "eval_steps_per_second": 0.11,
27
+ "step": 596
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_accuracy": 0.8813953488372093,
32
+ "eval_loss": 0.4902515411376953,
33
+ "eval_runtime": 1081.8928,
34
+ "eval_samples_per_second": 3.497,
35
+ "eval_steps_per_second": 0.11,
36
+ "step": 894
37
+ },
38
+ {
39
+ "epoch": 3.35,
40
+ "grad_norm": 9.042092323303223,
41
+ "learning_rate": 4.161073825503356e-05,
42
+ "loss": 0.5923,
43
+ "step": 1000
44
+ },
45
+ {
46
+ "epoch": 4.0,
47
+ "eval_accuracy": 0.8901395348837209,
48
+ "eval_loss": 0.4622752368450165,
49
+ "eval_runtime": 1075.9895,
50
+ "eval_samples_per_second": 3.516,
51
+ "eval_steps_per_second": 0.111,
52
+ "step": 1192
53
+ },
54
+ {
55
+ "epoch": 5.0,
56
+ "eval_accuracy": 0.9004651162790698,
57
+ "eval_loss": 0.3949489891529083,
58
+ "eval_runtime": 1078.4827,
59
+ "eval_samples_per_second": 3.508,
60
+ "eval_steps_per_second": 0.11,
61
+ "step": 1490
62
+ },
63
+ {
64
+ "epoch": 6.0,
65
+ "eval_accuracy": 0.8843720930232558,
66
+ "eval_loss": 0.47477614879608154,
67
+ "eval_runtime": 1079.6204,
68
+ "eval_samples_per_second": 3.504,
69
+ "eval_steps_per_second": 0.11,
70
+ "step": 1788
71
+ },
72
+ {
73
+ "epoch": 6.71,
74
+ "grad_norm": 0.13601620495319366,
75
+ "learning_rate": 3.3221476510067115e-05,
76
+ "loss": 0.0581,
77
+ "step": 2000
78
+ },
79
+ {
80
+ "epoch": 7.0,
81
+ "eval_accuracy": 0.881953488372093,
82
+ "eval_loss": 0.4876798987388611,
83
+ "eval_runtime": 1084.206,
84
+ "eval_samples_per_second": 3.489,
85
+ "eval_steps_per_second": 0.11,
86
+ "step": 2086
87
+ },
88
+ {
89
+ "epoch": 8.0,
90
+ "eval_accuracy": 0.9131162790697674,
91
+ "eval_loss": 0.3975999355316162,
92
+ "eval_runtime": 1079.1296,
93
+ "eval_samples_per_second": 3.506,
94
+ "eval_steps_per_second": 0.11,
95
+ "step": 2385
96
+ },
97
+ {
98
+ "epoch": 9.0,
99
+ "eval_accuracy": 0.9116279069767442,
100
+ "eval_loss": 0.38238757848739624,
101
+ "eval_runtime": 1081.1712,
102
+ "eval_samples_per_second": 3.499,
103
+ "eval_steps_per_second": 0.11,
104
+ "step": 2683
105
+ },
106
+ {
107
+ "epoch": 10.0,
108
+ "eval_accuracy": 0.9171162790697674,
109
+ "eval_loss": 0.355253130197525,
110
+ "eval_runtime": 1079.15,
111
+ "eval_samples_per_second": 3.506,
112
+ "eval_steps_per_second": 0.11,
113
+ "step": 2981
114
+ },
115
+ {
116
+ "epoch": 10.06,
117
+ "grad_norm": 0.021823862567543983,
118
+ "learning_rate": 2.4832214765100674e-05,
119
+ "loss": 0.0221,
120
+ "step": 3000
121
+ },
122
+ {
123
+ "epoch": 11.0,
124
+ "eval_accuracy": 0.9228837209302325,
125
+ "eval_loss": 0.3557371497154236,
126
+ "eval_runtime": 1075.3717,
127
+ "eval_samples_per_second": 3.518,
128
+ "eval_steps_per_second": 0.111,
129
+ "step": 3279
130
+ },
131
+ {
132
+ "epoch": 12.0,
133
+ "eval_accuracy": 0.9257674418604651,
134
+ "eval_loss": 0.3618585467338562,
135
+ "eval_runtime": 1080.9763,
136
+ "eval_samples_per_second": 3.5,
137
+ "eval_steps_per_second": 0.11,
138
+ "step": 3577
139
+ },
140
+ {
141
+ "epoch": 13.0,
142
+ "eval_accuracy": 0.9213953488372093,
143
+ "eval_loss": 0.39410680532455444,
144
+ "eval_runtime": 1078.0403,
145
+ "eval_samples_per_second": 3.509,
146
+ "eval_steps_per_second": 0.11,
147
+ "step": 3875
148
+ },
149
+ {
150
+ "epoch": 13.42,
151
+ "grad_norm": 0.0037381162401288748,
152
+ "learning_rate": 1.644295302013423e-05,
153
+ "loss": 0.0112,
154
+ "step": 4000
155
+ },
156
+ {
157
+ "epoch": 14.0,
158
+ "eval_accuracy": 0.9145116279069767,
159
+ "eval_loss": 0.3988926410675049,
160
+ "eval_runtime": 1081.4143,
161
+ "eval_samples_per_second": 3.498,
162
+ "eval_steps_per_second": 0.11,
163
+ "step": 4173
164
+ },
165
+ {
166
+ "epoch": 15.0,
167
+ "eval_accuracy": 0.9236279069767442,
168
+ "eval_loss": 0.36351171135902405,
169
+ "eval_runtime": 1085.0693,
170
+ "eval_samples_per_second": 3.486,
171
+ "eval_steps_per_second": 0.11,
172
+ "step": 4471
173
+ },
174
+ {
175
+ "epoch": 16.0,
176
+ "eval_accuracy": 0.9284651162790698,
177
+ "eval_loss": 0.34182119369506836,
178
+ "eval_runtime": 1075.078,
179
+ "eval_samples_per_second": 3.519,
180
+ "eval_steps_per_second": 0.111,
181
+ "step": 4770
182
+ },
183
+ {
184
+ "epoch": 16.77,
185
+ "grad_norm": 0.004534624051302671,
186
+ "learning_rate": 8.053691275167785e-06,
187
+ "loss": 0.005,
188
+ "step": 5000
189
+ },
190
+ {
191
+ "epoch": 17.0,
192
+ "eval_accuracy": 0.9261395348837209,
193
+ "eval_loss": 0.3373846411705017,
194
+ "eval_runtime": 1074.5018,
195
+ "eval_samples_per_second": 3.521,
196
+ "eval_steps_per_second": 0.111,
197
+ "step": 5068
198
+ },
199
+ {
200
+ "epoch": 18.0,
201
+ "eval_accuracy": 0.9333023255813954,
202
+ "eval_loss": 0.33403050899505615,
203
+ "eval_runtime": 1082.5365,
204
+ "eval_samples_per_second": 3.495,
205
+ "eval_steps_per_second": 0.11,
206
+ "step": 5366
207
+ },
208
+ {
209
+ "epoch": 19.0,
210
+ "eval_accuracy": 0.9337674418604651,
211
+ "eval_loss": 0.32944682240486145,
212
+ "eval_runtime": 1080.8892,
213
+ "eval_samples_per_second": 3.5,
214
+ "eval_steps_per_second": 0.11,
215
+ "step": 5664
216
+ },
217
+ {
218
+ "epoch": 19.99,
219
+ "eval_accuracy": 0.9345116279069767,
220
+ "eval_loss": 0.3185047507286072,
221
+ "eval_runtime": 1083.1042,
222
+ "eval_samples_per_second": 3.493,
223
+ "eval_steps_per_second": 0.11,
224
+ "step": 5960
225
+ },
226
+ {
227
+ "epoch": 19.99,
228
+ "step": 5960,
229
+ "total_flos": 8.374858354430428e+20,
230
+ "train_loss": 0.0,
231
+ "train_runtime": 64.1643,
232
+ "train_samples_per_second": 743.17,
233
+ "train_steps_per_second": 23.222
234
+ }
235
+ ],
236
+ "logging_steps": 1000,
237
+ "max_steps": 1490,
238
+ "num_input_tokens_seen": 0,
239
+ "num_train_epochs": 5,
240
+ "save_steps": 500,
241
+ "total_flos": 8.374858354430428e+20,
242
+ "train_batch_size": 4,
243
+ "trial_name": null,
244
+ "trial_params": null
245
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2e56b3d3ec8ae18fd638f50986a71dae32fbc06de0e5e89b97cdaefa3f8339e
3
+ size 5048