EYEDOL commited on
Commit
5e79ec1
·
verified ·
1 Parent(s): 814127e

Upload folder using huggingface_hub

Browse files
checkpoint-200/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a70a9af242ff0e1565305fd79d6347e3d0ab897fd3729c7b65171ebde4ca5b9
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f516c96875369a56b3695683bcaeeda6eb2a60e651928b627dc327f2c1940c37
3
  size 966995080
checkpoint-200/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a2245dda281cf7f0034c402bf28a80422c2ea2616455d75ee7003f5ea2117d9
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37abe716a191e7469cc08de7a38a4975038c5d4ae5393c33e7fc18ca427984c7
3
  size 1925064044
checkpoint-200/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bd55b3848d82967a207e0805911c79200c6adce71e3b37fd24549a718f75738
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2412050386e127bb591a2ab33de2372b85e96145ca2326e6c8e125dbabd1d804
3
  size 988
checkpoint-200/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:031e0fdc2c20f6db35d37f449b7cbb318d204bbc0f5803ee3adfbcbdb6426566
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c805b003744d6c00853d1b84790e23f138d0a52d5331bb5dfe3ab6b5b38c6240
3
  size 1064
checkpoint-200/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 200,
3
- "best_metric": 6.366537880060474,
4
  "best_model_checkpoint": "./SALAMA_C7/checkpoint-200",
5
  "epoch": 2.0833333333333335,
6
  "eval_steps": 200,
@@ -11,88 +11,88 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.20833333333333334,
14
- "grad_norm": 2.567998170852661,
15
- "learning_rate": 1.9000000000000002e-06,
16
- "loss": 0.1774,
17
  "step": 20
18
  },
19
  {
20
  "epoch": 0.4166666666666667,
21
- "grad_norm": 3.8020272254943848,
22
- "learning_rate": 3.900000000000001e-06,
23
- "loss": 0.1662,
24
  "step": 40
25
  },
26
  {
27
  "epoch": 0.625,
28
- "grad_norm": 2.032432794570923,
29
- "learning_rate": 5.9e-06,
30
- "loss": 0.1198,
31
  "step": 60
32
  },
33
  {
34
  "epoch": 0.8333333333333334,
35
- "grad_norm": 1.15700101852417,
36
- "learning_rate": 7.9e-06,
37
- "loss": 0.109,
38
  "step": 80
39
  },
40
  {
41
  "epoch": 1.0416666666666667,
42
- "grad_norm": 3.478109121322632,
43
- "learning_rate": 9.9e-06,
44
- "loss": 0.0838,
45
  "step": 100
46
  },
47
  {
48
  "epoch": 1.25,
49
- "grad_norm": 1.5184258222579956,
50
- "learning_rate": 9.779069767441862e-06,
51
- "loss": 0.0669,
52
  "step": 120
53
  },
54
  {
55
  "epoch": 1.4583333333333333,
56
- "grad_norm": 4.6623759269714355,
57
- "learning_rate": 9.546511627906978e-06,
58
- "loss": 0.0833,
59
  "step": 140
60
  },
61
  {
62
  "epoch": 1.6666666666666665,
63
- "grad_norm": 3.8098835945129395,
64
- "learning_rate": 9.313953488372095e-06,
65
- "loss": 0.0915,
66
  "step": 160
67
  },
68
  {
69
  "epoch": 1.875,
70
- "grad_norm": 2.154282331466675,
71
- "learning_rate": 9.08139534883721e-06,
72
- "loss": 0.0876,
73
  "step": 180
74
  },
75
  {
76
  "epoch": 2.0833333333333335,
77
- "grad_norm": 2.825495719909668,
78
- "learning_rate": 8.848837209302326e-06,
79
- "loss": 0.1009,
80
  "step": 200
81
  },
82
  {
83
  "epoch": 2.0833333333333335,
84
- "eval_loss": 0.08810210227966309,
85
- "eval_runtime": 266.2349,
86
- "eval_samples_per_second": 2.881,
87
- "eval_steps_per_second": 0.361,
88
- "eval_wer": 6.366537880060474,
89
  "step": 200
90
  }
91
  ],
92
  "logging_steps": 20,
93
- "max_steps": 960,
94
  "num_input_tokens_seen": 0,
95
- "num_train_epochs": 10,
96
  "save_steps": 200,
97
  "stateful_callbacks": {
98
  "TrainerControl": {
 
1
  {
2
  "best_global_step": 200,
3
+ "best_metric": 10.44776119402985,
4
  "best_model_checkpoint": "./SALAMA_C7/checkpoint-200",
5
  "epoch": 2.0833333333333335,
6
  "eval_steps": 200,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.20833333333333334,
14
+ "grad_norm": 11.919928550720215,
15
+ "learning_rate": 1.8000000000000001e-06,
16
+ "loss": 0.6563,
17
  "step": 20
18
  },
19
  {
20
  "epoch": 0.4166666666666667,
21
+ "grad_norm": 11.068168640136719,
22
+ "learning_rate": 3.8000000000000005e-06,
23
+ "loss": 0.5814,
24
  "step": 40
25
  },
26
  {
27
  "epoch": 0.625,
28
+ "grad_norm": 7.94372034072876,
29
+ "learning_rate": 5.8e-06,
30
+ "loss": 0.5414,
31
  "step": 60
32
  },
33
  {
34
  "epoch": 0.8333333333333334,
35
+ "grad_norm": 6.410305500030518,
36
+ "learning_rate": 7.800000000000002e-06,
37
+ "loss": 0.4745,
38
  "step": 80
39
  },
40
  {
41
  "epoch": 1.0416666666666667,
42
+ "grad_norm": 4.956066131591797,
43
+ "learning_rate": 9.800000000000001e-06,
44
+ "loss": 0.3529,
45
  "step": 100
46
  },
47
  {
48
  "epoch": 1.25,
49
+ "grad_norm": 3.679781436920166,
50
+ "learning_rate": 9.042553191489362e-06,
51
+ "loss": 0.2865,
52
  "step": 120
53
  },
54
  {
55
  "epoch": 1.4583333333333333,
56
+ "grad_norm": 3.3053739070892334,
57
+ "learning_rate": 7.97872340425532e-06,
58
+ "loss": 0.263,
59
  "step": 140
60
  },
61
  {
62
  "epoch": 1.6666666666666665,
63
+ "grad_norm": 5.734177589416504,
64
+ "learning_rate": 6.914893617021278e-06,
65
+ "loss": 0.2875,
66
  "step": 160
67
  },
68
  {
69
  "epoch": 1.875,
70
+ "grad_norm": 7.049015522003174,
71
+ "learning_rate": 5.851063829787235e-06,
72
+ "loss": 0.2821,
73
  "step": 180
74
  },
75
  {
76
  "epoch": 2.0833333333333335,
77
+ "grad_norm": 2.211012363433838,
78
+ "learning_rate": 4.787234042553192e-06,
79
+ "loss": 0.204,
80
  "step": 200
81
  },
82
  {
83
  "epoch": 2.0833333333333335,
84
+ "eval_loss": 0.18036405742168427,
85
+ "eval_runtime": 274.0379,
86
+ "eval_samples_per_second": 2.799,
87
+ "eval_steps_per_second": 0.35,
88
+ "eval_wer": 10.44776119402985,
89
  "step": 200
90
  }
91
  ],
92
  "logging_steps": 20,
93
+ "max_steps": 288,
94
  "num_input_tokens_seen": 0,
95
+ "num_train_epochs": 3,
96
  "save_steps": 200,
97
  "stateful_callbacks": {
98
  "TrainerControl": {
checkpoint-200/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fe98a9ee4e1bd23d9bf7869649307b5338a198e11e58c7d70a840df6d10e0f6
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac55cd8bbcced756202be3aca523f94ecd6548025e9d1a1609129703b17c0d6c
3
  size 5496
checkpoint-288/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0459fdd50caeddc1dddcfe48a68ce9b966d455fa8c104fe166f29b4a007f1c1c
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3487346ccfeb08676834c610a07f3a5bbeedb50749a2e7fb0a8fba167c41b2f4
3
  size 966995080
checkpoint-288/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31a2cee16bc237b2d704ceea75812d061144601bb291c3deb683ceb9cb6c3c70
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88dd23a6795ecc80ab0f1f87e68d52393c275ef3d4b6a28f74de7e9d0c68a663
3
  size 1925064044
checkpoint-288/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef86260123ed82be82b42a2f4de267130a1b8d733ee3522c344257175f57e900
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:def1e6b141b8d9b6306daa1675d0c5192ba3a9a3dba69e9798a62bdc7129b459
3
  size 988
checkpoint-288/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 200,
3
- "best_metric": 9.978162271123804,
4
  "best_model_checkpoint": "./SALAMA_C7/checkpoint-200",
5
  "epoch": 3.0,
6
  "eval_steps": 200,
@@ -11,109 +11,109 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.20833333333333334,
14
- "grad_norm": 10.547709465026855,
15
  "learning_rate": 1.8000000000000001e-06,
16
- "loss": 0.7281,
17
  "step": 20
18
  },
19
  {
20
  "epoch": 0.4166666666666667,
21
- "grad_norm": 11.514107704162598,
22
  "learning_rate": 3.8000000000000005e-06,
23
- "loss": 0.6221,
24
  "step": 40
25
  },
26
  {
27
  "epoch": 0.625,
28
- "grad_norm": 7.552187442779541,
29
  "learning_rate": 5.8e-06,
30
- "loss": 0.4623,
31
  "step": 60
32
  },
33
  {
34
  "epoch": 0.8333333333333334,
35
- "grad_norm": 5.789488315582275,
36
  "learning_rate": 7.800000000000002e-06,
37
- "loss": 0.4368,
38
  "step": 80
39
  },
40
  {
41
  "epoch": 1.0416666666666667,
42
- "grad_norm": 5.5591630935668945,
43
  "learning_rate": 9.800000000000001e-06,
44
- "loss": 0.3215,
45
  "step": 100
46
  },
47
  {
48
  "epoch": 1.25,
49
- "grad_norm": 3.7926101684570312,
50
  "learning_rate": 9.042553191489362e-06,
51
- "loss": 0.2535,
52
  "step": 120
53
  },
54
  {
55
  "epoch": 1.4583333333333333,
56
- "grad_norm": 6.078831672668457,
57
  "learning_rate": 7.97872340425532e-06,
58
- "loss": 0.2608,
59
  "step": 140
60
  },
61
  {
62
  "epoch": 1.6666666666666665,
63
- "grad_norm": 5.2472662925720215,
64
  "learning_rate": 6.914893617021278e-06,
65
- "loss": 0.2671,
66
  "step": 160
67
  },
68
  {
69
  "epoch": 1.875,
70
- "grad_norm": 3.961386203765869,
71
  "learning_rate": 5.851063829787235e-06,
72
- "loss": 0.2477,
73
  "step": 180
74
  },
75
  {
76
  "epoch": 2.0833333333333335,
77
- "grad_norm": 3.207435369491577,
78
  "learning_rate": 4.787234042553192e-06,
79
- "loss": 0.244,
80
  "step": 200
81
  },
82
  {
83
  "epoch": 2.0833333333333335,
84
- "eval_loss": 0.15180820226669312,
85
- "eval_runtime": 341.1778,
86
- "eval_samples_per_second": 2.248,
87
- "eval_steps_per_second": 0.281,
88
- "eval_wer": 9.978162271123804,
89
  "step": 200
90
  },
91
  {
92
  "epoch": 2.2916666666666665,
93
- "grad_norm": 3.7495334148406982,
94
  "learning_rate": 3.723404255319149e-06,
95
- "loss": 0.1436,
96
  "step": 220
97
  },
98
  {
99
  "epoch": 2.5,
100
- "grad_norm": 4.28264856338501,
101
  "learning_rate": 2.6595744680851065e-06,
102
- "loss": 0.152,
103
  "step": 240
104
  },
105
  {
106
  "epoch": 2.7083333333333335,
107
- "grad_norm": 2.525714159011841,
108
  "learning_rate": 1.595744680851064e-06,
109
- "loss": 0.1231,
110
  "step": 260
111
  },
112
  {
113
  "epoch": 2.9166666666666665,
114
- "grad_norm": 2.2750539779663086,
115
  "learning_rate": 5.319148936170213e-07,
116
- "loss": 0.116,
117
  "step": 280
118
  }
119
  ],
 
1
  {
2
  "best_global_step": 200,
3
+ "best_metric": 10.44776119402985,
4
  "best_model_checkpoint": "./SALAMA_C7/checkpoint-200",
5
  "epoch": 3.0,
6
  "eval_steps": 200,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.20833333333333334,
14
+ "grad_norm": 11.919928550720215,
15
  "learning_rate": 1.8000000000000001e-06,
16
+ "loss": 0.6563,
17
  "step": 20
18
  },
19
  {
20
  "epoch": 0.4166666666666667,
21
+ "grad_norm": 11.068168640136719,
22
  "learning_rate": 3.8000000000000005e-06,
23
+ "loss": 0.5814,
24
  "step": 40
25
  },
26
  {
27
  "epoch": 0.625,
28
+ "grad_norm": 7.94372034072876,
29
  "learning_rate": 5.8e-06,
30
+ "loss": 0.5414,
31
  "step": 60
32
  },
33
  {
34
  "epoch": 0.8333333333333334,
35
+ "grad_norm": 6.410305500030518,
36
  "learning_rate": 7.800000000000002e-06,
37
+ "loss": 0.4745,
38
  "step": 80
39
  },
40
  {
41
  "epoch": 1.0416666666666667,
42
+ "grad_norm": 4.956066131591797,
43
  "learning_rate": 9.800000000000001e-06,
44
+ "loss": 0.3529,
45
  "step": 100
46
  },
47
  {
48
  "epoch": 1.25,
49
+ "grad_norm": 3.679781436920166,
50
  "learning_rate": 9.042553191489362e-06,
51
+ "loss": 0.2865,
52
  "step": 120
53
  },
54
  {
55
  "epoch": 1.4583333333333333,
56
+ "grad_norm": 3.3053739070892334,
57
  "learning_rate": 7.97872340425532e-06,
58
+ "loss": 0.263,
59
  "step": 140
60
  },
61
  {
62
  "epoch": 1.6666666666666665,
63
+ "grad_norm": 5.734177589416504,
64
  "learning_rate": 6.914893617021278e-06,
65
+ "loss": 0.2875,
66
  "step": 160
67
  },
68
  {
69
  "epoch": 1.875,
70
+ "grad_norm": 7.049015522003174,
71
  "learning_rate": 5.851063829787235e-06,
72
+ "loss": 0.2821,
73
  "step": 180
74
  },
75
  {
76
  "epoch": 2.0833333333333335,
77
+ "grad_norm": 2.211012363433838,
78
  "learning_rate": 4.787234042553192e-06,
79
+ "loss": 0.204,
80
  "step": 200
81
  },
82
  {
83
  "epoch": 2.0833333333333335,
84
+ "eval_loss": 0.18036405742168427,
85
+ "eval_runtime": 274.0379,
86
+ "eval_samples_per_second": 2.799,
87
+ "eval_steps_per_second": 0.35,
88
+ "eval_wer": 10.44776119402985,
89
  "step": 200
90
  },
91
  {
92
  "epoch": 2.2916666666666665,
93
+ "grad_norm": 4.120218276977539,
94
  "learning_rate": 3.723404255319149e-06,
95
+ "loss": 0.1378,
96
  "step": 220
97
  },
98
  {
99
  "epoch": 2.5,
100
+ "grad_norm": 2.3007442951202393,
101
  "learning_rate": 2.6595744680851065e-06,
102
+ "loss": 0.1111,
103
  "step": 240
104
  },
105
  {
106
  "epoch": 2.7083333333333335,
107
+ "grad_norm": 4.804255485534668,
108
  "learning_rate": 1.595744680851064e-06,
109
+ "loss": 0.1654,
110
  "step": 260
111
  },
112
  {
113
  "epoch": 2.9166666666666665,
114
+ "grad_norm": 3.7491042613983154,
115
  "learning_rate": 5.319148936170213e-07,
116
+ "loss": 0.1346,
117
  "step": 280
118
  }
119
  ],
checkpoint-288/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3ffc78527769250dfdd03630f788573d15e547f1e38bf61f7517e9ee58ba54e
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac55cd8bbcced756202be3aca523f94ecd6548025e9d1a1609129703b17c0d6c
3
  size 5496