SamagraDataGov committed on
Commit
db78494
·
verified ·
1 Parent(s): 950b641

Training in progress, step 40

Browse files
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:704f03dd6d6a4a15c353dc922b1cd8801ee537c707ebb81d453328220dd822e4
3
  size 151099494
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d66909332438e143fe70e0c29ee54f40db0d656b2f2c855e1fb48504b5799f5c
3
  size 151099494
runs/Jun29_09-39-32_bharatsahaiyak-test/events.out.tfevents.1719661023.bharatsahaiyak-test.1854617.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1687f609fdcc4d6a59acca2f6a6554d4482f90f26a6f94ad333c7808c79a2fed
3
+ size 6396
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 32.377740303541316,
3
- "best_model_checkpoint": "./whisper-tiny-hindi2_test/checkpoint-200",
4
  "epoch": 6.349206349206349,
5
  "eval_steps": 40,
6
  "global_step": 200,
@@ -10,127 +10,127 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.6349206349206349,
13
- "grad_norm": 5.7862091064453125,
14
  "learning_rate": 3.75e-05,
15
- "loss": 0.3639,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 1.2698412698412698,
20
- "grad_norm": 4.1368255615234375,
21
  "learning_rate": 3.75e-05,
22
- "loss": 0.2955,
23
  "step": 40
24
  },
25
  {
26
  "epoch": 1.2698412698412698,
27
- "eval_loss": 0.23172235488891602,
28
- "eval_runtime": 56.6839,
29
- "eval_samples_per_second": 1.764,
30
- "eval_steps_per_second": 1.764,
31
- "eval_wer": 36.00337268128162,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 1.9047619047619047,
36
- "grad_norm": 4.015283584594727,
37
  "learning_rate": 3.75e-05,
38
- "loss": 0.2134,
39
  "step": 60
40
  },
41
  {
42
  "epoch": 2.5396825396825395,
43
- "grad_norm": 3.215930223464966,
44
  "learning_rate": 3.75e-05,
45
- "loss": 0.1522,
46
  "step": 80
47
  },
48
  {
49
  "epoch": 2.5396825396825395,
50
- "eval_loss": 0.22003404796123505,
51
- "eval_runtime": 55.4903,
52
- "eval_samples_per_second": 1.802,
53
- "eval_steps_per_second": 1.802,
54
- "eval_wer": 34.40134907251265,
55
  "step": 80
56
  },
57
  {
58
  "epoch": 3.1746031746031744,
59
- "grad_norm": 2.828727960586548,
60
  "learning_rate": 3.75e-05,
61
- "loss": 0.1209,
62
  "step": 100
63
  },
64
  {
65
  "epoch": 3.8095238095238093,
66
- "grad_norm": 2.526444673538208,
67
  "learning_rate": 3.75e-05,
68
- "loss": 0.0819,
69
  "step": 120
70
  },
71
  {
72
  "epoch": 3.8095238095238093,
73
- "eval_loss": 0.22554056346416473,
74
- "eval_runtime": 54.7978,
75
- "eval_samples_per_second": 1.825,
76
- "eval_steps_per_second": 1.825,
77
- "eval_wer": 32.54637436762226,
78
  "step": 120
79
  },
80
  {
81
  "epoch": 4.444444444444445,
82
- "grad_norm": 2.135953664779663,
83
  "learning_rate": 3.75e-05,
84
- "loss": 0.061,
85
  "step": 140
86
  },
87
  {
88
  "epoch": 5.079365079365079,
89
- "grad_norm": 2.1400399208068848,
90
  "learning_rate": 3.75e-05,
91
- "loss": 0.0463,
92
  "step": 160
93
  },
94
  {
95
  "epoch": 5.079365079365079,
96
- "eval_loss": 0.24825386703014374,
97
- "eval_runtime": 54.848,
98
- "eval_samples_per_second": 1.823,
99
- "eval_steps_per_second": 1.823,
100
- "eval_wer": 32.79932546374368,
101
  "step": 160
102
  },
103
  {
104
  "epoch": 5.714285714285714,
105
- "grad_norm": 1.9905014038085938,
106
  "learning_rate": 3.75e-05,
107
- "loss": 0.0304,
108
  "step": 180
109
  },
110
  {
111
  "epoch": 6.349206349206349,
112
- "grad_norm": 1.8886462450027466,
113
  "learning_rate": 3.75e-05,
114
- "loss": 0.0289,
115
  "step": 200
116
  },
117
  {
118
  "epoch": 6.349206349206349,
119
- "eval_loss": 0.2673737406730652,
120
- "eval_runtime": 55.0065,
121
- "eval_samples_per_second": 1.818,
122
- "eval_steps_per_second": 1.818,
123
- "eval_wer": 32.377740303541316,
124
  "step": 200
125
  },
126
  {
127
  "epoch": 6.349206349206349,
128
  "step": 200,
129
  "total_flos": 1.5637915828224e+17,
130
- "train_loss": 0.13942732244729997,
131
- "train_runtime": 1124.5585,
132
- "train_samples_per_second": 5.691,
133
- "train_steps_per_second": 0.178
134
  }
135
  ],
136
  "logging_steps": 20,
 
1
  {
2
+ "best_metric": 31.703204047217536,
3
+ "best_model_checkpoint": "./whisper-tiny-hindi2_test/checkpoint-80",
4
  "epoch": 6.349206349206349,
5
  "eval_steps": 40,
6
  "global_step": 200,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.6349206349206349,
13
+ "grad_norm": 5.663797855377197,
14
  "learning_rate": 3.75e-05,
15
+ "loss": 0.3885,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 1.2698412698412698,
20
+ "grad_norm": 4.554736137390137,
21
  "learning_rate": 3.75e-05,
22
+ "loss": 0.2894,
23
  "step": 40
24
  },
25
  {
26
  "epoch": 1.2698412698412698,
27
+ "eval_loss": 0.21782219409942627,
28
+ "eval_runtime": 55.5859,
29
+ "eval_samples_per_second": 1.799,
30
+ "eval_steps_per_second": 1.799,
31
+ "eval_wer": 32.96795952782462,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 1.9047619047619047,
36
+ "grad_norm": 4.489897727966309,
37
  "learning_rate": 3.75e-05,
38
+ "loss": 0.1965,
39
  "step": 60
40
  },
41
  {
42
  "epoch": 2.5396825396825395,
43
+ "grad_norm": 2.790485382080078,
44
  "learning_rate": 3.75e-05,
45
+ "loss": 0.1333,
46
  "step": 80
47
  },
48
  {
49
  "epoch": 2.5396825396825395,
50
+ "eval_loss": 0.2209174931049347,
51
+ "eval_runtime": 55.2087,
52
+ "eval_samples_per_second": 1.811,
53
+ "eval_steps_per_second": 1.811,
54
+ "eval_wer": 31.703204047217536,
55
  "step": 80
56
  },
57
  {
58
  "epoch": 3.1746031746031744,
59
+ "grad_norm": 2.2434844970703125,
60
  "learning_rate": 3.75e-05,
61
+ "loss": 0.1016,
62
  "step": 100
63
  },
64
  {
65
  "epoch": 3.8095238095238093,
66
+ "grad_norm": 2.5447564125061035,
67
  "learning_rate": 3.75e-05,
68
+ "loss": 0.0599,
69
  "step": 120
70
  },
71
  {
72
  "epoch": 3.8095238095238093,
73
+ "eval_loss": 0.23671233654022217,
74
+ "eval_runtime": 55.6056,
75
+ "eval_samples_per_second": 1.798,
76
+ "eval_steps_per_second": 1.798,
77
+ "eval_wer": 33.22091062394604,
78
  "step": 120
79
  },
80
  {
81
  "epoch": 4.444444444444445,
82
+ "grad_norm": 2.4546711444854736,
83
  "learning_rate": 3.75e-05,
84
+ "loss": 0.0414,
85
  "step": 140
86
  },
87
  {
88
  "epoch": 5.079365079365079,
89
+ "grad_norm": 1.9287947416305542,
90
  "learning_rate": 3.75e-05,
91
+ "loss": 0.0332,
92
  "step": 160
93
  },
94
  {
95
  "epoch": 5.079365079365079,
96
+ "eval_loss": 0.24990424513816833,
97
+ "eval_runtime": 55.3919,
98
+ "eval_samples_per_second": 1.805,
99
+ "eval_steps_per_second": 1.805,
100
+ "eval_wer": 31.871838111298484,
101
  "step": 160
102
  },
103
  {
104
  "epoch": 5.714285714285714,
105
+ "grad_norm": 2.413437843322754,
106
  "learning_rate": 3.75e-05,
107
+ "loss": 0.0232,
108
  "step": 180
109
  },
110
  {
111
  "epoch": 6.349206349206349,
112
+ "grad_norm": 2.503246784210205,
113
  "learning_rate": 3.75e-05,
114
+ "loss": 0.0189,
115
  "step": 200
116
  },
117
  {
118
  "epoch": 6.349206349206349,
119
+ "eval_loss": 0.27371078729629517,
120
+ "eval_runtime": 55.2258,
121
+ "eval_samples_per_second": 1.811,
122
+ "eval_steps_per_second": 1.811,
123
+ "eval_wer": 33.05227655986509,
124
  "step": 200
125
  },
126
  {
127
  "epoch": 6.349206349206349,
128
  "step": 200,
129
  "total_flos": 1.5637915828224e+17,
130
+ "train_loss": 0.1285850764811039,
131
+ "train_runtime": 1129.5385,
132
+ "train_samples_per_second": 5.666,
133
+ "train_steps_per_second": 0.177
134
  }
135
  ],
136
  "logging_steps": 20,