Deepu1965 committed on
Commit
7c4b4af
·
verified ·
1 Parent(s): 3ecb23d

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. history.csv +3 -3
  3. metrics.json +109 -109
  4. model.pt +1 -1
README.md CHANGED
@@ -1,6 +1,6 @@
1
  # Week 2 MoE Seq2Seq (hash routing)
2
 
3
- * Best validation loss: 5.6076
4
  * Top-k: 1
5
  * Aux loss coef: 0.0
6
 
 
1
  # Week 2 MoE Seq2Seq (hash routing)
2
 
3
+ * Best validation loss: 5.6068
4
  * Top-k: 1
5
  * Aux loss coef: 0.0
6
 
history.csv CHANGED
@@ -1,4 +1,4 @@
1
  epoch,train_loss,train_aux_loss,train_perplexity,val_loss,val_aux_loss,val_perplexity
2
- 1,6.781526548633617,0.0,881.413217772467,6.13985468708606,0.0,463.9861427818742
3
- 2,5.734937044167949,0.0,309.49348572595704,5.784560862128246,0.0,325.23918390511113
4
- 3,5.265651165721379,0.0,193.57231541377683,5.607645124527238,0.0,272.5017740962985
 
1
  epoch,train_loss,train_aux_loss,train_perplexity,val_loss,val_aux_loss,val_perplexity
2
+ 1,6.783949644676193,0.0,883.5515563058923,6.120485806811677,0.0,455.08572455110965
3
+ 2,5.72200544220322,0.0,305.51700570051946,5.783777441596244,0.0,324.984484631975
4
+ 3,5.255330442014935,0.0,191.58478305751783,5.606828254184237,0.0,272.279266370978
metrics.json CHANGED
@@ -2,120 +2,120 @@
2
  "history": [
3
  {
4
  "epoch": 1,
5
- "train_loss": 6.781526548633617,
6
  "train_aux_loss": 0.0,
7
- "train_perplexity": 881.413217772467,
8
- "val_loss": 6.13985468708606,
9
  "val_aux_loss": 0.0,
10
- "val_perplexity": 463.9861427818742
11
  },
12
  {
13
  "epoch": 2,
14
- "train_loss": 5.734937044167949,
15
  "train_aux_loss": 0.0,
16
- "train_perplexity": 309.49348572595704,
17
- "val_loss": 5.784560862128246,
18
  "val_aux_loss": 0.0,
19
- "val_perplexity": 325.23918390511113
20
  },
21
  {
22
  "epoch": 3,
23
- "train_loss": 5.265651165721379,
24
  "train_aux_loss": 0.0,
25
- "train_perplexity": 193.57231541377683,
26
- "val_loss": 5.607645124527238,
27
  "val_aux_loss": 0.0,
28
- "val_perplexity": 272.5017740962985
29
  }
30
  ],
31
  "train_expert_usage": [
32
  {
33
  "encoder": [
34
  [
35
- 1.1995620727539062,
36
- 1.0382475852966309,
37
- 1.1820646524429321,
38
- 1.1452935934066772
39
  ],
40
  [
41
- 1.0309076309204102,
42
- 1.3545637130737305,
43
- 1.255271315574646,
44
- 0.9244250655174255
45
  ]
46
  ],
47
  "decoder": [
48
  [
49
- 0.767426073551178,
50
- 0.17747089266777039,
51
- 0.3735591173171997,
52
- 0.1704733520746231
53
  ],
54
  [
55
- 0.13853856921195984,
56
- 0.5398667454719543,
57
- 0.7335297465324402,
58
- 0.0769944041967392
59
  ]
60
  ]
61
  },
62
  {
63
  "encoder": [
64
  [
65
- 1.220947027206421,
66
- 1.0275551080703735,
67
- 1.2033497095108032,
68
- 1.1133160591125488
69
  ],
70
  [
71
- 1.0806751251220703,
72
- 1.291507363319397,
73
- 1.1101988554000854,
74
- 1.0827864408493042
75
  ]
76
  ],
77
  "decoder": [
78
  [
79
- 0.5099791884422302,
80
- 0.2548719048500061,
81
- 0.5015906691551208,
82
- 0.2213464379310608
83
  ],
84
  [
85
- 0.2474392205476761,
86
- 0.3361818194389343,
87
- 0.8026777505874634,
88
- 0.10148938745260239
89
  ]
90
  ]
91
  },
92
  {
93
  "encoder": [
94
  [
95
- 1.1927927732467651,
96
- 1.0216560363769531,
97
- 1.242702841758728,
98
- 1.1080161333084106
99
  ],
100
  [
101
- 1.1292370557785034,
102
- 1.267861247062683,
103
- 1.047805905342102,
104
- 1.120263695716858
105
  ]
106
  ],
107
  "decoder": [
108
  [
109
- 0.48166799545288086,
110
- 0.27755507826805115,
111
- 0.4995720088481903,
112
- 0.2349848747253418
113
  ],
114
  [
115
- 0.246026873588562,
116
- 0.32668769359588623,
117
- 0.8019145131111145,
118
- 0.11915087699890137
119
  ]
120
  ]
121
  }
@@ -124,95 +124,95 @@
124
  {
125
  "encoder": [
126
  [
127
- 1.3101885318756104,
128
- 1.0473449230194092,
129
- 1.1509929895401,
130
- 1.0964527130126953
131
  ],
132
  [
133
- 0.9937760829925537,
134
- 1.3266657590866089,
135
- 1.1915743350982666,
136
- 1.0929629802703857
137
  ]
138
  ],
139
  "decoder": [
140
  [
141
- 0.7106418013572693,
142
- 0.2959778308868408,
143
- 0.3988703489303589,
144
- 0.20999424159526825
145
  ],
146
  [
147
- 0.3808821439743042,
148
- 0.277557909488678,
149
- 0.8908476233482361,
150
- 0.06619657576084137
151
  ]
152
  ]
153
  },
154
  {
155
  "encoder": [
156
  [
157
- 1.2734206914901733,
158
- 1.0350050926208496,
159
- 1.2253562211990356,
160
- 1.071197271347046
161
  ],
162
  [
163
- 1.047668695449829,
164
- 1.4553172588348389,
165
- 1.0208303928375244,
166
- 1.0811628103256226
167
  ]
168
  ],
169
  "decoder": [
170
  [
171
- 0.5223413705825806,
172
- 0.2563318610191345,
173
- 0.5943660736083984,
174
- 0.2424449622631073
175
  ],
176
  [
177
- 0.22053532302379608,
178
- 0.38325658440589905,
179
- 0.9159231781959534,
180
- 0.09576917439699173
181
  ]
182
  ]
183
  },
184
  {
185
  "encoder": [
186
  [
187
- 1.2540652751922607,
188
- 1.0315872430801392,
189
- 1.253417730331421,
190
- 1.0659087896347046
191
  ],
192
  [
193
- 1.1852425336837769,
194
- 1.2326953411102295,
195
- 1.0134551525115967,
196
- 1.173586130142212
197
  ]
198
  ],
199
  "decoder": [
200
  [
201
- 0.4280831813812256,
202
- 0.3081738352775574,
203
- 0.660526692867279,
204
- 0.21870052814483643
205
  ],
206
  [
207
- 0.16326090693473816,
208
- 0.34843143820762634,
209
- 0.9616851210594177,
210
- 0.1421067714691162
211
  ]
212
  ]
213
  }
214
  ],
215
- "best_val_loss": 5.607645124527238,
216
  "config": {
217
  "tokenizer": "bert-base-uncased",
218
  "max_seq_len": 128,
 
2
  "history": [
3
  {
4
  "epoch": 1,
5
+ "train_loss": 6.783949644676193,
6
  "train_aux_loss": 0.0,
7
+ "train_perplexity": 883.5515563058923,
8
+ "val_loss": 6.120485806811677,
9
  "val_aux_loss": 0.0,
10
+ "val_perplexity": 455.08572455110965
11
  },
12
  {
13
  "epoch": 2,
14
+ "train_loss": 5.72200544220322,
15
  "train_aux_loss": 0.0,
16
+ "train_perplexity": 305.51700570051946,
17
+ "val_loss": 5.783777441596244,
18
  "val_aux_loss": 0.0,
19
+ "val_perplexity": 324.984484631975
20
  },
21
  {
22
  "epoch": 3,
23
+ "train_loss": 5.255330442014935,
24
  "train_aux_loss": 0.0,
25
+ "train_perplexity": 191.58478305751783,
26
+ "val_loss": 5.606828254184237,
27
  "val_aux_loss": 0.0,
28
+ "val_perplexity": 272.279266370978
29
  }
30
  ],
31
  "train_expert_usage": [
32
  {
33
  "encoder": [
34
  [
35
+ 1.2391363382339478,
36
+ 1.0983222723007202,
37
+ 1.0011056661605835,
38
+ 1.2266035079956055
39
  ],
40
  [
41
+ 1.2305195331573486,
42
+ 0.7604071497917175,
43
+ 1.1566280126571655,
44
+ 1.4176130294799805
45
  ]
46
  ],
47
  "decoder": [
48
  [
49
+ 0.7768203616142273,
50
+ 0.23009872436523438,
51
+ 0.22161749005317688,
52
+ 0.26393088698387146
53
  ],
54
  [
55
+ 0.07864928245544434,
56
+ 0.7649437785148621,
57
+ 0.16985277831554413,
58
+ 0.4790216386318207
59
  ]
60
  ]
61
  },
62
  {
63
  "encoder": [
64
  [
65
+ 1.2392860651016235,
66
+ 1.1015535593032837,
67
+ 0.9848208427429199,
68
+ 1.2395071983337402
69
  ],
70
  [
71
+ 1.2409480810165405,
72
+ 1.0997846126556396,
73
+ 1.1543740034103394,
74
+ 1.0700610876083374
75
  ]
76
  ],
77
  "decoder": [
78
  [
79
+ 0.7814996838569641,
80
+ 0.21621061861515045,
81
+ 0.2551786005496979,
82
+ 0.2495078146457672
83
  ],
84
  [
85
+ 0.15144944190979004,
86
+ 0.5814668536186218,
87
+ 0.28734877705574036,
88
+ 0.4821316599845886
89
  ]
90
  ]
91
  },
92
  {
93
  "encoder": [
94
  [
95
+ 1.2064954042434692,
96
+ 1.114086389541626,
97
+ 0.9930238723754883,
98
+ 1.2515621185302734
99
  ],
100
  [
101
+ 1.157797932624817,
102
+ 1.213179111480713,
103
+ 1.1462993621826172,
104
+ 1.0478914976119995
105
  ]
106
  ],
107
  "decoder": [
108
  [
109
+ 0.7839962244033813,
110
+ 0.2211395800113678,
111
+ 0.2677685022354126,
112
+ 0.2284652441740036
113
  ],
114
  [
115
+ 0.19157297909259796,
116
+ 0.4467729926109314,
117
+ 0.3304468095302582,
118
+ 0.5325767397880554
119
  ]
120
  ]
121
  }
 
124
  {
125
  "encoder": [
126
  [
127
+ 1.2822349071502686,
128
+ 1.1881206035614014,
129
+ 0.9720463156700134,
130
+ 1.1625773906707764
131
  ],
132
  [
133
+ 1.4352425336837769,
134
+ 0.8882213234901428,
135
+ 1.248740792274475,
136
+ 1.0327744483947754
137
  ]
138
  ],
139
  "decoder": [
140
  [
141
+ 0.8861706852912903,
142
+ 0.21258454024791718,
143
+ 0.2510792911052704,
144
+ 0.2656497359275818
145
  ],
146
  [
147
+ 0.07652179896831512,
148
+ 0.5462296605110168,
149
+ 0.2719815671443939,
150
+ 0.720751166343689
151
  ]
152
  ]
153
  },
154
  {
155
  "encoder": [
156
  [
157
+ 1.2491365671157837,
158
+ 1.2312203645706177,
159
+ 0.9285868406295776,
160
+ 1.196035385131836
161
  ],
162
  [
163
+ 1.1820405721664429,
164
+ 1.1934090852737427,
165
+ 1.1655993461608887,
166
+ 1.0639300346374512
167
  ]
168
  ],
169
  "decoder": [
170
  [
171
+ 0.9253849387168884,
172
+ 0.21632608771324158,
173
+ 0.24233703315258026,
174
+ 0.23143617808818817
175
  ],
176
  [
177
+ 0.14991365373134613,
178
+ 0.43304792046546936,
179
+ 0.35336020588874817,
180
+ 0.6791624426841736
181
  ]
182
  ]
183
  },
184
  {
185
  "encoder": [
186
  [
187
+ 1.2165778875350952,
188
+ 1.2050294876098633,
189
+ 0.9812203049659729,
190
+ 1.2021514177322388
191
  ],
192
  [
193
+ 1.1700963973999023,
194
+ 1.3727154731750488,
195
+ 1.0938984155654907,
196
+ 0.9682688117027283
197
  ]
198
  ],
199
  "decoder": [
200
  [
201
+ 0.8936178088188171,
202
+ 0.21582241356372833,
203
+ 0.2633832097053528,
204
+ 0.24266082048416138
205
  ],
206
  [
207
+ 0.15016549825668335,
208
+ 0.48665276169776917,
209
+ 0.4501367211341858,
210
+ 0.5285292863845825
211
  ]
212
  ]
213
  }
214
  ],
215
+ "best_val_loss": 5.606828254184237,
216
  "config": {
217
  "tokenizer": "bert-base-uncased",
218
  "max_seq_len": 128,
model.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d8ca2ca01ce3311dfab7a8fe4b42dc60cac000c7d46108f12115879ff15f2b4
3
  size 85979282
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cfe54bf46ce4b9250bb8e6372ceb5a2c5b18eb665e7d65cbf3648127f096699
3
  size 85979282