File size: 8,354 Bytes
7a9bf38
307bb20
7a9bf38
 
 
9ff40db
7a9bf38
 
9ff40db
 
7a9bf38
9ff40db
7a9bf38
9ff40db
 
 
 
5c6245e
9ff40db
 
7a9bf38
ddad724
5c6245e
7a9bf38
 
9ff40db
 
7a9bf38
f34b5ae
5c6245e
ddad724
9ff40db
421b627
9ff40db
7a9bf38
f34b5ae
5c6245e
a2e7ada
500bcda
7a9bf38
ddad724
7a9bf38
78e1974
9ff40db
7a9bf38
f6aaddb
462b488
78e1974
b116680
0f24801
78e1974
7a9bf38
5395f5c
78e1974
f34b5ae
7a9bf38
6bba80d
7a9bf38
9ff40db
 
500bcda
b116680
307bb20
7a9bf38
 
421b627
9ff40db
fc0b710
9ff40db
1d11f2a
f6aaddb
ddad724
4595512
1d11f2a
7a9bf38
b116680
7a9bf38
b116680
462b488
7a9bf38
7b5c429
307bb20
b116680
9ff40db
500bcda
ddad724
d0c99cc
8054e37
7a9bf38
ddad724
7a9bf38
f34b5ae
ddad724
7a9bf38
a2e7ada
40749f2
9ff40db
f34b5ae
7a9bf38
cd5ef0f
ddad724
9ff40db
307bb20
7a9bf38
ded29d5
5184add
9ff40db
b116680
f34b5ae
ddad724
7a9bf38
d8a454f
7a9bf38
 
 
 
9ff40db
307bb20
9ff40db
 
421b627
cd5ef0f
ac03118
 
9ff40db
7a9bf38
9ff40db
 
7a9bf38
9ff40db
25deb0c
7a9bf38
 
9ff40db
2352a61
7a9bf38
 
421b627
462b488
7a9bf38
421b627
8054e37
ddad724
5395f5c
7a9bf38
ddad724
3613e9a
7a9bf38
ddad724
 
5c6245e
9ff40db
8054e37
ddad724
40749f2
7a9bf38
f37e8ca
f34b5ae
500bcda
421b627
307bb20
f34b5ae
 
2352a61
40749f2
5395f5c
85e15ca
b116680
4595512
ddad724
7a9bf38
9ff40db
7a9bf38
307bb20
7a9bf38
5c6245e
7a9bf38
 
421b627
2934e9f
7a9bf38
 
 
9ff40db
a2e7ada
f34b5ae
5c6245e
7a9bf38
0f24801
ddad724
5c6245e
 
78e1974
5c6245e
a2e7ada
4595512
7a9bf38
307bb20
7a9bf38
9ff40db
7a9bf38
 
a2e7ada
7a9bf38
 
a2e7ada
5c6245e
7a9bf38
b116680
5c6245e
a2e7ada
78e1974
421b627
69ec2ae
7a9bf38
69ec2ae
7a9bf38
f6aaddb
7a9bf38
 
421b627
7a9bf38
421b627
7a9bf38
 
5c6245e
307bb20
b116680
7a9bf38
 
 
 
a2e7ada
9ff40db
7a9bf38
338868a
9ff40db
7a9bf38
f6aaddb
69ec2ae
5c6245e
7a9bf38
 
 
338868a
5184add
f34b5ae
 
0f24801
9ff40db
5395f5c
78e1974
2934e9f
7a9bf38
 
338868a
5c6245e
7a9bf38
 
78e1974
7a9bf38
 
 
 
 
 
 
 
 
28a902f
d53855a
 
 
 
 
28a902f
7a9bf38
 
 
 
 
b17abd1
7a9bf38
9ff40db
7a9bf38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
{
  "_name_or_path": "distributed/optimized-gpt2-1b",
  "activation_function": "gelu_new",
  "all_reduce_scores": {
    "0": "NON_PARTICIPATING",
    "1": "NON_PARTICIPATING",
    "10": "NON_PARTICIPATING",
    "100": "NON_PARTICIPATING",
    "101": "NON_PARTICIPATING",
    "102": "SUCCESS",
    "103": "NON_PARTICIPATING",
    "104": "SUCCESS",
    "105": "NON_PARTICIPATING",
    "106": "SUCCESS",
    "107": "NON_PARTICIPATING",
    "108": "NON_PARTICIPATING",
    "109": "SUCCESS",
    "11": "SUCCESS",
    "110": "NON_PARTICIPATING",
    "111": "SUCCESS",
    "112": "NON_PARTICIPATING",
    "113": "SUCCESS",
    "114": "NON_PARTICIPATING",
    "115": "NON_PARTICIPATING",
    "116": "NON_PARTICIPATING",
    "117": "SUCCESS",
    "118": "NON_PARTICIPATING",
    "119": "NON_PARTICIPATING",
    "12": "SUCCESS",
    "120": "NON_PARTICIPATING",
    "121": "NON_PARTICIPATING",
    "122": "NON_PARTICIPATING",
    "123": "NON_PARTICIPATING",
    "124": "NON_PARTICIPATING",
    "125": "NON_PARTICIPATING",
    "126": "NON_PARTICIPATING",
    "127": "NON_PARTICIPATING",
    "128": "NON_PARTICIPATING",
    "129": "NON_PARTICIPATING",
    "13": "SUCCESS",
    "130": "SUCCESS",
    "131": "NON_PARTICIPATING",
    "132": "NON_PARTICIPATING",
    "133": "NON_PARTICIPATING",
    "134": "NON_PARTICIPATING",
    "135": "NON_PARTICIPATING",
    "136": "NON_PARTICIPATING",
    "137": "NON_PARTICIPATING",
    "138": "NON_PARTICIPATING",
    "139": "NON_PARTICIPATING",
    "14": "SUCCESS",
    "140": "NON_PARTICIPATING",
    "141": "NON_PARTICIPATING",
    "142": "NON_PARTICIPATING",
    "143": "NON_PARTICIPATING",
    "144": "NON_PARTICIPATING",
    "145": "NON_PARTICIPATING",
    "146": "NON_PARTICIPATING",
    "147": "NON_PARTICIPATING",
    "148": "SUCCESS",
    "149": "NON_PARTICIPATING",
    "15": "SUCCESS",
    "150": "NON_PARTICIPATING",
    "151": "NON_PARTICIPATING",
    "152": "NON_PARTICIPATING",
    "153": "NON_PARTICIPATING",
    "154": "SUCCESS",
    "155": "NON_PARTICIPATING",
    "156": "SUCCESS",
    "157": "NON_PARTICIPATING",
    "158": "NON_PARTICIPATING",
    "159": "NON_PARTICIPATING",
    "16": "SUCCESS",
    "160": "NON_PARTICIPATING",
    "161": "NON_PARTICIPATING",
    "162": "NON_PARTICIPATING",
    "163": "NON_PARTICIPATING",
    "164": "NON_PARTICIPATING",
    "165": "NON_PARTICIPATING",
    "166": "NON_PARTICIPATING",
    "167": "NON_PARTICIPATING",
    "168": "NON_PARTICIPATING",
    "169": "NON_PARTICIPATING",
    "17": "NON_PARTICIPATING",
    "170": "NON_PARTICIPATING",
    "171": "SUCCESS",
    "172": "NON_PARTICIPATING",
    "173": "NON_PARTICIPATING",
    "174": "NON_PARTICIPATING",
    "175": "NON_PARTICIPATING",
    "176": "NON_PARTICIPATING",
    "177": "NON_PARTICIPATING",
    "178": "SUCCESS",
    "179": "NON_PARTICIPATING",
    "18": "SUCCESS",
    "180": "NON_PARTICIPATING",
    "181": "NON_PARTICIPATING",
    "182": "NON_PARTICIPATING",
    "183": "NON_PARTICIPATING",
    "184": "NON_PARTICIPATING",
    "185": "NON_PARTICIPATING",
    "186": "NON_PARTICIPATING",
    "187": "NON_PARTICIPATING",
    "188": "NON_PARTICIPATING",
    "189": "NON_PARTICIPATING",
    "19": "SUCCESS",
    "190": "NON_PARTICIPATING",
    "191": "NON_PARTICIPATING",
    "192": "NON_PARTICIPATING",
    "193": "NON_PARTICIPATING",
    "194": "NON_PARTICIPATING",
    "195": "NON_PARTICIPATING",
    "196": "NON_PARTICIPATING",
    "197": "NON_PARTICIPATING",
    "198": "NON_PARTICIPATING",
    "199": "NON_PARTICIPATING",
    "2": "NON_PARTICIPATING",
    "20": "SUCCESS",
    "200": "NON_PARTICIPATING",
    "201": "SUCCESS",
    "202": "NON_PARTICIPATING",
    "203": "NON_PARTICIPATING",
    "204": "NON_PARTICIPATING",
    "205": "NON_PARTICIPATING",
    "206": "SUCCESS",
    "207": "NON_PARTICIPATING",
    "208": "NON_PARTICIPATING",
    "209": "SUCCESS",
    "21": "SUCCESS",
    "210": "SUCCESS",
    "211": "NON_PARTICIPATING",
    "212": "NON_PARTICIPATING",
    "213": "NON_PARTICIPATING",
    "214": "SUCCESS",
    "215": "NON_PARTICIPATING",
    "216": "NON_PARTICIPATING",
    "217": "NON_PARTICIPATING",
    "218": "NON_PARTICIPATING",
    "219": "NON_PARTICIPATING",
    "22": "SUCCESS",
    "220": "NON_PARTICIPATING",
    "221": "NON_PARTICIPATING",
    "222": "NON_PARTICIPATING",
    "223": "NON_PARTICIPATING",
    "224": "NON_PARTICIPATING",
    "225": "NON_PARTICIPATING",
    "226": "NON_PARTICIPATING",
    "227": "NON_PARTICIPATING",
    "228": "SUCCESS",
    "229": "NON_PARTICIPATING",
    "23": "SUCCESS",
    "230": "NON_PARTICIPATING",
    "231": "NON_PARTICIPATING",
    "232": "NON_PARTICIPATING",
    "233": "NON_PARTICIPATING",
    "234": "NON_PARTICIPATING",
    "235": "NON_PARTICIPATING",
    "236": "SUCCESS",
    "237": "NON_PARTICIPATING",
    "238": "NON_PARTICIPATING",
    "239": "NON_PARTICIPATING",
    "24": "SUCCESS",
    "240": "NON_PARTICIPATING",
    "241": "NON_PARTICIPATING",
    "242": "NON_PARTICIPATING",
    "243": "NON_PARTICIPATING",
    "244": "NON_PARTICIPATING",
    "245": "NON_PARTICIPATING",
    "246": "NON_PARTICIPATING",
    "247": "NON_PARTICIPATING",
    "248": "NON_PARTICIPATING",
    "249": "SUCCESS",
    "25": "SUCCESS",
    "250": "NON_PARTICIPATING",
    "251": "NON_PARTICIPATING",
    "252": "NON_PARTICIPATING",
    "253": "NON_PARTICIPATING",
    "254": "NON_PARTICIPATING",
    "255": "NON_PARTICIPATING",
    "26": "SUCCESS",
    "27": "SUCCESS",
    "28": "SUCCESS",
    "29": "SUCCESS",
    "3": "SUCCESS",
    "30": "NON_PARTICIPATING",
    "31": "NON_PARTICIPATING",
    "32": "SUCCESS",
    "33": "NON_PARTICIPATING",
    "34": "SUCCESS",
    "35": "SUCCESS",
    "36": "SUCCESS",
    "37": "SUCCESS",
    "38": "SUCCESS",
    "39": "SUCCESS",
    "4": "SUCCESS",
    "40": "SUCCESS",
    "41": "SUCCESS",
    "42": "SUCCESS",
    "43": "SUCCESS",
    "44": "NON_PARTICIPATING",
    "45": "SUCCESS",
    "46": "NON_PARTICIPATING",
    "47": "SUCCESS",
    "48": "NON_PARTICIPATING",
    "49": "SUCCESS",
    "5": "SUCCESS",
    "50": "SUCCESS",
    "51": "SUCCESS",
    "52": "NON_PARTICIPATING",
    "53": "SUCCESS",
    "54": "SUCCESS",
    "55": "SUCCESS",
    "56": "SUCCESS",
    "57": "SUCCESS",
    "58": "NON_PARTICIPATING",
    "59": "SUCCESS",
    "6": "NON_PARTICIPATING",
    "60": "SUCCESS",
    "61": "NON_PARTICIPATING",
    "62": "SUCCESS",
    "63": "SUCCESS",
    "64": "NON_PARTICIPATING",
    "65": "SUCCESS",
    "66": "SUCCESS",
    "67": "SUCCESS",
    "68": "SUCCESS",
    "69": "SUCCESS",
    "7": "SUCCESS",
    "70": "NON_PARTICIPATING",
    "71": "NON_PARTICIPATING",
    "72": "SUCCESS",
    "73": "NON_PARTICIPATING",
    "74": "SUCCESS",
    "75": "SUCCESS",
    "76": "SUCCESS",
    "77": "SUCCESS",
    "78": "SUCCESS",
    "79": "NON_PARTICIPATING",
    "8": "SUCCESS",
    "80": "SUCCESS",
    "81": "SUCCESS",
    "82": "SUCCESS",
    "83": "NON_PARTICIPATING",
    "84": "SUCCESS",
    "85": "NON_PARTICIPATING",
    "86": "NON_PARTICIPATING",
    "87": "NON_PARTICIPATING",
    "88": "SUCCESS",
    "89": "SUCCESS",
    "9": "SUCCESS",
    "90": "SUCCESS",
    "91": "SUCCESS",
    "92": "SUCCESS",
    "93": "NON_PARTICIPATING",
    "94": "SUCCESS",
    "95": "SUCCESS",
    "96": "SUCCESS",
    "97": "SUCCESS",
    "98": "SUCCESS",
    "99": "SUCCESS"
  },
  "architectures": [
    "GPTOptim"
  ],
  "attn_pdrop": 0.1,
  "auto_map": {
    "AutoConfig": "distributed/optimized-gpt2-500m--configuration_gpt_optimized.GPTOptimConfig",
    "AutoModelForCausalLM": "distributed/optimized-gpt2-500m--modeling_gpt_optimized.GPTOptim"
  },
  "block_list": [
    5607335,
    5607340,
    5607344,
    5607349,
    5607353
  ],
  "block_size": 1024,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "inner_step": 15,
  "inner_steps": 0,
  "last_allreduce_block": 5605539,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt_optimized",
  "n_embd": 1280,
  "n_head": 32,
  "n_inner": null,
  "n_layer": 48,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 50257
}