File size: 7,515 Bytes
6a6d34c
959452e
6a6d34c
f3b403e
124e1b5
462265c
f3b403e
350234c
08c32e3
 
 
 
 
c95d326
a36e458
591948c
25d6896
124e1b5
350234c
ae3b29e
5793e45
ae3b29e
4acf935
08c32e3
d5320a5
b2bc76d
08c32e3
124e1b5
08c32e3
28f2f9a
25d6896
7e66265
1aede53
462265c
591948c
b2bc76d
5793e45
ae3b29e
96910df
124e1b5
f3b403e
350234c
08c32e3
c6bd19b
0376d0e
ae3b29e
08c32e3
124e1b5
25d6896
ae3b29e
28f2f9a
08c32e3
a36e458
1aede53
a12a8a2
4cb180a
08c32e3
124e1b5
ae3b29e
81dbd7b
462265c
25d6896
f3b403e
7e871df
08c32e3
ae3b29e
 
96910df
d5320a5
08c32e3
ae3b29e
f11144b
 
ae3b29e
124e1b5
b2bc76d
08c32e3
 
ae3b29e
124e1b5
ae3b29e
54992d9
b2bc76d
d5320a5
a58dc5f
08c32e3
ae3b29e
b2bc76d
c95d326
f3b403e
a36e458
2df852f
462265c
b2bc76d
a608d28
28f2f9a
6d10042
08c32e3
 
124e1b5
08c32e3
 
ae3b29e
f3b403e
72769e4
ae3b29e
f60b0f3
08c32e3
 
 
 
124e1b5
ae3b29e
08c32e3
d5320a5
08c32e3
124e1b5
c6bd19b
28f2f9a
7e66265
 
a12a8a2
08c32e3
 
f3b403e
08c32e3
 
28f2f9a
a12a8a2
124e1b5
350234c
ae3b29e
08c32e3
ae3b29e
350234c
25d6896
ae3b29e
08c32e3
 
124e1b5
6d10042
a36e458
124e1b5
f3b403e
81dbd7b
7203c67
08c32e3
ae3b29e
08c32e3
ae3b29e
81dbd7b
08c32e3
591948c
350234c
08c32e3
 
a36e458
ae3b29e
08c32e3
b2bc76d
ae3b29e
5793e45
2eb79c8
08c32e3
2df852f
d5320a5
350234c
d5320a5
350234c
2df852f
7e871df
ca60942
ae3b29e
7e871df
350234c
b2bc76d
08c32e3
 
 
 
 
124e1b5
a7a010b
ae3b29e
 
 
f3b403e
08c32e3
ae3b29e
2eb79c8
681e288
ae3b29e
 
08c32e3
124e1b5
ae3b29e
124e1b5
350234c
ae3b29e
591948c
28f2f9a
462265c
08c32e3
ae3b29e
 
08c32e3
 
ae3b29e
08c32e3
a12a8a2
7c166be
ae3b29e
f11144b
350234c
ae3b29e
124e1b5
ae3b29e
08c32e3
 
ae3b29e
08c32e3
f11144b
462265c
c95d326
ae3b29e
08c32e3
124e1b5
08c32e3
ae3b29e
08c32e3
 
d5320a5
f11144b
ae3b29e
 
462265c
2eb79c8
462265c
ae3b29e
08c32e3
350234c
08c32e3
2eb79c8
08c32e3
ae3b29e
28f2f9a
08c32e3
a07938e
28f2f9a
0376d0e
124e1b5
08c32e3
2eb79c8
ae3b29e
c95d326
350234c
08c32e3
0376d0e
681e288
6a6d34c
 
 
 
 
 
1c5a335
5446a6f
 
1c5a335
6a6d34c
 
 
 
 
5446a6f
6a6d34c
ae3b29e
6a6d34c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
{
  "_name_or_path": "distributed/llama-1b",
  "all_reduce_scores": {
    "0": "NOT_ALIVE",
    "1": "NOT_ALIVE",
    "10": "NOT_ALIVE",
    "100": "NOT_ALIVE",
    "101": "NOT_ALIVE",
    "102": "NOT_ALIVE",
    "103": "NON_PARTICIPATING",
    "104": "NOT_ALIVE",
    "105": "NOT_ALIVE",
    "106": "NOT_ALIVE",
    "107": "NOT_ALIVE",
    "108": "NOT_ALIVE",
    "109": "NOT_ALIVE",
    "11": "NOT_ALIVE",
    "110": "NON_PARTICIPATING",
    "111": "NOT_ALIVE",
    "112": "SUCCESS",
    "113": "NOT_ALIVE",
    "114": "NON_PARTICIPATING",
    "115": "NOT_ALIVE",
    "116": "NON_PARTICIPATING",
    "117": "NON_PARTICIPATING",
    "118": "NOT_ALIVE",
    "119": "NOT_ALIVE",
    "12": "NON_PARTICIPATING",
    "120": "NOT_ALIVE",
    "121": "NOT_ALIVE",
    "122": "NON_PARTICIPATING",
    "123": "NOT_ALIVE",
    "124": "NON_PARTICIPATING",
    "125": "NON_PARTICIPATING",
    "126": "NOT_ALIVE",
    "127": "NOT_ALIVE",
    "128": "NOT_ALIVE",
    "129": "NOT_ALIVE",
    "13": "NOT_ALIVE",
    "130": "NOT_ALIVE",
    "131": "NOT_ALIVE",
    "132": "NON_PARTICIPATING",
    "133": "NOT_ALIVE",
    "134": "NON_PARTICIPATING",
    "135": "NON_PARTICIPATING",
    "136": "NON_PARTICIPATING",
    "137": "NOT_ALIVE",
    "138": "SUCCESS",
    "139": "NON_PARTICIPATING",
    "14": "SUCCESS",
    "140": "NOT_ALIVE",
    "141": "NON_PARTICIPATING",
    "142": "NON_PARTICIPATING",
    "143": "NON_PARTICIPATING",
    "144": "NON_PARTICIPATING",
    "145": "NON_PARTICIPATING",
    "146": "NOT_ALIVE",
    "147": "NOT_ALIVE",
    "148": "SUCCESS",
    "149": "NON_PARTICIPATING",
    "15": "NOT_ALIVE",
    "150": "NON_PARTICIPATING",
    "151": "NOT_ALIVE",
    "152": "NOT_ALIVE",
    "153": "NOT_ALIVE",
    "154": "SUCCESS",
    "155": "SUCCESS",
    "156": "NOT_ALIVE",
    "157": "NOT_ALIVE",
    "158": "NOT_ALIVE",
    "159": "NOT_ALIVE",
    "16": "NON_PARTICIPATING",
    "160": "NON_PARTICIPATING",
    "161": "NOT_ALIVE",
    "162": "NON_PARTICIPATING",
    "163": "NOT_ALIVE",
    "164": "NON_PARTICIPATING",
    "165": "NOT_ALIVE",
    "166": "NOT_ALIVE",
    "167": "NON_PARTICIPATING",
    "168": "NON_PARTICIPATING",
    "169": "NOT_ALIVE",
    "17": "NOT_ALIVE",
    "170": "NON_PARTICIPATING",
    "171": "NOT_ALIVE",
    "172": "NOT_ALIVE",
    "173": "SUCCESS",
    "174": "NOT_ALIVE",
    "175": "NOT_ALIVE",
    "176": "NOT_ALIVE",
    "177": "NOT_ALIVE",
    "178": "NON_PARTICIPATING",
    "179": "NOT_ALIVE",
    "18": "NOT_ALIVE",
    "180": "NOT_ALIVE",
    "181": "NOT_ALIVE",
    "182": "NOT_ALIVE",
    "183": "NOT_ALIVE",
    "184": "NOT_ALIVE",
    "185": "NOT_ALIVE",
    "186": "NOT_ALIVE",
    "187": "NOT_ALIVE",
    "188": "NON_PARTICIPATING",
    "189": "NOT_ALIVE",
    "19": "NOT_ALIVE",
    "190": "NON_PARTICIPATING",
    "191": "NOT_ALIVE",
    "192": "NOT_ALIVE",
    "193": "NON_PARTICIPATING",
    "194": "NOT_ALIVE",
    "195": "NOT_ALIVE",
    "196": "SUCCESS",
    "197": "SUCCESS",
    "198": "NOT_ALIVE",
    "199": "NON_PARTICIPATING",
    "2": "NOT_ALIVE",
    "20": "NOT_ALIVE",
    "200": "NOT_ALIVE",
    "201": "NOT_ALIVE",
    "202": "NOT_ALIVE",
    "203": "NOT_ALIVE",
    "204": "NON_PARTICIPATING",
    "205": "NOT_ALIVE",
    "206": "NOT_ALIVE",
    "207": "NOT_ALIVE",
    "208": "NOT_ALIVE",
    "209": "NOT_ALIVE",
    "21": "NOT_ALIVE",
    "210": "NON_PARTICIPATING",
    "211": "NOT_ALIVE",
    "212": "SUCCESS",
    "213": "NOT_ALIVE",
    "214": "NON_PARTICIPATING",
    "215": "SUCCESS",
    "216": "NOT_ALIVE",
    "217": "NON_PARTICIPATING",
    "218": "NOT_ALIVE",
    "219": "NOT_ALIVE",
    "22": "NOT_ALIVE",
    "220": "NOT_ALIVE",
    "221": "NOT_ALIVE",
    "222": "NOT_ALIVE",
    "223": "NOT_ALIVE",
    "224": "NOT_ALIVE",
    "225": "NOT_ALIVE",
    "226": "NOT_ALIVE",
    "227": "NON_PARTICIPATING",
    "228": "NOT_ALIVE",
    "229": "NOT_ALIVE",
    "23": "NON_PARTICIPATING",
    "230": "NOT_ALIVE",
    "231": "NON_PARTICIPATING",
    "232": "NOT_ALIVE",
    "233": "NOT_ALIVE",
    "234": "NOT_ALIVE",
    "235": "NOT_ALIVE",
    "236": "NOT_ALIVE",
    "237": "SUCCESS",
    "238": "NON_PARTICIPATING",
    "239": "NOT_ALIVE",
    "24": "NON_PARTICIPATING",
    "240": "NOT_ALIVE",
    "241": "NOT_ALIVE",
    "242": "NOT_ALIVE",
    "243": "NOT_ALIVE",
    "244": "NOT_ALIVE",
    "245": "NOT_ALIVE",
    "246": "NON_PARTICIPATING",
    "247": "NON_PARTICIPATING",
    "248": "NOT_ALIVE",
    "249": "NOT_ALIVE",
    "25": "SUCCESS",
    "250": "NOT_ALIVE",
    "251": "NOT_ALIVE",
    "252": "NON_PARTICIPATING",
    "253": "NOT_ALIVE",
    "254": "NON_PARTICIPATING",
    "255": "NON_PARTICIPATING",
    "26": "NOT_ALIVE",
    "27": "NON_PARTICIPATING",
    "28": "NOT_ALIVE",
    "29": "NOT_ALIVE",
    "3": "NON_PARTICIPATING",
    "30": "SUCCESS",
    "31": "NON_PARTICIPATING",
    "32": "NON_PARTICIPATING",
    "33": "NOT_ALIVE",
    "34": "NON_PARTICIPATING",
    "35": "NOT_ALIVE",
    "36": "NOT_ALIVE",
    "37": "NOT_ALIVE",
    "38": "NOT_ALIVE",
    "39": "SUCCESS",
    "4": "NOT_ALIVE",
    "40": "NON_PARTICIPATING",
    "41": "NON_PARTICIPATING",
    "42": "NON_PARTICIPATING",
    "43": "NON_PARTICIPATING",
    "44": "NON_PARTICIPATING",
    "45": "NOT_ALIVE",
    "46": "NOT_ALIVE",
    "47": "NON_PARTICIPATING",
    "48": "NON_PARTICIPATING",
    "49": "NON_PARTICIPATING",
    "5": "NON_PARTICIPATING",
    "50": "NOT_ALIVE",
    "51": "NON_PARTICIPATING",
    "52": "NON_PARTICIPATING",
    "53": "NOT_ALIVE",
    "54": "NON_PARTICIPATING",
    "55": "NOT_ALIVE",
    "56": "NON_PARTICIPATING",
    "57": "NON_PARTICIPATING",
    "58": "SUCCESS",
    "59": "NON_PARTICIPATING",
    "6": "NON_PARTICIPATING",
    "60": "NON_PARTICIPATING",
    "61": "NOT_ALIVE",
    "62": "NOT_ALIVE",
    "63": "SUCCESS",
    "64": "NOT_ALIVE",
    "65": "NON_PARTICIPATING",
    "66": "NOT_ALIVE",
    "67": "NOT_ALIVE",
    "68": "NON_PARTICIPATING",
    "69": "NOT_ALIVE",
    "7": "NON_PARTICIPATING",
    "70": "NOT_ALIVE",
    "71": "NOT_ALIVE",
    "72": "NOT_ALIVE",
    "73": "SUCCESS",
    "74": "NON_PARTICIPATING",
    "75": "NON_PARTICIPATING",
    "76": "NON_PARTICIPATING",
    "77": "NOT_ALIVE",
    "78": "NOT_ALIVE",
    "79": "NOT_ALIVE",
    "8": "NOT_ALIVE",
    "80": "NOT_ALIVE",
    "81": "NOT_ALIVE",
    "82": "NON_PARTICIPATING",
    "83": "NOT_ALIVE",
    "84": "NON_PARTICIPATING",
    "85": "NOT_ALIVE",
    "86": "NON_PARTICIPATING",
    "87": "NON_PARTICIPATING",
    "88": "NOT_ALIVE",
    "89": "NOT_ALIVE",
    "9": "NOT_ALIVE",
    "90": "NON_PARTICIPATING",
    "91": "NON_PARTICIPATING",
    "92": "NON_PARTICIPATING",
    "93": "NON_PARTICIPATING",
    "94": "SUCCESS",
    "95": "NOT_ALIVE",
    "96": "NON_PARTICIPATING",
    "97": "NON_PARTICIPATING",
    "98": "NON_PARTICIPATING",
    "99": "NOT_ALIVE"
  },
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "block_list": [
    5975715,
    5975744
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "inner_step": 5,
  "intermediate_size": 5632,
  "last_allreduce_block": 5975514,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 22,
  "num_key_value_heads": 4,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.39.3",
  "use_cache": false,
  "vocab_size": 32000
}