File size: 8,354 Bytes
7a9bf38 307bb20 7a9bf38 9ff40db 7a9bf38 9ff40db 7a9bf38 9ff40db 7a9bf38 9ff40db 5c6245e 9ff40db 7a9bf38 ddad724 5c6245e 7a9bf38 9ff40db 7a9bf38 f34b5ae 5c6245e ddad724 9ff40db 421b627 9ff40db 7a9bf38 f34b5ae 5c6245e a2e7ada 500bcda 7a9bf38 ddad724 7a9bf38 78e1974 9ff40db 7a9bf38 f6aaddb 462b488 78e1974 b116680 0f24801 78e1974 7a9bf38 5395f5c 78e1974 f34b5ae 7a9bf38 6bba80d 7a9bf38 9ff40db 500bcda b116680 307bb20 7a9bf38 421b627 9ff40db fc0b710 9ff40db 1d11f2a f6aaddb ddad724 4595512 1d11f2a 7a9bf38 b116680 7a9bf38 b116680 462b488 7a9bf38 7b5c429 307bb20 b116680 9ff40db 500bcda ddad724 d0c99cc 8054e37 7a9bf38 ddad724 7a9bf38 f34b5ae ddad724 7a9bf38 a2e7ada 40749f2 9ff40db f34b5ae 7a9bf38 cd5ef0f ddad724 9ff40db 307bb20 7a9bf38 ded29d5 5184add 9ff40db b116680 f34b5ae ddad724 7a9bf38 d8a454f 7a9bf38 9ff40db 307bb20 9ff40db 421b627 cd5ef0f ac03118 9ff40db 7a9bf38 9ff40db 7a9bf38 9ff40db 25deb0c 7a9bf38 9ff40db 2352a61 7a9bf38 421b627 462b488 7a9bf38 421b627 8054e37 ddad724 5395f5c 7a9bf38 ddad724 3613e9a 7a9bf38 ddad724 5c6245e 9ff40db 8054e37 ddad724 40749f2 7a9bf38 f37e8ca f34b5ae 500bcda 421b627 307bb20 f34b5ae 2352a61 40749f2 5395f5c 85e15ca b116680 4595512 ddad724 7a9bf38 9ff40db 7a9bf38 307bb20 7a9bf38 5c6245e 7a9bf38 421b627 2934e9f 7a9bf38 9ff40db a2e7ada f34b5ae 5c6245e 7a9bf38 0f24801 ddad724 5c6245e 78e1974 5c6245e a2e7ada 4595512 7a9bf38 307bb20 7a9bf38 9ff40db 7a9bf38 a2e7ada 7a9bf38 a2e7ada 5c6245e 7a9bf38 b116680 5c6245e a2e7ada 78e1974 421b627 69ec2ae 7a9bf38 69ec2ae 7a9bf38 f6aaddb 7a9bf38 421b627 7a9bf38 421b627 7a9bf38 5c6245e 307bb20 b116680 7a9bf38 a2e7ada 9ff40db 7a9bf38 338868a 9ff40db 7a9bf38 f6aaddb 69ec2ae 5c6245e 7a9bf38 338868a 5184add f34b5ae 0f24801 9ff40db 5395f5c 78e1974 2934e9f 7a9bf38 338868a 5c6245e 7a9bf38 78e1974 7a9bf38 28a902f d53855a 28a902f 7a9bf38 b17abd1 7a9bf38 9ff40db 7a9bf38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 |
{
"_name_or_path": "distributed/optimized-gpt2-1b",
"activation_function": "gelu_new",
"all_reduce_scores": {
"0": "NON_PARTICIPATING",
"1": "NON_PARTICIPATING",
"10": "NON_PARTICIPATING",
"100": "NON_PARTICIPATING",
"101": "NON_PARTICIPATING",
"102": "SUCCESS",
"103": "NON_PARTICIPATING",
"104": "SUCCESS",
"105": "NON_PARTICIPATING",
"106": "SUCCESS",
"107": "NON_PARTICIPATING",
"108": "NON_PARTICIPATING",
"109": "SUCCESS",
"11": "SUCCESS",
"110": "NON_PARTICIPATING",
"111": "SUCCESS",
"112": "NON_PARTICIPATING",
"113": "SUCCESS",
"114": "NON_PARTICIPATING",
"115": "NON_PARTICIPATING",
"116": "NON_PARTICIPATING",
"117": "SUCCESS",
"118": "NON_PARTICIPATING",
"119": "NON_PARTICIPATING",
"12": "SUCCESS",
"120": "NON_PARTICIPATING",
"121": "NON_PARTICIPATING",
"122": "NON_PARTICIPATING",
"123": "NON_PARTICIPATING",
"124": "NON_PARTICIPATING",
"125": "NON_PARTICIPATING",
"126": "NON_PARTICIPATING",
"127": "NON_PARTICIPATING",
"128": "NON_PARTICIPATING",
"129": "NON_PARTICIPATING",
"13": "SUCCESS",
"130": "SUCCESS",
"131": "NON_PARTICIPATING",
"132": "NON_PARTICIPATING",
"133": "NON_PARTICIPATING",
"134": "NON_PARTICIPATING",
"135": "NON_PARTICIPATING",
"136": "NON_PARTICIPATING",
"137": "NON_PARTICIPATING",
"138": "NON_PARTICIPATING",
"139": "NON_PARTICIPATING",
"14": "SUCCESS",
"140": "NON_PARTICIPATING",
"141": "NON_PARTICIPATING",
"142": "NON_PARTICIPATING",
"143": "NON_PARTICIPATING",
"144": "NON_PARTICIPATING",
"145": "NON_PARTICIPATING",
"146": "NON_PARTICIPATING",
"147": "NON_PARTICIPATING",
"148": "SUCCESS",
"149": "NON_PARTICIPATING",
"15": "SUCCESS",
"150": "NON_PARTICIPATING",
"151": "NON_PARTICIPATING",
"152": "NON_PARTICIPATING",
"153": "NON_PARTICIPATING",
"154": "SUCCESS",
"155": "NON_PARTICIPATING",
"156": "SUCCESS",
"157": "NON_PARTICIPATING",
"158": "NON_PARTICIPATING",
"159": "NON_PARTICIPATING",
"16": "SUCCESS",
"160": "NON_PARTICIPATING",
"161": "NON_PARTICIPATING",
"162": "NON_PARTICIPATING",
"163": "NON_PARTICIPATING",
"164": "NON_PARTICIPATING",
"165": "NON_PARTICIPATING",
"166": "NON_PARTICIPATING",
"167": "NON_PARTICIPATING",
"168": "NON_PARTICIPATING",
"169": "NON_PARTICIPATING",
"17": "NON_PARTICIPATING",
"170": "NON_PARTICIPATING",
"171": "SUCCESS",
"172": "NON_PARTICIPATING",
"173": "NON_PARTICIPATING",
"174": "NON_PARTICIPATING",
"175": "NON_PARTICIPATING",
"176": "NON_PARTICIPATING",
"177": "NON_PARTICIPATING",
"178": "SUCCESS",
"179": "NON_PARTICIPATING",
"18": "SUCCESS",
"180": "NON_PARTICIPATING",
"181": "NON_PARTICIPATING",
"182": "NON_PARTICIPATING",
"183": "NON_PARTICIPATING",
"184": "NON_PARTICIPATING",
"185": "NON_PARTICIPATING",
"186": "NON_PARTICIPATING",
"187": "NON_PARTICIPATING",
"188": "NON_PARTICIPATING",
"189": "NON_PARTICIPATING",
"19": "SUCCESS",
"190": "NON_PARTICIPATING",
"191": "NON_PARTICIPATING",
"192": "NON_PARTICIPATING",
"193": "NON_PARTICIPATING",
"194": "NON_PARTICIPATING",
"195": "NON_PARTICIPATING",
"196": "NON_PARTICIPATING",
"197": "NON_PARTICIPATING",
"198": "NON_PARTICIPATING",
"199": "NON_PARTICIPATING",
"2": "NON_PARTICIPATING",
"20": "SUCCESS",
"200": "NON_PARTICIPATING",
"201": "SUCCESS",
"202": "NON_PARTICIPATING",
"203": "NON_PARTICIPATING",
"204": "NON_PARTICIPATING",
"205": "NON_PARTICIPATING",
"206": "SUCCESS",
"207": "NON_PARTICIPATING",
"208": "NON_PARTICIPATING",
"209": "SUCCESS",
"21": "SUCCESS",
"210": "SUCCESS",
"211": "NON_PARTICIPATING",
"212": "NON_PARTICIPATING",
"213": "NON_PARTICIPATING",
"214": "SUCCESS",
"215": "NON_PARTICIPATING",
"216": "NON_PARTICIPATING",
"217": "NON_PARTICIPATING",
"218": "NON_PARTICIPATING",
"219": "NON_PARTICIPATING",
"22": "SUCCESS",
"220": "NON_PARTICIPATING",
"221": "NON_PARTICIPATING",
"222": "NON_PARTICIPATING",
"223": "NON_PARTICIPATING",
"224": "NON_PARTICIPATING",
"225": "NON_PARTICIPATING",
"226": "NON_PARTICIPATING",
"227": "NON_PARTICIPATING",
"228": "SUCCESS",
"229": "NON_PARTICIPATING",
"23": "SUCCESS",
"230": "NON_PARTICIPATING",
"231": "NON_PARTICIPATING",
"232": "NON_PARTICIPATING",
"233": "NON_PARTICIPATING",
"234": "NON_PARTICIPATING",
"235": "NON_PARTICIPATING",
"236": "SUCCESS",
"237": "NON_PARTICIPATING",
"238": "NON_PARTICIPATING",
"239": "NON_PARTICIPATING",
"24": "SUCCESS",
"240": "NON_PARTICIPATING",
"241": "NON_PARTICIPATING",
"242": "NON_PARTICIPATING",
"243": "NON_PARTICIPATING",
"244": "NON_PARTICIPATING",
"245": "NON_PARTICIPATING",
"246": "NON_PARTICIPATING",
"247": "NON_PARTICIPATING",
"248": "NON_PARTICIPATING",
"249": "SUCCESS",
"25": "SUCCESS",
"250": "NON_PARTICIPATING",
"251": "NON_PARTICIPATING",
"252": "NON_PARTICIPATING",
"253": "NON_PARTICIPATING",
"254": "NON_PARTICIPATING",
"255": "NON_PARTICIPATING",
"26": "SUCCESS",
"27": "SUCCESS",
"28": "SUCCESS",
"29": "SUCCESS",
"3": "SUCCESS",
"30": "NON_PARTICIPATING",
"31": "NON_PARTICIPATING",
"32": "SUCCESS",
"33": "NON_PARTICIPATING",
"34": "SUCCESS",
"35": "SUCCESS",
"36": "SUCCESS",
"37": "SUCCESS",
"38": "SUCCESS",
"39": "SUCCESS",
"4": "SUCCESS",
"40": "SUCCESS",
"41": "SUCCESS",
"42": "SUCCESS",
"43": "SUCCESS",
"44": "NON_PARTICIPATING",
"45": "SUCCESS",
"46": "NON_PARTICIPATING",
"47": "SUCCESS",
"48": "NON_PARTICIPATING",
"49": "SUCCESS",
"5": "SUCCESS",
"50": "SUCCESS",
"51": "SUCCESS",
"52": "NON_PARTICIPATING",
"53": "SUCCESS",
"54": "SUCCESS",
"55": "SUCCESS",
"56": "SUCCESS",
"57": "SUCCESS",
"58": "NON_PARTICIPATING",
"59": "SUCCESS",
"6": "NON_PARTICIPATING",
"60": "SUCCESS",
"61": "NON_PARTICIPATING",
"62": "SUCCESS",
"63": "SUCCESS",
"64": "NON_PARTICIPATING",
"65": "SUCCESS",
"66": "SUCCESS",
"67": "SUCCESS",
"68": "SUCCESS",
"69": "SUCCESS",
"7": "SUCCESS",
"70": "NON_PARTICIPATING",
"71": "NON_PARTICIPATING",
"72": "SUCCESS",
"73": "NON_PARTICIPATING",
"74": "SUCCESS",
"75": "SUCCESS",
"76": "SUCCESS",
"77": "SUCCESS",
"78": "SUCCESS",
"79": "NON_PARTICIPATING",
"8": "SUCCESS",
"80": "SUCCESS",
"81": "SUCCESS",
"82": "SUCCESS",
"83": "NON_PARTICIPATING",
"84": "SUCCESS",
"85": "NON_PARTICIPATING",
"86": "NON_PARTICIPATING",
"87": "NON_PARTICIPATING",
"88": "SUCCESS",
"89": "SUCCESS",
"9": "SUCCESS",
"90": "SUCCESS",
"91": "SUCCESS",
"92": "SUCCESS",
"93": "NON_PARTICIPATING",
"94": "SUCCESS",
"95": "SUCCESS",
"96": "SUCCESS",
"97": "SUCCESS",
"98": "SUCCESS",
"99": "SUCCESS"
},
"architectures": [
"GPTOptim"
],
"attn_pdrop": 0.1,
"auto_map": {
"AutoConfig": "distributed/optimized-gpt2-500m--configuration_gpt_optimized.GPTOptimConfig",
"AutoModelForCausalLM": "distributed/optimized-gpt2-500m--modeling_gpt_optimized.GPTOptim"
},
"block_list": [
5607335,
5607340,
5607344,
5607349,
5607353
],
"block_size": 1024,
"bos_token_id": 50256,
"embd_pdrop": 0.1,
"eos_token_id": 50256,
"initializer_range": 0.02,
"inner_step": 15,
"inner_steps": 0,
"last_allreduce_block": 5605539,
"layer_norm_epsilon": 1e-05,
"model_type": "gpt_optimized",
"n_embd": 1280,
"n_head": 32,
"n_inner": null,
"n_layer": 48,
"n_positions": 1024,
"reorder_and_upcast_attn": false,
"resid_pdrop": 0.1,
"scale_attn_by_inverse_layer_idx": false,
"scale_attn_weights": true,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "cls_index",
"summary_use_proj": true,
"torch_dtype": "float32",
"transformers_version": "4.39.3",
"use_cache": true,
"vocab_size": 50257
}
|