File size: 1,648 Bytes
{
"architectures": [
"Olmo3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": null,
"dtype": "bfloat16",
"eos_token_id": 100257,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"layer_types": [
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention"
],
"max_position_embeddings": 65536,
"model_type": "olmo3",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pad_token_id": 100277,
"rms_norm_eps": 1e-06,
"rope_parameters": {
"attention_factor": 1.2079441541679836,
"beta_fast": 32,
"beta_slow": 1,
"factor": 8.0,
"original_max_position_embeddings": 8192,
"rope_theta": 500000,
"rope_type": "yarn"
},
"sliding_window": 4096,
"tie_word_embeddings": false,
"transformers_version": "5.2.0",
"use_cache": true,
"vocab_size": 100278
}