File size: 1,648 Bytes
0d4a2b8
 
99f4202
0d4a2b8
 
 
 
 
99f4202
0d4a2b8
99f4202
0d4a2b8
99f4202
0d4a2b8
99f4202
 
 
0d4a2b8
99f4202
 
 
0d4a2b8
99f4202
 
 
0d4a2b8
99f4202
 
 
0d4a2b8
99f4202
 
 
0d4a2b8
99f4202
 
 
0d4a2b8
99f4202
 
 
0d4a2b8
99f4202
 
 
0d4a2b8
 
99f4202
 
0d4a2b8
99f4202
 
 
0d4a2b8
 
99f4202
 
 
 
 
 
 
0d4a2b8
99f4202
 
 
 
 
0d4a2b8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
{
  "architectures": [
    "Olmo3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": null,
  "dtype": "bfloat16",
  "eos_token_id": 100257,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "layer_types": [
    "sliding_attention",
    "sliding_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "sliding_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "sliding_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "sliding_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "sliding_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "sliding_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "sliding_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "sliding_attention",
    "sliding_attention",
    "full_attention"
  ],
  "max_position_embeddings": 65536,
  "model_type": "olmo3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 100277,
  "rms_norm_eps": 1e-06,
  "rope_parameters": {
    "attention_factor": 1.2079441541679836,
    "beta_fast": 32,
    "beta_slow": 1,
    "factor": 8.0,
    "original_max_position_embeddings": 8192,
    "rope_theta": 500000,
    "rope_type": "yarn"
  },
  "sliding_window": 4096,
  "tie_word_embeddings": false,
  "transformers_version": "5.2.0",
  "use_cache": true,
  "vocab_size": 100278
}