File size: 1,143 Bytes (commit b041a84)
{
  "alpha_init": 1.12,
  "architectures": [
    "LiquidForCausalLM"
  ],
  "attn_drop": 0.1,
  "bos_token_id": 50256,
  "chaos_scale": 0.2,
  "chead_kernal_size": 5,
  "cry_depth": 6,
  "cry_groups": 2,
  "cry_heads": 8,
  "cry_layers": 2,
  "dim": 256,
  "drop": 0.1,
  "dtype": "float32",
  "eos_token_id": 2,
  "feature_heads": 8,
  "freeze_thres": 0.5524292423753732,
  "gas_thres": 2.429242375373078e-06,
  "gnn_n_heads": 4,
  "gnn_num_layers": 4,
  "hidden": 256,
  "learn_alpha": true,
  "maha_init": 1.21,
  "max_position_embeddings": 512,
  "memory_size": 256,
  "metric": "maha_diag",
  "model_type": "liquid-former",
  "n_attn_heads": 16,
  "n_delta_windows": 5,
  "n_thought_heads": 32,
  "n_wind_size": 5,
  "num_forms": 2,
  "num_heads": 16,
  "num_hidden_layers": 1,
  "num_layers": 1,
  "pad_token_id": 0,
  "proj_drop": 0.1,
  "router_gate_heads": 16,
  "seq_init": 0.0002,
  "seq_len": 512,
  "share_kv": true,
  "t_step": 2,
  "temperature": null,
  "thermo_amp": 0.5,
  "thermo_freq": 1.1,
  "thought_lr": 0.0005,
  "thought_temp": 0.898,
  "tie_weights": true,
  "transformers_version": "4.56.1",
  "vocab_size": 50257
}
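
The JSON above is the model configuration for a custom "liquid-former" causal LM (architecture LiquidForCausalLM). As a minimal sketch, the file can be parsed directly to inspect its hyperparameters; loading it through transformers is only possible if the repository also ships the custom modeling code registered for this model type, and the repo id shown in the comment is a placeholder, not the actual repository name.

import json

# Parse the raw config.json shown above and inspect a few hyperparameters.
with open("config.json") as f:
    cfg = json.load(f)

print(cfg["model_type"])             # liquid-former
print(cfg["dim"], cfg["num_heads"])  # 256, 16
print(cfg["vocab_size"])             # 50257 (GPT-2-sized vocabulary)

# If the repository also provides the custom code that registers
# LiquidForCausalLM, the config could instead be loaded via transformers:
# from transformers import AutoConfig
# config = AutoConfig.from_pretrained("<namespace>/<repo>", trust_remote_code=True)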