tsingla98 committed
Commit 47bd780 · verified · 1 Parent(s): d574535

Upload FrawdLLMForCausalLM

Files changed (2)
  1. config.json +3 -0
  2. hf_wrapper.py +5 -0
config.json CHANGED
@@ -11,10 +11,13 @@
   "dropout": 0.1,
   "dtype": "float32",
   "eos_token_id": 3,
+  "hidden_size": 768,
   "model_type": "frawdllm",
   "n_embd": 768,
   "n_head": 12,
   "n_layer": 12,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
   "pad_token_id": 0,
   "transformers_version": "4.57.3",
   "use_rmsnorm": false,
hf_wrapper.py CHANGED
@@ -50,6 +50,11 @@ class FrawdLLMConfig(PretrainedConfig):
         self.use_rmsnorm = use_rmsnorm
         self.use_swiglu = use_swiglu
 
+        # Aliases for HuggingFace compatibility
+        self.num_hidden_layers = n_layer
+        self.hidden_size = n_embd
+        self.num_attention_heads = n_head
+
         super().__init__(
             pad_token_id=pad_token_id,
             bos_token_id=bos_token_id,
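
Setting the aliases as plain instance attributes works, but Transformers also provides attribute_map on PretrainedConfig for exactly this kind of aliasing (GPT2Config uses it for the same n_* names). A hedged alternative sketch, with the class name FrawdLLMConfigAlt and the trimmed argument list invented for illustration:

    from transformers import PretrainedConfig

    class FrawdLLMConfigAlt(PretrainedConfig):
        model_type = "frawdllm"
        # Standard HF attribute name -> name actually stored on the config.
        attribute_map = {
            "num_hidden_layers": "n_layer",
            "hidden_size": "n_embd",
            "num_attention_heads": "n_head",
        }

        def __init__(self, n_layer=12, n_embd=768, n_head=12, **kwargs):
            self.n_layer = n_layer
            self.n_embd = n_embd
            self.n_head = n_head
            super().__init__(**kwargs)

    cfg = FrawdLLMConfigAlt()
    assert cfg.num_hidden_layers == cfg.n_layer == 12  # alias resolves via attribute_map

The plain-attribute approach taken in this commit persists the aliases into config.json (which is why the config.json diff above gains three keys), whereas attribute_map resolves them at access time without serializing duplicate values.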