Sharjeelbaig committed on
Commit
83305e4
·
verified ·
1 Parent(s): 6fc8a82

release: hfstd v2 stabilization and inference suite

Browse files
README.md CHANGED
@@ -27,6 +27,6 @@ model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
27
 
28
  prompt = "Generate a landing page for marketing agency titled Velocity Landing"
29
  inputs = tokenizer(prompt, return_tensors="pt")
30
- outputs = model.generate(**inputs, max_new_tokens=220, temperature=0.7, do_sample=True)
31
  print(tokenizer.decode(outputs[0], skip_special_tokens=True))
32
  ```
 
27
 
28
  prompt = "Generate a landing page for marketing agency titled Velocity Landing"
29
  inputs = tokenizer(prompt, return_tensors="pt")
30
+ outputs = model.generate(**inputs, max_new_tokens=220, temperature=0.7, do_sample=True, use_cache=False)
31
  print(tokenizer.decode(outputs[0], skip_special_tokens=True))
32
  ```
config.json CHANGED
@@ -23,7 +23,7 @@
23
  "num_heads": 8,
24
  "num_layers": 8,
25
  "pad_token_id": 0,
26
- "top_k": 2,
27
  "unk_token_id": 3,
28
  "vocab_size": 1714
29
  }
 
23
  "num_heads": 8,
24
  "num_layers": 8,
25
  "pad_token_id": 0,
26
+ "router_top_k": 2,
27
  "unk_token_id": 3,
28
  "vocab_size": 1714
29
  }
configuration_neurocoder.py CHANGED
@@ -18,6 +18,7 @@ class NeuroCoderConfig(PretrainedConfig):
18
  ffn_multiplier: int = 4,
19
  moe_every_n_layers: int = 2,
20
  num_experts: int = 8,
 
21
  top_k: int = 2,
22
  capacity_factor_train: float = 1.25,
23
  capacity_factor_infer: float = 1.0,
@@ -38,7 +39,8 @@ class NeuroCoderConfig(PretrainedConfig):
38
  self.ffn_multiplier = ffn_multiplier
39
  self.moe_every_n_layers = moe_every_n_layers
40
  self.num_experts = num_experts
41
- self.top_k = top_k
 
42
  self.capacity_factor_train = capacity_factor_train
43
  self.capacity_factor_infer = capacity_factor_infer
44
  self.dropout = dropout
 
18
  ffn_multiplier: int = 4,
19
  moe_every_n_layers: int = 2,
20
  num_experts: int = 8,
21
+ router_top_k: int | None = None,
22
  top_k: int = 2,
23
  capacity_factor_train: float = 1.25,
24
  capacity_factor_infer: float = 1.0,
 
39
  self.ffn_multiplier = ffn_multiplier
40
  self.moe_every_n_layers = moe_every_n_layers
41
  self.num_experts = num_experts
42
+ # Keep MoE router top-k separate from generation top_k to avoid HF generation warnings.
43
+ self.router_top_k = router_top_k if router_top_k is not None else top_k
44
  self.capacity_factor_train = capacity_factor_train
45
  self.capacity_factor_infer = capacity_factor_infer
46
  self.dropout = dropout
modeling_neurocoder.py CHANGED
@@ -74,7 +74,7 @@ class MoEFeedForward(nn.Module):
74
  def __init__(self, config: NeuroCoderConfig) -> None:
75
  super().__init__()
76
  self.num_experts = config.num_experts
77
- self.top_k = config.top_k
78
  self.capacity_factor_train = config.capacity_factor_train
79
  self.capacity_factor_infer = config.capacity_factor_infer
80
  self.router = nn.Linear(config.hidden_size, config.num_experts, bias=False)
 
74
  def __init__(self, config: NeuroCoderConfig) -> None:
75
  super().__init__()
76
  self.num_experts = config.num_experts
77
+ self.top_k = config.router_top_k
78
  self.capacity_factor_train = config.capacity_factor_train
79
  self.capacity_factor_infer = config.capacity_factor_infer
80
  self.router = nn.Linear(config.hidden_size, config.num_experts, bias=False)