release: hfstd v2 stabilization and inference suite
Browse files- README.md +1 -1
- config.json +1 -1
- configuration_neurocoder.py +3 -1
- modeling_neurocoder.py +1 -1
README.md
CHANGED
|
@@ -27,6 +27,6 @@ model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
|
|
| 27 |
|
| 28 |
prompt = "Generate a landing page for marketing agency titled Velocity Landing"
|
| 29 |
inputs = tokenizer(prompt, return_tensors="pt")
|
| 30 |
-
outputs = model.generate(**inputs, max_new_tokens=220, temperature=0.7, do_sample=True)
|
| 31 |
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
| 32 |
```
|
|
|
|
| 27 |
|
| 28 |
prompt = "Generate a landing page for marketing agency titled Velocity Landing"
|
| 29 |
inputs = tokenizer(prompt, return_tensors="pt")
|
| 30 |
+
outputs = model.generate(**inputs, max_new_tokens=220, temperature=0.7, do_sample=True, use_cache=False)
|
| 31 |
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
| 32 |
```
|
config.json
CHANGED
|
@@ -23,7 +23,7 @@
|
|
| 23 |
"num_heads": 8,
|
| 24 |
"num_layers": 8,
|
| 25 |
"pad_token_id": 0,
|
| 26 |
-
"top_k": 2,
|
| 27 |
"unk_token_id": 3,
|
| 28 |
"vocab_size": 1714
|
| 29 |
}
|
|
|
|
| 23 |
"num_heads": 8,
|
| 24 |
"num_layers": 8,
|
| 25 |
"pad_token_id": 0,
|
| 26 |
+
"router_top_k": 2,
|
| 27 |
"unk_token_id": 3,
|
| 28 |
"vocab_size": 1714
|
| 29 |
}
|
configuration_neurocoder.py
CHANGED
|
@@ -18,6 +18,7 @@ class NeuroCoderConfig(PretrainedConfig):
|
|
| 18 |
ffn_multiplier: int = 4,
|
| 19 |
moe_every_n_layers: int = 2,
|
| 20 |
num_experts: int = 8,
|
|
|
|
| 21 |
top_k: int = 2,
|
| 22 |
capacity_factor_train: float = 1.25,
|
| 23 |
capacity_factor_infer: float = 1.0,
|
|
@@ -38,7 +39,8 @@ class NeuroCoderConfig(PretrainedConfig):
|
|
| 38 |
self.ffn_multiplier = ffn_multiplier
|
| 39 |
self.moe_every_n_layers = moe_every_n_layers
|
| 40 |
self.num_experts = num_experts
|
| 41 |
-
|
|
|
|
| 42 |
self.capacity_factor_train = capacity_factor_train
|
| 43 |
self.capacity_factor_infer = capacity_factor_infer
|
| 44 |
self.dropout = dropout
|
|
|
|
| 18 |
ffn_multiplier: int = 4,
|
| 19 |
moe_every_n_layers: int = 2,
|
| 20 |
num_experts: int = 8,
|
| 21 |
+
router_top_k: int | None = None,
|
| 22 |
top_k: int = 2,
|
| 23 |
capacity_factor_train: float = 1.25,
|
| 24 |
capacity_factor_infer: float = 1.0,
|
|
|
|
| 39 |
self.ffn_multiplier = ffn_multiplier
|
| 40 |
self.moe_every_n_layers = moe_every_n_layers
|
| 41 |
self.num_experts = num_experts
|
| 42 |
+
# Keep MoE router top-k separate from generation top_k to avoid HF generation warnings.
|
| 43 |
+
self.router_top_k = router_top_k if router_top_k is not None else top_k
|
| 44 |
self.capacity_factor_train = capacity_factor_train
|
| 45 |
self.capacity_factor_infer = capacity_factor_infer
|
| 46 |
self.dropout = dropout
|
modeling_neurocoder.py
CHANGED
|
@@ -74,7 +74,7 @@ class MoEFeedForward(nn.Module):
|
|
| 74 |
def __init__(self, config: NeuroCoderConfig) -> None:
|
| 75 |
super().__init__()
|
| 76 |
self.num_experts = config.num_experts
|
| 77 |
-
self.top_k = config.top_k
|
| 78 |
self.capacity_factor_train = config.capacity_factor_train
|
| 79 |
self.capacity_factor_infer = config.capacity_factor_infer
|
| 80 |
self.router = nn.Linear(config.hidden_size, config.num_experts, bias=False)
|
|
|
|
| 74 |
def __init__(self, config: NeuroCoderConfig) -> None:
|
| 75 |
super().__init__()
|
| 76 |
self.num_experts = config.num_experts
|
| 77 |
+
self.top_k = config.router_top_k
|
| 78 |
self.capacity_factor_train = config.capacity_factor_train
|
| 79 |
self.capacity_factor_infer = config.capacity_factor_infer
|
| 80 |
self.router = nn.Linear(config.hidden_size, config.num_experts, bias=False)
|