shanjiaz committed
Commit 331eb39 · verified · 1 Parent(s): 8e28f55

Upload folder using huggingface_hub
config.json ADDED
@@ -0,0 +1,56 @@
+{
+  "architectures": [
+    "DFlashDraftModel"
+  ],
+  "auto_map": {
+    "": "config.DFlashSpeculatorConfig"
+  },
+  "aux_hidden_state_layer_ids": null,
+  "base_model_ep_plan": null,
+  "block_size": 8,
+  "draft_vocab_size": 32000,
+  "dtype": "float32",
+  "has_no_defaults_at_init": false,
+  "num_hidden_layers": 3,
+  "speculators_config": {
+    "algorithm": "dflash",
+    "default_proposal_method": "greedy",
+    "proposal_methods": [
+      {
+        "accept_tolerance": 0.0,
+        "proposal_type": "greedy",
+        "speculative_tokens": 8,
+        "verifier_accept_k": 1
+      }
+    ],
+    "verifier": {
+      "architectures": [],
+      "name_or_path": "Qwen/Qwen3-8B"
+    }
+  },
+  "speculators_model_type": "dflash",
+  "speculators_version": "0.5.0.dev42",
+  "target_hidden_size": null,
+  "transformer_layer_config": {
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 12288,
+    "max_position_embeddings": 40960,
+    "mlp_bias": false,
+    "model_type": "llama",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 3,
+    "num_key_value_heads": 8,
+    "pretraining_tp": 1,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": null,
+    "rope_theta": 10000.0,
+    "use_cache": true,
+    "vocab_size": 151936
+  },
+  "transformers_version": "4.57.6"
+}
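
The config pairs a small 3-layer llama-style drafter (hidden size 4096, reduced 32000-token draft vocabulary) with Qwen/Qwen3-8B as the verifier over its full 151936-token vocabulary, and its auto_map points at the config.py added below, so loading it goes through remote code. A minimal sketch of inspecting the file, assuming only a local checkout of this repo (the repo id is not shown on this page, so a local path is used):

import json

# Read the uploaded config from a local checkout of this repo.
with open("config.json") as f:
    cfg = json.load(f)

# Draft vs. target vocabulary sizes: the drafter speculates over a
# reduced vocab while the verifier scores over the full Qwen3 vocab.
print(cfg["draft_vocab_size"])                                # 32000
print(cfg["transformer_layer_config"]["vocab_size"])          # 151936
print(cfg["speculators_config"]["verifier"]["name_or_path"])  # Qwen/Qwen3-8B

# One forward pass of the drafter proposes a block of 8 tokens, which the
# verifier then accepts or rejects under the greedy proposal method.
proposal = cfg["speculators_config"]["proposal_methods"][0]
assert proposal["speculative_tokens"] == cfg["block_size"] == 8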
config.py ADDED
@@ -0,0 +1,83 @@
+from typing import Any, Literal
+
+from pydantic import Field, field_serializer, field_validator
+from transformers import AutoConfig, PretrainedConfig
+from transformers.models.qwen3.modeling_qwen3 import (
+    Qwen3Config,)
+from speculators import SpeculatorModelConfig
+
+__all__ = [
+    "DFlashSpeculatorConfig",
+]
+
+
+@SpeculatorModelConfig.register("dflash")
+class DFlashSpeculatorConfig(SpeculatorModelConfig):
+    """
+    Configuration for DFlash speculator with vocabulary mapping.
+
+    DFlash features vocabulary mapping between draft (64K) and target (128K)
+    vocabularies, enabling cross-tokenizer speculation.
+
+    :param transformer_layer_config: Configuration for the transformer decoder layer
+    :param draft_vocab_size: Size of draft model vocabulary for speculation
+    """
+
+    speculators_model_type: Literal["dflash"] = "dflash"
+    architectures: list[str] = Field(
+        default_factory=lambda: ["DFlashSpeculator"],
+        description="Model architectures that can load these weights",
+    )
+
+    transformer_layer_config: PretrainedConfig = Field(
+        default_factory=Qwen3Config,
+        description="Configuration for the transformer decoder layer",
+    )
+
+    draft_vocab_size: int = Field(
+        default=32000,
+        description="Size of draft model vocabulary for speculation",
+    )
+
+    num_hidden_layers: int = Field(
+        default=3,
+        description="Number of hidden layers in the DFlash model",
+    )
+
+    block_size: int = Field(
+        default=8,
+        description="Default size of the draft block predicted with a forward pass of the model",
+    )
+
+    target_hidden_size: int | None = Field(
+        default=None,
+        description="Hidden size of the target model (if different from draft model)",
+    )
+
+    aux_hidden_state_layer_ids: list[int] | None = Field(
+        default=None,
+        description="Layer IDs of the DFlash auxiliary hidden state layers",
+    )
+
+    @property
+    def target_vocab_size(self) -> int:
+        """Get target vocabulary size from transformer config."""
+        return self.transformer_layer_config.vocab_size
+
+    @field_serializer("transformer_layer_config")
+    def serialize_transformer_config(self, value: PretrainedConfig) -> dict:
+        """Serialize transformer config to dict."""
+        return value.to_diff_dict()
+
+    @field_validator("transformer_layer_config", mode="before")
+    @classmethod
+    def validate_transformer_config(cls, value: Any) -> PretrainedConfig:
+        """Validate and convert transformer config."""
+        if isinstance(value, dict):
+            config_class: type[PretrainedConfig] = Qwen3Config
+            if "model_type" in value:
+                config_class = AutoConfig.for_model(
+                    model_type=value["model_type"]
+                ).__class__
+            return config_class(**value)
+        return value
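
config.py defines the remote-code configuration class that auto_map in config.json points to: a pydantic-style SpeculatorModelConfig subclass whose field_validator rebuilds the nested transformer_layer_config from a plain dict by dispatching on "model_type" (so the "llama" sub-config in config.json deserializes to a LlamaConfig even though the field default is Qwen3Config), and whose field_serializer writes it back out via to_diff_dict(). A minimal sketch of that dispatch in isolation, assuming only transformers is installed (the dict values are illustrative, taken from config.json above):

from transformers import AutoConfig

# Mirrors validate_transformer_config: pick the concrete config class
# from "model_type", then construct it from the full dict.
value = {"model_type": "llama", "vocab_size": 151936, "num_hidden_layers": 3}
config_class = AutoConfig.for_model(model_type=value["model_type"]).__class__
tl_cfg = config_class(**value)

print(type(tl_cfg).__name__)  # LlamaConfig
print(tl_cfg.vocab_size)      # 151936 -- what the target_vocab_size property returns
print(tl_cfg.to_diff_dict())  # non-default fields only, as serialized in config.json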
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63cdfb13d15389fc64609cd223556193e297239ae8ce59f4f67007e46211af58
+size 3027718256
optimizer_state_dict.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0701b4ec3e9223cea605caf32f5da186fdd1ed01f6cdefb173e9f0bf1811919a
+size 2516750833
scheduler_state_dict.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0430ec9e2832e07e2cb1a3b53f1419af7a1c67481b8d007f8cac7e111cdd5cc
+size 1531
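
model.safetensors, optimizer_state_dict.pt, and scheduler_state_dict.pt are committed as Git LFS pointers rather than the blobs themselves: each pointer records the spec version, a sha256 object id, and the true byte size (about 3.03 GB for the weights, 2.52 GB for the optimizer state). A minimal sketch of parsing one such pointer, assuming the standard three-line key/value format shown above:

def parse_lfs_pointer(text: str) -> dict[str, str]:
    # Each pointer line is "<key> <value>"; split on the first space.
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    assert fields["version"] == "https://git-lfs.github.com/spec/v1"
    return fields

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:63cdfb13d15389fc64609cd223556193e297239ae8ce59f4f67007e46211af58\n"
    "size 3027718256\n"
)
info = parse_lfs_pointer(pointer)
print(info["oid"])              # checksum of the real model.safetensors blob
print(int(info["size"]) / 1e9)  # ~3.03 (GB)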