mazesmazes committed on
Commit
3e44e14
·
verified ·
1 Parent(s): f847d27

Training in progress - step 1000

Browse files
Files changed (4) hide show
  1. asr_config.py +0 -8
  2. config.json +0 -1
  3. model.safetensors +1 -1
  4. projectors.py +0 -7
asr_config.py CHANGED
@@ -50,13 +50,6 @@ class ASRConfig(transformers.PretrainedConfig):
50
  projector_pool_stride: int = 4,
51
  downsample_rate: int = 5, # Granite default
52
  projector_hidden_dim: Optional[int] = None,
53
- # Projector dropout — applied between activation and the second
54
- # linear in MLPAudioProjector. Matches Granite-Speech 4.1's
55
- # Q-Former dropout (hidden_dropout_prob=0.1) used in its frozen-
56
- # encoder + LoRA-LLM training stage. Default 0.0 for backward
57
- # compatibility with existing checkpoints; experiment configs
58
- # opt in to 0.1.
59
- projector_dropout: float = 0.0,
60
  projector_type: str = "mlp", # "mlp", "mosa", "moe", "qformer"
61
  # MoE-specific configuration
62
  num_experts: int = 4, # Number of experts in MoE projectors
@@ -123,7 +116,6 @@ class ASRConfig(transformers.PretrainedConfig):
123
  self.projector_pool_stride = projector_pool_stride
124
  self.downsample_rate = downsample_rate
125
  self.projector_hidden_dim = projector_hidden_dim
126
- self.projector_dropout = projector_dropout
127
  self.projector_type = projector_type
128
  # MoE-specific configuration
129
  self.num_experts = num_experts
 
50
  projector_pool_stride: int = 4,
51
  downsample_rate: int = 5, # Granite default
52
  projector_hidden_dim: Optional[int] = None,
 
 
 
 
 
 
 
53
  projector_type: str = "mlp", # "mlp", "mosa", "moe", "qformer"
54
  # MoE-specific configuration
55
  num_experts: int = 4, # Number of experts in MoE projectors
 
116
  self.projector_pool_stride = projector_pool_stride
117
  self.downsample_rate = downsample_rate
118
  self.projector_hidden_dim = projector_hidden_dim
 
119
  self.projector_type = projector_type
120
  # MoE-specific configuration
121
  self.num_experts = num_experts
config.json CHANGED
@@ -262,7 +262,6 @@
262
  "pad_token_id": 151643,
263
  "pipeline_tag": "automatic-speech-recognition",
264
  "pretrained_model_path": "mazesmazes/tiny-audio-next",
265
- "projector_dropout": 0.1,
266
  "projector_hidden_dim": 2048,
267
  "projector_pool_stride": 4,
268
  "projector_type": "mlp",
 
262
  "pad_token_id": 151643,
263
  "pipeline_tag": "automatic-speech-recognition",
264
  "pretrained_model_path": "mazesmazes/tiny-audio-next",
 
265
  "projector_hidden_dim": 2048,
266
  "projector_pool_stride": 4,
267
  "projector_type": "mlp",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e6a3611eaa107e17233a3b4b06d53d1bcd0076b0bd76b436bac9cf4156a6db0
3
  size 2433494416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa0caab16d99a3c3d7ef0289332b36eee96b9a3602c1e02f8d2bf2c9f38e7b21
3
  size 2433494416
projectors.py CHANGED
@@ -55,12 +55,6 @@ class MLPAudioProjector(nn.Module):
55
  self.norm = LlamaRMSNorm(hidden_dim, eps=1e-6)
56
  self.norm.weight.data.fill_(self._NORM_INIT)
57
  self.act = nn.GELU()
58
- # Dropout matches Granite-Speech 4.1's Q-Former hidden_dropout_prob=0.1
59
- # in its frozen-encoder modality-alignment stage — the closest
60
- # published precedent for our regime. Default 0.0 in config means
61
- # nn.Dropout(0.0) is a no-op for existing experiments.
62
- projector_dropout = float(getattr(config, "projector_dropout", 0.0))
63
- self.dropout = nn.Dropout(projector_dropout)
64
  self.linear_2 = nn.Linear(hidden_dim, llm_dim, bias=False)
65
  # Output norm aligns the projector's RMS with the LM's embed_tokens
66
  # distribution. See _NORM_INIT comment above for the magnitude
@@ -86,7 +80,6 @@ class MLPAudioProjector(nn.Module):
86
  x = self.linear_1(x)
87
  x = self.norm(x)
88
  x = self.act(x)
89
- x = self.dropout(x)
90
  x = self.linear_2(x)
91
  return self.norm_2(x)
92
 
 
55
  self.norm = LlamaRMSNorm(hidden_dim, eps=1e-6)
56
  self.norm.weight.data.fill_(self._NORM_INIT)
57
  self.act = nn.GELU()
 
 
 
 
 
 
58
  self.linear_2 = nn.Linear(hidden_dim, llm_dim, bias=False)
59
  # Output norm aligns the projector's RMS with the LM's embed_tokens
60
  # distribution. See _NORM_INIT comment above for the magnitude
 
80
  x = self.linear_1(x)
81
  x = self.norm(x)
82
  x = self.act(x)
 
83
  x = self.linear_2(x)
84
  return self.norm_2(x)
85