mazesmazes committed on
Commit
e76797a
·
verified ·
1 Parent(s): 3e23818

Upload asr_config.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. asr_config.py +4 -2
asr_config.py CHANGED
@@ -25,7 +25,6 @@ class ASRConfig(transformers.PretrainedConfig):
25
  model_dtype: str = "bfloat16",
26
  num_beams: Optional[int] = None,
27
  system_prompt: str = "You are a helpful assistant.",
28
- user_prompt: str = "Please transcribe this English audio into text: <audio>",
29
  encoder_dim: Optional[int] = None,
30
  llm_dim: Optional[int] = None,
31
  # Encoder conv layers: list of (padding, kernel_size, stride) tuples
@@ -104,7 +103,6 @@ class ASRConfig(transformers.PretrainedConfig):
104
  self.attn_implementation = attn_implementation
105
  self.model_dtype = model_dtype
106
  self.system_prompt = system_prompt
107
- self.user_prompt = user_prompt
108
  self.encoder_dim = encoder_dim
109
  self.llm_dim = llm_dim
110
  # Default conv layers for Whisper/GLM-ASR: [(pad, kernel, stride), ...]
@@ -206,6 +204,10 @@ class ASRConfig(transformers.PretrainedConfig):
206
 
207
  super().__init__(**kwargs)
208
 
 
 
 
 
209
  self.auto_map = {
210
  "AutoConfig": "asr_config.ASRConfig",
211
  "AutoModel": "asr_modeling.ASRModel",
 
25
  model_dtype: str = "bfloat16",
26
  num_beams: Optional[int] = None,
27
  system_prompt: str = "You are a helpful assistant.",
 
28
  encoder_dim: Optional[int] = None,
29
  llm_dim: Optional[int] = None,
30
  # Encoder conv layers: list of (padding, kernel_size, stride) tuples
 
103
  self.attn_implementation = attn_implementation
104
  self.model_dtype = model_dtype
105
  self.system_prompt = system_prompt
 
106
  self.encoder_dim = encoder_dim
107
  self.llm_dim = llm_dim
108
  # Default conv layers for Whisper/GLM-ASR: [(pad, kernel, stride), ...]
 
204
 
205
  super().__init__(**kwargs)
206
 
207
+ # Point encoder to audio_config so pipeline uses correct feature extractor
208
+ # The pipeline looks for config.encoder._name_or_path for feature extractor
209
+ self.encoder = self.audio_config
210
+
211
  self.auto_map = {
212
  "AutoConfig": "asr_config.ASRConfig",
213
  "AutoModel": "asr_modeling.ASRModel",