Upload asr_config.py with huggingface_hub
Browse files
asr_config.py (+4 −2)
asr_config.py
CHANGED
|
@@ -25,7 +25,6 @@ class ASRConfig(transformers.PretrainedConfig):
|
|
| 25 |
model_dtype: str = "bfloat16",
|
| 26 |
num_beams: Optional[int] = None,
|
| 27 |
system_prompt: str = "You are a helpful assistant.",
|
| 28 |
-
user_prompt: str = "Please transcribe this English audio into text: <audio>",
|
| 29 |
encoder_dim: Optional[int] = None,
|
| 30 |
llm_dim: Optional[int] = None,
|
| 31 |
# Encoder conv layers: list of (padding, kernel_size, stride) tuples
|
|
@@ -104,7 +103,6 @@ class ASRConfig(transformers.PretrainedConfig):
|
|
| 104 |
self.attn_implementation = attn_implementation
|
| 105 |
self.model_dtype = model_dtype
|
| 106 |
self.system_prompt = system_prompt
|
| 107 |
-
self.user_prompt = user_prompt
|
| 108 |
self.encoder_dim = encoder_dim
|
| 109 |
self.llm_dim = llm_dim
|
| 110 |
# Default conv layers for Whisper/GLM-ASR: [(pad, kernel, stride), ...]
|
|
@@ -206,6 +204,10 @@ class ASRConfig(transformers.PretrainedConfig):
|
|
| 206 |
|
| 207 |
super().__init__(**kwargs)
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
self.auto_map = {
|
| 210 |
"AutoConfig": "asr_config.ASRConfig",
|
| 211 |
"AutoModel": "asr_modeling.ASRModel",
|
|
|
|
| 25 |
model_dtype: str = "bfloat16",
|
| 26 |
num_beams: Optional[int] = None,
|
| 27 |
system_prompt: str = "You are a helpful assistant.",
|
|
|
|
| 28 |
encoder_dim: Optional[int] = None,
|
| 29 |
llm_dim: Optional[int] = None,
|
| 30 |
# Encoder conv layers: list of (padding, kernel_size, stride) tuples
|
|
|
|
| 103 |
self.attn_implementation = attn_implementation
|
| 104 |
self.model_dtype = model_dtype
|
| 105 |
self.system_prompt = system_prompt
|
|
|
|
| 106 |
self.encoder_dim = encoder_dim
|
| 107 |
self.llm_dim = llm_dim
|
| 108 |
# Default conv layers for Whisper/GLM-ASR: [(pad, kernel, stride), ...]
|
|
|
|
| 204 |
|
| 205 |
super().__init__(**kwargs)
|
| 206 |
|
| 207 |
+
# Point encoder to audio_config so pipeline uses correct feature extractor
|
| 208 |
+
# The pipeline looks for config.encoder._name_or_path for feature extractor
|
| 209 |
+
self.encoder = self.audio_config
|
| 210 |
+
|
| 211 |
self.auto_map = {
|
| 212 |
"AutoConfig": "asr_config.ASRConfig",
|
| 213 |
"AutoModel": "asr_modeling.ASRModel",
|