shangeth
/

SpeechLLM

Feature Extraction

Model card · Files and versions

shangeth committed on Jun 4, 2024

Commit

54ac8cb

·

verified ·

1 Parent(s): 1098244

Upload model

Files changed (3)

README.md +7 -7
config.json +1 -6
config.py +9 -0

README.md CHANGED Viewed

@@ -1,19 +1,19 @@
 ---
-library_name: transformers
 language:
 - en
-metrics:
-- wer
 license: apache-2.0
-datasets:
-- librispeech_asr
-- mozilla-foundation/common_voice_16_1
-- DynamicSuperb/EmotionalSpeechAudioClassification_RAVDESS-EmotionalSound
 tags:
 - multi-modal
 - conversational
 - speechllm
 - speech2text
 ---
 # Model Card for Model ID

 ---
 language:
 - en
 license: apache-2.0
+library_name: transformers
 tags:
 - multi-modal
 - conversational
 - speechllm
 - speech2text
+datasets:
+- librispeech_asr
+- mozilla-foundation/common_voice_16_1
+- DynamicSuperb/EmotionalSpeechAudioClassification_RAVDESS-EmotionalSound
+metrics:
+- wer
 ---
 # Model Card for Model ID

config.json CHANGED Viewed

@@ -1,14 +1,9 @@
 {
-  "architectures": [
-    "CustomModel"
-  ],
   "audio_enc_dim": 1280,
   "auto_map": {
-    "AutoConfig": "MyConfig.CustomModelConfig",
-    "AutoModel": "MyModel.CustomModel"
   },
   "llm_dim": 2048,
   "model_type": "custom_model",
-  "torch_dtype": "float32",
   "transformers_version": "4.38.2"
 }

 {
   "audio_enc_dim": 1280,
   "auto_map": {
+    "AutoConfig": "config.SpeechLLMModelConfig"
   },
   "llm_dim": 2048,
   "model_type": "custom_model",
   "transformers_version": "4.38.2"
 }

config.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from transformers import PretrainedConfig
+class SpeechLLMModelConfig(PretrainedConfig):
+    model_type = "custom_model"
+    def __init__(self, audio_enc_dim=1280, llm_dim=2048, **kwargs):
+        super().__init__(**kwargs)
+        self.audio_enc_dim = audio_enc_dim
+        self.llm_dim = llm_dim