| | --- |
| | license: mit |
| | language: |
| | - en |
| | library_name: transformers |
| | --- |
| | |
| | ``` |
| | |
| | |
| | print('Add Audio...') |
| | #Add Head |
| | # Combine pre-trained encoder and pre-trained decoder to form a Seq2Seq model |
| | _AudioFeatureExtractor = AutoFeatureExtractor.from_pretrained("openai/whisper-small") |
| | _AudioTokenizer = AutoTokenizer.from_pretrained("openai/whisper-small") |
| | _SpeechEncoderDecoder = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained("openai/whisper-small","openai/whisper-small") |
| | |
| | # Add Pad tokems |
| | _SpeechEncoderDecoder.config.decoder_start_token_id = _AudioTokenizer.cls_token_id |
| | _SpeechEncoderDecoder.config.pad_token_id = _AudioTokenizer.pad_token_id |
| | LM_MODEL.SpeechEncoderDecoder = _SpeechEncoderDecoder |
| | # Add Sub Components |
| | LM_MODEL.Decoder_AudioTokenizer = _AudioTokenizer |
| | LM_MODEL.Encoder_AudioFeatureExtractor = _AudioFeatureExtractor |
| | LM_MODEL |
| | |
| | ``` |