import librosa
import torch
from transformers import AutoFeatureExtractor, WhisperForAudioClassification
def get_language(
    audio_path,
    model_dir="/home/investigacion/disco4TB/workspace_pablo/firvox_whisper_research/whisper-medium-fleurs-lang-id/lang_identification_models_noFirVox_audios",
):
    """Identify the spoken language of an audio file with a Whisper classifier.

    Parameters
    ----------
    audio_path : str
        Path to an audio file readable by ``librosa.load``.
    model_dir : str, optional
        Directory holding the fine-tuned ``WhisperForAudioClassification``
        checkpoint and its matching feature extractor. Defaults to the
        checkpoint path that was previously hard-coded in the body.

    Returns
    -------
    str
        Predicted language label taken from ``model.config.id2label``.
    """
    # Fall back to CPU so the function does not crash on machines without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    feature_extractor = AutoFeatureExtractor.from_pretrained(model_dir)
    model = WhisperForAudioClassification.from_pretrained(model_dir).to(device)
    # HF models load in train mode; eval() disables dropout so inference is
    # deterministic (no_grad alone does not do this).
    model.eval()

    # Whisper feature extractors expect 16 kHz input; librosa resamples on load.
    audio, sr = librosa.load(audio_path, sr=16000)
    inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt")
    input_features = inputs.input_features.to(device)

    with torch.no_grad():
        logits = model(input_features).logits

    # Batch size is 1 here, so a flat argmax over logits picks the top class id.
    predicted_class_id = torch.argmax(logits).item()
    return model.config.id2label[predicted_class_id]