Hugging Face Spaces — Space status: Runtime error (the script below crashed at runtime).
"""Classify an audio file with a Hugging Face Wav2Vec2 CTC model.

Prompts the user for an audio path, resamples the signal to the 16 kHz
rate Wav2Vec2 models expect, runs inference, and prints the decoded label.
"""
import os

import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer

# Wav2Vec2 models are trained on 16 kHz audio.  Wav2Vec2Config has no
# `sample_rate` attribute, so the rate must be a known constant here.
TARGET_SAMPLE_RATE = 16_000


def main() -> None:
    """Load the model, classify one user-supplied audio file, print the label."""
    # SECURITY FIX: the original hard-coded a live Hugging Face token in
    # source.  Read it from the environment instead; revoke the leaked token.
    token = os.environ.get("HUGGINGFACE_TOKEN")

    # Load the pre-trained model and tokenizer.
    model_name = "BrendaTellez/sounds2"
    model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=token)
    tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name, use_auth_token=token)

    # Get the audio file from the user and load it with torchaudio.
    # `waveform` is (channels, samples); `sample_rate` is the file's rate in Hz.
    file_path = input("Enter the path to the audio file: ")
    waveform, sample_rate = torchaudio.load(file_path)

    # Downmix multi-channel audio to mono — the model expects one channel.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # BUG FIX: the original compared the channel count (shape[0]) against the
    # non-existent model.config.sample_rate (AttributeError at runtime) and
    # built the resampler from the sample count (shape[1]).  Compare and
    # resample by sampling rate instead.
    if sample_rate != TARGET_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(sample_rate, TARGET_SAMPLE_RATE)
        waveform = resampler(waveform)

    # Tokenize a 1-D signal; the tokenizer adds the batch dimension itself.
    inputs = tokenizer(waveform.squeeze(0).numpy(), return_tensors="pt", padding=True)

    # Inference without gradient tracking.
    with torch.no_grad():
        logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits

    # Greedy CTC decode of the highest-scoring token ids.
    predicted_class_id = torch.argmax(logits, dim=-1)
    predicted_class_label = tokenizer.decode(predicted_class_id[0])
    print(f"The audio file is classified as: {predicted_class_label}")


if __name__ == "__main__":
    main()