BrendaTellez committed on
Commit
76b33ae
·
1 Parent(s): c0c221d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -15
app.py CHANGED
@@ -1,23 +1,25 @@
1
  import torch
 
2
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
3
 
4
- # Get user input for the text
5
- text = input("Enter a sentence to classify the sound: ")
 
 
6
 
7
- # Get the name of the sound classification model
8
- model_name = "BrendaTellez/SoundClassification"
9
- print("Using sound classification model:", model_name)
10
 
11
- # Load the model and tokenizer from the Hugging Face Model Hub
12
- api_token = "hf_********************************"  # SECURITY: a live Hugging Face access token was published verbatim here; redacted — the token must be revoked and rotated.
13
- model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=True, token=api_token)
14
- tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name, use_auth_token=True, token=api_token)
15
 
16
- # Load the audio file
17
- audio_file = r"/kaggle/input/environmental-sound-classification-50/audio/audio/1-101296-A-19.wav"
 
 
18
 
19
- # Tokenize the audio file
20
- inputs = tokenizer(audio_file, return_tensors="pt", padding=True)
21
 
22
  # Use the model to classify the audio
23
  with torch.no_grad():
@@ -26,5 +28,4 @@ with torch.no_grad():
26
  predicted_class_id = torch.argmax(logits, dim=-1)
27
  predicted_class_label = tokenizer.decode(predicted_class_id[0])
28
 
29
- # Print the predicted class label for the user to see
30
- print("The predicted sound class for the sentence", text, "is:", predicted_class_label)
 
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer

# Load the pre-trained sound-classification model and its tokenizer
# from the Hugging Face Hub (public repo, no auth token needed).
model_name = "BrendaTellez/SoundClassificationCNNRNN"
model = Wav2Vec2ForCTC.from_pretrained(model_name)
tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)

# Get the audio file from the user
file_path = input("Enter the path to the audio file: ")

# Load the audio file using torchaudio.
# torchaudio.load returns (waveform, sample_rate) where waveform has
# shape (channels, samples).
waveform, sample_rate = torchaudio.load(file_path)

# Resample the audio to match the sample rate expected by the model.
# BUG FIX: the original compared waveform.shape[0] (the CHANNEL count)
# against the target rate and passed waveform.shape[1] (the SAMPLE count)
# as the source rate to Resample; both must be the file's actual
# sample_rate, which torchaudio.load already returned above.
# NOTE(review): Wav2Vec2 configs do not normally define a `sample_rate`
# attribute (the original `model.config.sample_rate` would raise
# AttributeError); 16 kHz is the Wav2Vec2 default — confirm against the
# model card.
target_rate = getattr(model.config, "sample_rate", 16000)
if sample_rate != target_rate:
    resampler = torchaudio.transforms.Resample(sample_rate, target_rate)
    waveform = resampler(waveform)

# Tokenize the audio using the model's tokenizer
inputs = tokenizer(waveform.numpy(), return_tensors="pt", padding=True)

# Use the model to classify the audio; no_grad disables gradient
# tracking since this is inference only.
with torch.no_grad():
    # NOTE(review): the diff elided the original forward-pass lines
    # (file lines 26-27); this is the standard Wav2Vec2ForCTC call
    # producing per-frame logits — verify against the full file.
    logits = model(inputs.input_values).logits
    predicted_class_id = torch.argmax(logits, dim=-1)
    predicted_class_label = tokenizer.decode(predicted_class_id[0])

print(f"The audio file is classified as: {predicted_class_label}")