BrendaTellez committed on
Commit
76b33ae
·
1 Parent(s): c0c221d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -15
app.py CHANGED
@@ -1,23 +1,25 @@
1
  import torch
 
2
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
3
 
4
- # Get user input for the text
5
- text = input("Enter a sentence to classify the sound: ")
 
 
6
 
7
- # Get the name of the sound classification model
8
- model_name = "BrendaTellez/SoundClassification"
9
- print("Using sound classification model:", model_name)
10
 
11
- # Load the model and tokenizer from the Hugging Face Model Hub
12
- api_token = "hf_********************************"  # SECURITY: a live Hugging Face access token was published verbatim here; redacted — the token must be revoked and rotated.
13
- model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=True, token=api_token)
14
- tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name, use_auth_token=True, token=api_token)
15
 
16
- # Load the audio file
17
- audio_file = r"/kaggle/input/environmental-sound-classification-50/audio/audio/1-101296-A-19.wav"
 
 
18
 
19
- # Tokenize the audio file
20
- inputs = tokenizer(audio_file, return_tensors="pt", padding=True)
21
 
22
  # Use the model to classify the audio
23
  with torch.no_grad():
@@ -26,5 +28,4 @@ with torch.no_grad():
26
  predicted_class_id = torch.argmax(logits, dim=-1)
27
  predicted_class_label = tokenizer.decode(predicted_class_id[0])
28
 
29
- # Print the predicted class label for the user to see
30
- print("The predicted sound class for the sentence", text, "is:", predicted_class_label)
 
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer

# Load the pre-trained sound-classification model and its tokenizer
# from the Hugging Face Hub (public repo, no auth token needed).
model_name = "BrendaTellez/SoundClassificationCNNRNN"
model = Wav2Vec2ForCTC.from_pretrained(model_name)
tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)

# Get the audio file from the user
file_path = input("Enter the path to the audio file: ")

# Load the audio file using torchaudio.
# torchaudio.load returns (waveform, sample_rate) where waveform has
# shape (channels, samples).
waveform, sample_rate = torchaudio.load(file_path)

# Resample the audio to match the sample rate expected by the model.
# BUG FIX: the original compared waveform.shape[0] (the CHANNEL count)
# against the target rate and passed waveform.shape[1] (the SAMPLE count)
# as the source rate to Resample; both must be the file's actual
# sample_rate, which torchaudio.load already returned above.
# NOTE(review): Wav2Vec2 configs do not normally define a `sample_rate`
# attribute (the original `model.config.sample_rate` would raise
# AttributeError); 16 kHz is the Wav2Vec2 default — confirm against the
# model card.
target_rate = getattr(model.config, "sample_rate", 16000)
if sample_rate != target_rate:
    resampler = torchaudio.transforms.Resample(sample_rate, target_rate)
    waveform = resampler(waveform)

# Tokenize the audio using the model's tokenizer
inputs = tokenizer(waveform.numpy(), return_tensors="pt", padding=True)

# Use the model to classify the audio; no_grad disables gradient
# tracking since this is inference only.
with torch.no_grad():
    # NOTE(review): the diff elided the original forward-pass lines
    # (file lines 26-27); this is the standard Wav2Vec2ForCTC call
    # producing per-frame logits — verify against the full file.
    logits = model(inputs.input_values).logits
    predicted_class_id = torch.argmax(logits, dim=-1)
    predicted_class_label = tokenizer.decode(predicted_class_id[0])

print(f"The audio file is classified as: {predicted_class_label}")