| | import os
|
| | import pandas as pd
|
| | import numpy as np
|
| | import torch
|
| | import torch.nn as nn
|
| | import torch.optim as optim
|
| | from torch.utils.data import Dataset, DataLoader
|
| | import librosa
|
| | from sklearn.model_selection import train_test_split
|
| | from sklearn.preprocessing import LabelEncoder
|
| |
|
| |
|
class AudioDataset(Dataset):
    """Torch dataset mapping audio file paths to fixed-size MFCC tensors.

    Each item is a ``(1, 40, 100)`` float tensor of MFCC features plus its
    integer class label.
    """

    def __init__(self, audio_paths, labels):
        self.audio_paths = audio_paths
        self.labels = labels

    def __len__(self):
        return len(self.audio_paths)

    def __getitem__(self, idx):
        """Load sample *idx* and return ``(features, label)``."""
        path = self.audio_paths[idx]

        # Decode at a fixed 16 kHz mono so every clip yields comparable frames.
        waveform, rate = librosa.load(path, sr=16000, mono=True)

        # 40 MFCC coefficients per time frame.
        features = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=40)

        # Force a constant 100-frame width: crop long clips, zero-pad short ones.
        n_frames = features.shape[1]
        if n_frames >= 100:
            features = features[:, :100]
        else:
            features = np.pad(features, ((0, 0), (0, 100 - n_frames)), mode='constant')

        # Leading channel axis for the Conv2d stack downstream.
        sample = torch.FloatTensor(features).unsqueeze(0)
        target = torch.LongTensor([self.labels[idx]])[0]
        return sample, target
|
| |
|
| |
|
class AudioClassifier(nn.Module):
    """Three-stage CNN classifier over (batch, 1, 40, 100) MFCC inputs.

    Each 3x3 conv (padding 1) preserves spatial size and the 2x2 max-pool
    halves it: 40x100 -> 20x50 -> 10x25 -> 5x12, which is where the
    128 * 5 * 12 flattened feature count comes from.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 5 * 12, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return raw class logits (no softmax) for a batch of MFCC images."""
        # conv -> relu -> pool, three times.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(self.relu(conv(x)))
        # Flatten the 128 x 5 x 12 feature maps for the classifier head.
        x = x.view(-1, 128 * 5 * 12)
        x = self.dropout(self.relu(self.fc1(x)))
        return self.fc2(x)
|
# --- Load the trained model and label names once, at import time ---

# Rebuild the 2-class architecture and restore trained weights;
# map_location lets the checkpoint load on CPU-only machines.
model = AudioClassifier(2)
model.load_state_dict(
    torch.load('audio_classifier_model_2.pth', map_location=torch.device('cpu'))
)
model.eval()  # inference mode: disables dropout

# Class names saved from the training-time LabelEncoder; index -> label string.
classes = np.load("label_encoder_classes.npy", allow_pickle=True)
print(classes)  # startup sanity check of the label order
|
| |
|
def classify_audio(audio_file):
    """Classify a single audio file and return the predicted class name."""
    # Mirror the training-time preprocessing: 16 kHz mono, 40 MFCCs.
    signal, rate = librosa.load(audio_file, sr=16000, mono=True)
    features = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)

    # Fixed 100-frame width: crop long clips, zero-pad short ones.
    n_frames = features.shape[1]
    if n_frames >= 100:
        features = features[:, :100]
    else:
        features = np.pad(features, ((0, 0), (0, 100 - n_frames)), mode='constant')

    # Shape (1, 1, 40, 100): batch and channel axes expected by the CNN.
    batch = torch.FloatTensor(features).unsqueeze(0).unsqueeze(0)

    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = model(batch)
        _, prediction = torch.max(logits, 1)

    # Map the winning index back to its human-readable label.
    return classes[prediction.item()]
|
# Gradio UI: wires classify_audio to a simple upload-and-classify web page.
import gradio as gr

iface = gr.Interface(
    fn=classify_audio,
    # "filepath" hands classify_audio a path on disk rather than raw samples.
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="💓 Heartbeat",
    description=(
        "Upload an audio file of a baby's heartbeat to check for abnormalities.<br>"
        "Trained on: Bhaskaran, A., & Arora, M. (2022). Indian Institute of Science Fetal Heart Sound Database (IIScFHSDB) (version 1.0). PhysioNet. https://doi.org/10.13026/9vvw-cx05.<br>"
        "Original publication: Amrutha, B; Sidhesh Kumar, J; George, S. & Arora, M. Heart rate estimation and validation algorithm for fetal phonocardiography. Physiological Measurement, 2022."
    )
)


# Start the web server (blocks until shut down).
iface.launch()