# Heartbeat / app.py
# Author: Sankie005 — initial commit (d8548c0, verified)
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# Custom dataset: serves (MFCC tensor, label) pairs for heartbeat audio files.
class AudioDataset(Dataset):
    """Map-style dataset over a list of audio file paths with integer labels."""

    def __init__(self, audio_paths, labels):
        self.audio_paths = audio_paths
        self.labels = labels

    def __len__(self):
        return len(self.audio_paths)

    def __getitem__(self, idx):
        # Decode the clip at a fixed 16 kHz mono sample rate.
        waveform, sample_rate = librosa.load(self.audio_paths[idx], sr=16000, mono=True)
        # 40 MFCC coefficients per frame.
        features = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=40)
        # Force a fixed width of 100 frames: crop long clips, zero-pad short ones.
        n_frames = features.shape[1]
        if n_frames > 100:
            features = features[:, :100]
        else:
            features = np.pad(features, ((0, 0), (0, 100 - n_frames)), mode='constant')
        # (1, 40, 100) float tensor — the leading dim is the conv input channel.
        feature_tensor = torch.FloatTensor(features).unsqueeze(0)
        # Scalar long tensor for CrossEntropy-style losses.
        target = torch.LongTensor([self.labels[idx]])[0]
        return feature_tensor, target
# CNN Model
class AudioClassifier(nn.Module):
    """Three-block 2-D CNN classifier over (N, 1, 40, 100) MFCC inputs.

    Each conv block halves both spatial dims via 2x2 max-pooling:
    40x100 -> 20x50 -> 10x25 -> 5x12, hence 128 * 5 * 12 flat features.

    Args:
        num_classes: size of the output logit vector.
    """

    def __init__(self, num_classes):
        super(AudioClassifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 5 * 12, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return raw class logits of shape (N, num_classes)."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        # Flatten per-sample. Unlike view(-1, 128*5*12), torch.flatten keeps
        # the batch dimension intact and raises on a spatial-shape mismatch
        # instead of silently re-batching the data.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
# Restore the trained 2-class model on CPU and switch to inference mode.
model = AudioClassifier(2)
state = torch.load('audio_classifier_model_2.pth', map_location=torch.device('cpu'))
model.load_state_dict(state)
model.eval()

# Class-name array saved alongside the model (LabelEncoder classes).
# NOTE(review): allow_pickle=True deserializes arbitrary objects — only safe
# because this file ships with the app, not from untrusted input.
k = np.load(file="label_encoder_classes.npy", allow_pickle=True)
classes = k
print(k)
def classify_audio(audio_file):
    """Predict the class name for a single audio file path.

    Mirrors the training-time preprocessing: 16 kHz mono load, 40 MFCCs,
    fixed to 100 frames by cropping or zero-padding.
    """
    waveform, sample_rate = librosa.load(audio_file, sr=16000, mono=True)
    features = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=40)
    n_frames = features.shape[1]
    if n_frames > 100:
        features = features[:, :100]
    else:
        features = np.pad(features, ((0, 0), (0, 100 - n_frames)), mode='constant')
    # Shape (1, 1, 40, 100): add batch and channel dims for the CNN.
    batch = torch.FloatTensor(features).unsqueeze(0).unsqueeze(0)
    # Inference only — no gradient bookkeeping needed.
    with torch.no_grad():
        logits = model(batch)
        _, predicted = torch.max(logits, 1)
    # Map the winning index back to its human-readable class name.
    return classes[predicted.item()]
import gradio as gr

# Gradio UI: one audio-file input, the predicted label as text output.
app_description = (
    "Upload an audio file of a baby's heartbeat to check for abnormalities.<br>"
    "Trained on: Bhaskaran, A., & Arora, M. (2022). Indian Institute of Science Fetal Heart Sound Database (IIScFHSDB) (version 1.0). PhysioNet. https://doi.org/10.13026/9vvw-cx05.<br>"
    "Original publication: Amrutha, B; Sidhesh Kumar, J; George, S. & Arora, M. Heart rate estimation and validation algorithm for fetal phonocardiography. Physiological Measurement, 2022."
)
iface = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="💓 Heartbeat",
    description=app_description,
)

# Launch the app
iface.launch()