{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "84427819", "metadata": { "execution": { "iopub.execute_input": "2026-02-23T11:34:27.597639Z", "iopub.status.busy": "2026-02-23T11:34:27.597226Z", "iopub.status.idle": "2026-02-23T11:34:44.131586Z", "shell.execute_reply": "2026-02-23T11:34:44.130498Z" }, "papermill": { "duration": 16.540574, "end_time": "2026-02-23T11:34:44.133901", "exception": false, "start_time": "2026-02-23T11:34:27.593327", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: librosa in /usr/local/lib/python3.12/dist-packages (0.11.0)\r\n", "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.12/dist-packages (from librosa) (3.1.0)\r\n", "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.12/dist-packages (from librosa) (0.60.0)\r\n", "Requirement already satisfied: numpy>=1.22.3 in /usr/local/lib/python3.12/dist-packages (from librosa) (2.0.2)\r\n", "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from librosa) (1.16.3)\r\n", "Requirement already satisfied: scikit-learn>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from librosa) (1.6.1)\r\n", "Requirement already satisfied: joblib>=1.0 in /usr/local/lib/python3.12/dist-packages (from librosa) (1.5.3)\r\n", "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.12/dist-packages (from librosa) (4.4.2)\r\n", "Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.12/dist-packages (from librosa) (0.13.1)\r\n", "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.12/dist-packages (from librosa) (1.8.2)\r\n", "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.12/dist-packages (from librosa) (1.0.0)\r\n", "Requirement already satisfied: typing_extensions>=4.1.1 in /usr/local/lib/python3.12/dist-packages (from librosa) (4.15.0)\r\n", "Requirement already satisfied: lazy_loader>=0.1 in /usr/local/lib/python3.12/dist-packages (from librosa) (0.4)\r\n", "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.12/dist-packages (from librosa) (1.1.2)\r\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from lazy_loader>=0.1->librosa) (25.0)\r\n", "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.12/dist-packages (from numba>=0.51.0->librosa) (0.43.0)\r\n", "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.12/dist-packages (from pooch>=1.1->librosa) (4.5.1)\r\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.12/dist-packages (from pooch>=1.1->librosa) (2.32.4)\r\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn>=1.1.0->librosa) (3.6.0)\r\n", "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.12/dist-packages (from soundfile>=0.12.1->librosa) (2.0.0)\r\n", "Requirement already satisfied: pycparser in /usr/local/lib/python3.12/dist-packages (from cffi>=1.0->soundfile>=0.12.1->librosa) (2.23)\r\n", "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests>=2.19.0->pooch>=1.1->librosa) (3.4.4)\r\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests>=2.19.0->pooch>=1.1->librosa) (3.11)\r\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests>=2.19.0->pooch>=1.1->librosa) (2.5.0)\r\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests>=2.19.0->pooch>=1.1->librosa) (2026.1.4)\r\n", "Requirement already satisfied: soundfile in /usr/local/lib/python3.12/dist-packages (0.13.1)\r\n", "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.12/dist-packages (from soundfile) (2.0.0)\r\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from soundfile) (2.0.2)\r\n", "Requirement already satisfied: pycparser in /usr/local/lib/python3.12/dist-packages (from cffi>=1.0->soundfile) (2.23)\r\n", "Collecting noisereduce\r\n", " Downloading noisereduce-3.0.3-py3-none-any.whl.metadata (14 kB)\r\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.12/dist-packages (from noisereduce) (1.16.3)\r\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.12/dist-packages (from noisereduce) (3.10.0)\r\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from noisereduce) (2.0.2)\r\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from noisereduce) (4.67.1)\r\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.12/dist-packages (from noisereduce) (1.5.3)\r\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->noisereduce) (1.3.3)\r\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib->noisereduce) (0.12.1)\r\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib->noisereduce) (4.61.1)\r\n", "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->noisereduce) (1.4.9)\r\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib->noisereduce) (25.0)\r\n", "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib->noisereduce) (11.3.0)\r\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->noisereduce) (3.3.1)\r\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.12/dist-packages (from matplotlib->noisereduce) (2.9.0.post0)\r\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.7->matplotlib->noisereduce) (1.17.0)\r\n", "Downloading noisereduce-3.0.3-py3-none-any.whl (22 kB)\r\n", "Installing collected packages: noisereduce\r\n", "Successfully installed noisereduce-3.0.3\r\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (2.3.3)\r\n", "Requirement already satisfied: numpy>=1.26.0 in /usr/local/lib/python3.12/dist-packages (from pandas) (2.0.2)\r\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas) (2.9.0.post0)\r\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.2)\r\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.3)\r\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\r\n" ] } ], "source": [ "!pip install librosa\n", "!pip install soundfile\n", "!pip install noisereduce\n", "!pip install pandas" ] }, { "cell_type": "code", "execution_count": 2, "id": "8bff4c02", "metadata": { "execution": { "iopub.execute_input": "2026-02-23T11:34:44.140482Z", "iopub.status.busy": "2026-02-23T11:34:44.139825Z", "iopub.status.idle": "2026-02-23T11:34:49.825022Z", "shell.execute_reply": "2026-02-23T11:34:49.824211Z" }, "papermill": { "duration": 5.690644, "end_time": "2026-02-23T11:34:49.826991", "exception": false, "start_time": "2026-02-23T11:34:44.136347", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import os\n", "import numpy as np\n", "import librosa\n", "import soundfile as sf\n", "import noisereduce as nr\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 3, "id": "4dfbf6ff", "metadata": { "execution": { "iopub.execute_input": "2026-02-23T11:34:49.833688Z", "iopub.status.busy": "2026-02-23T11:34:49.832979Z", "iopub.status.idle": "2026-02-23T11:34:49.841222Z", "shell.execute_reply": "2026-02-23T11:34:49.840274Z" }, "papermill": { "duration": 0.013487, "end_time": "2026-02-23T11:34:49.842894", "exception": false, "start_time": "2026-02-23T11:34:49.829407", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Load audio\n", "def load_audio(file_path, sr=16000):\n", " audio, sample_rate = librosa.load(file_path, sr=sr)\n", " return audio, sample_rate\n", "\n", "# Bandpass filter for breathing range\n", "from scipy.signal import butter, lfilter\n", "\n", "def butter_bandpass(lowcut, highcut, fs, order=5):\n", " nyq = 0.5 * fs\n", " low = lowcut / nyq\n", " high = highcut / nyq\n", " b, a = butter(order, [low, high], btype='band')\n", " return b, a\n", "\n", "def bandpass_filter(data, lowcut=100, highcut=2000, fs=16000):\n", " b, a = butter_bandpass(lowcut, highcut, fs)\n", " y = lfilter(b, a, data)\n", " return y\n", "\n", "# Noise reduction\n", "def noise_suppression(audio, sr):\n", " return nr.reduce_noise(y=audio, sr=sr, prop_decrease=1.0)\n", "\n", "# Full preprocessing pipeline\n", "def preprocess_audio(file_path):\n", " audio, sr = load_audio(file_path)\n", " filtered = bandpass_filter(audio, fs=sr)\n", " cleaned = noise_suppression(filtered, sr)\n", " return cleaned, sr" ] }, { "cell_type": "code", "execution_count": 4, "id": "1d0977cb", "metadata": { "execution": { "iopub.execute_input": "2026-02-23T11:34:49.848932Z", "iopub.status.busy": "2026-02-23T11:34:49.848226Z", "iopub.status.idle": "2026-02-23T11:36:15.739658Z", "shell.execute_reply": "2026-02-23T11:36:15.738213Z" }, "papermill": { "duration": 85.89757, "end_time": "2026-02-23T11:36:15.742632", "exception": false, "start_time": "2026-02-23T11:34:49.845062", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Preprocessing complete. Audio files zipped at /kaggle/working/cleaned_audio.zip\n" ] } ], "source": [ "import os\n", "import pandas as pd\n", "import soundfile as sf\n", "from zipfile import ZipFile\n", "\n", "input_dir = \"/kaggle/input/datasets/liamgraphics/simulated-ward-dataset\"\n", "output_dir = \"/kaggle/working/cleaned_audio/\"\n", "os.makedirs(output_dir, exist_ok=True)\n", "\n", "metadata = []\n", "\n", "for file_name in os.listdir(input_dir):\n", " if file_name.endswith(\".wav\"):\n", " file_path = os.path.join(input_dir, file_name)\n", " cleaned_audio, sr = preprocess_audio(file_path) # your preprocessing function\n", " \n", " output_path = os.path.join(output_dir, file_name)\n", " sf.write(output_path, cleaned_audio, sr)\n", " \n", " metadata.append({\n", " \"file_name\": file_name,\n", " \"sample_rate\": sr,\n", " \"length_sec\": len(cleaned_audio)/sr\n", " })\n", "\n", "# Save metadata CSV (optional, outside ZIP)\n", "metadata_df = pd.DataFrame(metadata)\n", "metadata_csv_path = os.path.join(output_dir, \"metadata.csv\")\n", "metadata_df.to_csv(metadata_csv_path, index=False)\n", "\n", "# Create ZIP file containing only .wav files\n", "zip_path = \"/kaggle/working/cleaned_audio.zip\"\n", "with ZipFile(zip_path, 'w') as zipf:\n", " for file_name in os.listdir(output_dir):\n", " if file_name.endswith(\".wav\"): # only include audio files\n", " file_full_path = os.path.join(output_dir, file_name)\n", " zipf.write(file_full_path, arcname=file_name)\n", "\n", "print(f\"Preprocessing complete. Audio files zipped at {zip_path}\")" ] } ], "metadata": { "kaggle": { "accelerator": "none", "dataSources": [ { "databundleVersionId": 15774559, "datasetId": 9539684, "sourceId": 14909052, "sourceType": "datasetVersion" } ], "dockerImageVersionId": 31286, "isGpuEnabled": false, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.12" }, "papermill": { "default_parameters": {}, "duration": 112.882623, "end_time": "2026-02-23T11:36:17.369657", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2026-02-23T11:34:24.487034", "version": "2.6.0" } }, "nbformat": 4, "nbformat_minor": 5 }