{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "96b34bd1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import xgboost as xgb\n",
    "import sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "37b99e9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Saved XGBoost classifiers, in the order they are bound to xgb_model1..3.\n",
    "MODEL_PATHS = (\n",
    "    'models/xgboost_first_model.json',\n",
    "    'models/xgboost_second_model.json',\n",
    "    'models/xgboost_third_model.json',\n",
    ")\n",
    "\n",
    "\n",
    "def load_xgb_classifier(model_path):\n",
    "    \"\"\"Return a new XGBClassifier with the model stored at `model_path` loaded into it.\"\"\"\n",
    "    model = xgb.XGBClassifier()\n",
    "    model.load_model(model_path)\n",
    "    return model\n",
    "\n",
    "\n",
    "# The loop variable is named `model_name`, so after the loop it still holds\n",
    "# the path of the last model that was loaded.\n",
    "xgb_models = []\n",
    "for model_name in MODEL_PATHS:\n",
    "    xgb_models.append(load_xgb_classifier(model_name))\n",
    "xgb_model1, xgb_model2, xgb_model3 = xgb_models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "40aa73e8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2022\n",
      "2023\n",
      "2024\n",
      "2025\n"
     ]
    }
   ],
   "source": [
    "# Root of the acoustics data and the BirdCLEF years to score.\n",
    "DATA_ROOT = '/mnt/d/acoustics-data'\n",
    "YEARS = [2022, 2023, 2024, 2025]\n",
    "\n",
    "# Parallel accumulators: index i of every list describes the same slice row.\n",
    "m3_pred = []\n",
    "m2_pred = []\n",
    "m1_pred = []\n",
    "bird_list = []\n",
    "year_list = []\n",
    "dur_list = []\n",
    "tim_list = []\n",
    "fil_list = []\n",
    "aug_list = []\n",
    "# (year, bird, reason) for every species that was skipped; inspect after the loop.\n",
    "skipped_birds = []\n",
    "\n",
    "for year in YEARS:\n",
    "    print(year)\n",
    "    processed_dir = f'{DATA_ROOT}/processed/birdclef-{year}'\n",
    "    # Sort so the row order does not depend on the filesystem's listing order.\n",
    "    birds = sorted(os.listdir(f'{DATA_ROOT}/birdclef-{year}/train_audio'))\n",
    "    for bird in birds:\n",
    "        try:\n",
    "            # Load, validate and score everything for this species *before*\n",
    "            # touching the accumulators, so a failure part-way through can\n",
    "            # never leave the parallel lists with different lengths.\n",
    "            X = np.load(f'{processed_dir}/{bird}_features.npy')\n",
    "            tab = pd.read_csv(f'{processed_dir}/{bird}_slices.csv')\n",
    "            n = X.shape[0]\n",
    "            if len(tab) != n:\n",
    "                raise ValueError(f'{n} feature rows but {len(tab)} slice rows')\n",
    "            # Keep column 1 (the second class) of each model's predict_proba output.\n",
    "            species_pred = [\n",
    "                model.predict_proba(X)[:, 1]\n",
    "                for model in (xgb_model1, xgb_model2, xgb_model3)\n",
    "            ]\n",
    "            species_meta = [\n",
    "                tab[column].tolist()\n",
    "                for column in ('duration', 'start_time', 'file_name', 'is_augmented')\n",
    "            ]\n",
    "        except Exception as exc:\n",
    "            # Best effort: a species whose processed files are missing or\n",
    "            # unusable is skipped, but recorded instead of silently ignored.\n",
    "            # Catching Exception (not a bare except) lets KeyboardInterrupt\n",
    "            # still stop the loop.\n",
    "            skipped_birds.append((year, bird, repr(exc)))\n",
    "            continue\n",
    "\n",
    "        # Everything succeeded: append the species to all accumulators at once.\n",
    "        bird_list.extend([bird] * n)\n",
    "        year_list.extend([year] * n)\n",
    "        for target, values in zip((m1_pred, m2_pred, m3_pred), species_pred):\n",
    "            target.extend(values)\n",
    "        for target, values in zip((dur_list, tim_list, fil_list, aug_list), species_meta):\n",
    "            target.extend(values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "876d3800",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
| \n", " | bird | \n", "year | \n", "model_1 | \n", "model_2 | \n", "model_3 | \n", "duration | \n", "time | \n", "file_name | \n", "is_augmented | \n", "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "afrsil1 | \n", "2022 | \n", "0.520478 | \n", "0.992990 | \n", "0.997792 | \n", "3.5004 | \n", "0.0 | \n", "XC207432.ogg | \n", "0 | \n", "
| 1 | \n", "afrsil1 | \n", "2022 | \n", "0.512172 | \n", "0.992990 | \n", "0.998557 | \n", "3.5004 | \n", "0.0 | \n", "XC207432.ogg | \n", "1 | \n", "
| 2 | \n", "afrsil1 | \n", "2022 | \n", "0.374894 | \n", "0.985676 | \n", "0.996787 | \n", "3.5004 | \n", "0.0 | \n", "XC207432.ogg | \n", "1 | \n", "
| 3 | \n", "afrsil1 | \n", "2022 | \n", "0.348209 | \n", "0.987380 | \n", "0.994968 | \n", "3.5004 | \n", "0.0 | \n", "XC207432.ogg | \n", "1 | \n", "
| 4 | \n", "afrsil1 | \n", "2022 | \n", "0.469575 | \n", "0.984070 | \n", "0.995817 | \n", "3.5004 | \n", "0.0 | \n", "XC207432.ogg | \n", "1 | \n", "