{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PCOS feature selection with mutual information\n",
    "\n",
    "Ranks the columns of `pcos_cleaned.csv` by their mutual information with the target column `PCOS (Y/N)`, then writes a reduced dataset made of a hard-coded list of columns to `new_pcos_dataset.csv`.\n",
    "\n",
    "Stored outputs were cleared because the code changed; use **Restart Kernel -> Run All** to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.feature_selection import mutual_info_classif\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "INPUT_CSV = 'pcos_cleaned.csv'\n",
    "OUTPUT_CSV = 'new_pcos_dataset.csv'\n",
    "TARGET = 'PCOS (Y/N)'\n",
    "\n",
    "# mutual_info_classif is randomized; a fixed seed makes the ranking repeatable.\n",
    "SEED = 42"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "The file is read once and its column names are stripped once, so the ranking step and the export step refer to columns by the same names."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv(INPUT_CSV)\n",
    "data.columns = data.columns.str.strip()  # Remove any leading/trailing whitespace\n",
    "data.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split target and features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = data[TARGET]\n",
    "\n",
    "# pandas names a blank-header column 'Unnamed: 0' -- presumably a saved row index\n",
    "# (TODO confirm). It is not a measurement, so it is kept out of the feature matrix.\n",
    "# errors='ignore' lets this cell run even when the file has no such column; a\n",
    "# missing target has already raised a KeyError on the line above.\n",
    "X = data.drop(columns=[TARGET, 'Unnamed: 0'], errors='ignore')\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rank features by mutual information\n",
    "\n",
    "In the run saved with the original notebook (unseeded, and with `Unnamed: 0` still among the features), `Follicle No. (R)` (about 0.24) and `Follicle No. (L)` (about 0.20) ranked highest. Exact values will differ after re-running."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate Mutual Information\n",
    "mi = mutual_info_classif(X, y, random_state=SEED)\n",
    "\n",
    "# One row per feature, sorted from most to least informative.\n",
    "# The index keeps each feature's original position in X.\n",
    "mi_df = (\n",
    "    pd.DataFrame({'Feature': X.columns, 'Mutual Information': mi})\n",
    "    .sort_values(by='Mutual Information', ascending=False)\n",
    ")\n",
    "\n",
    "mi_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export the reduced dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "selected_features = ['PCOS (Y/N)', 'Follicle No. (R)', 'Follicle No. (L)',\n",
    "                     'Skin darkening (Y/N)', 'hair growth(Y/N)', 'Weight gain(Y/N)',\n",
    "                     'Cycle length(days)', 'AMH(ng/mL)', 'Fast food (Y/N)',\n",
    "                     'Cycle(R/I)', 'FSH/LH', 'PRL(ng/mL)', 'Pimples(Y/N)',\n",
    "                     'Age (yrs)', 'BMI']\n",
    "\n",
    "# Fail with a readable message instead of a bare KeyError if the file changes.\n",
    "missing = [col for col in selected_features if col not in data.columns]\n",
    "assert not missing, f'columns missing from {INPUT_CSV}: {missing}'\n",
    "\n",
    "new_dataset = data[selected_features]\n",
    "\n",
    "# Save the new dataset to a CSV file\n",
    "new_dataset.to_csv(OUTPUT_CSV, index=False)\n",
    "\n",
    "# Display the new dataset\n",
    "new_dataset.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}