Spaces:
Sleeping
Sleeping
Commit ·
1e50c8e
1
Parent(s): 49d9cc7
update eda and req
Browse files
- notebooks/02_eda.ipynb +23 -3
- requirements.txt +5 -3
- src/utils/helpers.py +6 -0
notebooks/02_eda.ipynb
CHANGED
|
@@ -10,13 +10,25 @@
|
|
| 10 |
},
|
| 11 |
{
|
| 12 |
"cell_type": "code",
|
| 13 |
-
"execution_count":
|
| 14 |
"id": "548992be",
|
| 15 |
"metadata": {},
|
| 16 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
"source": [
|
| 18 |
"import matplotlib.pyplot as plt\n",
|
| 19 |
-
"import seaborn as sns"
|
| 20 |
]
|
| 21 |
},
|
| 22 |
{
|
|
@@ -108,7 +120,15 @@
|
|
| 108 |
"name": "python3"
|
| 109 |
},
|
| 110 |
"language_info": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
"name": "python",
|
|
|
|
|
|
|
| 112 |
"version": "3.12.12"
|
| 113 |
}
|
| 114 |
},
|
|
|
|
| 10 |
},
|
| 11 |
{
|
| 12 |
"cell_type": "code",
|
| 13 |
+
"execution_count": 1,
|
| 14 |
"id": "548992be",
|
| 15 |
"metadata": {},
|
| 16 |
+
"outputs": [
|
| 17 |
+
{
|
| 18 |
+
"ename": "ModuleNotFoundError",
|
| 19 |
+
"evalue": "No module named 'seaborn'",
|
| 20 |
+
"output_type": "error",
|
| 21 |
+
"traceback": [
|
| 22 |
+
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
| 23 |
+
"\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)",
|
| 24 |
+
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmatplotlib\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpyplot\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mplt\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mseaborn\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msns\u001b[39;00m \n",
|
| 25 |
+
"\u001b[31mModuleNotFoundError\u001b[39m: No module named 'seaborn'"
|
| 26 |
+
]
|
| 27 |
+
}
|
| 28 |
+
],
|
| 29 |
"source": [
|
| 30 |
"import matplotlib.pyplot as plt\n",
|
| 31 |
+
"import seaborn as sns "
|
| 32 |
]
|
| 33 |
},
|
| 34 |
{
|
|
|
|
| 120 |
"name": "python3"
|
| 121 |
},
|
| 122 |
"language_info": {
|
| 123 |
+
"codemirror_mode": {
|
| 124 |
+
"name": "ipython",
|
| 125 |
+
"version": 3
|
| 126 |
+
},
|
| 127 |
+
"file_extension": ".py",
|
| 128 |
+
"mimetype": "text/x-python",
|
| 129 |
"name": "python",
|
| 130 |
+
"nbconvert_exporter": "python",
|
| 131 |
+
"pygments_lexer": "ipython3",
|
| 132 |
"version": "3.12.12"
|
| 133 |
}
|
| 134 |
},
|
requirements.txt
CHANGED
|
@@ -1,3 +1,5 @@
|
|
| 1 |
-
python=3.
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
# python==3.12
|
| 2 |
+
pandas==3.0.1
|
| 3 |
+
matplotlib==3.10.8
|
| 4 |
+
seaborn==0.13.2
|
| 5 |
+
scikit-learn==1.8.0
|
src/utils/helpers.py
CHANGED
|
@@ -10,6 +10,12 @@ def _check_and_balance(df: pd.DataFrame, target_col: str = "target", random_stat
|
|
| 10 |
"""Return a balanced dataframe by undersampling majority classes to the minority count.
|
| 11 |
|
| 12 |
If the dataframe is already balanced (all classes equal), it's returned unchanged.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"""
|
| 14 |
counts = df[target_col].value_counts()
|
| 15 |
if counts.nunique() == 1:
|
|
|
|
| 10 |
"""Return a balanced dataframe by undersampling majority classes to the minority count.
|
| 11 |
|
| 12 |
If the dataframe is already balanced (all classes equal), it's returned unchanged.
|
| 13 |
+
Args:
|
| 14 |
+
df (pd.DataFrame): The input dataframe to balance.
|
| 15 |
+
target_col (str, optional): The name of the target column. Defaults to "target".
|
| 16 |
+
random_state (int, optional): Random state for reproducibility. Defaults to 42.
|
| 17 |
+
Returns:
|
| 18 |
+
pd.DataFrame: A balanced dataframe.
|
| 19 |
"""
|
| 20 |
counts = df[target_col].value_counts()
|
| 21 |
if counts.nunique() == 1:
|