elsayedelmandoh committed on
Commit
1e50c8e
·
1 Parent(s): 49d9cc7

update eda and req

Browse files
notebooks/02_eda.ipynb CHANGED
@@ -10,13 +10,25 @@
10
  },
11
  {
12
  "cell_type": "code",
13
- "execution_count": null,
14
  "id": "548992be",
15
  "metadata": {},
16
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
17
  "source": [
18
  "import matplotlib.pyplot as plt\n",
19
- "import seaborn as sns"
20
  ]
21
  },
22
  {
@@ -108,7 +120,15 @@
108
  "name": "python3"
109
  },
110
  "language_info": {
 
 
 
 
 
 
111
  "name": "python",
 
 
112
  "version": "3.12.12"
113
  }
114
  },
 
10
  },
11
  {
12
  "cell_type": "code",
13
+ "execution_count": 1,
14
  "id": "548992be",
15
  "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "ename": "ModuleNotFoundError",
19
+ "evalue": "No module named 'seaborn'",
20
+ "output_type": "error",
21
+ "traceback": [
22
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
23
+ "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)",
24
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmatplotlib\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpyplot\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mplt\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mseaborn\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msns\u001b[39;00m \n",
25
+ "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'seaborn'"
26
+ ]
27
+ }
28
+ ],
29
  "source": [
30
  "import matplotlib.pyplot as plt\n",
31
+ "import seaborn as sns "
32
  ]
33
  },
34
  {
 
120
  "name": "python3"
121
  },
122
  "language_info": {
123
+ "codemirror_mode": {
124
+ "name": "ipython",
125
+ "version": 3
126
+ },
127
+ "file_extension": ".py",
128
+ "mimetype": "text/x-python",
129
  "name": "python",
130
+ "nbconvert_exporter": "python",
131
+ "pygments_lexer": "ipython3",
132
  "version": "3.12.12"
133
  }
134
  },
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
- python=3.13
2
- pydantic=2.10.4
3
- pytest=7.4.0
 
 
 
1
+ # python==3.12
2
+ pandas==3.0.1
3
+ matplotlib==3.10.8
4
+ seaborn==0.13.2
5
+ scikit-learn==1.8.0
src/utils/helpers.py CHANGED
@@ -10,6 +10,12 @@ def _check_and_balance(df: pd.DataFrame, target_col: str = "target", random_stat
10
  """Return a balanced dataframe by undersampling majority classes to the minority count.
11
 
12
  If the dataframe is already balanced (all classes equal), it's returned unchanged.
 
 
 
 
 
 
13
  """
14
  counts = df[target_col].value_counts()
15
  if counts.nunique() == 1:
 
10
  """Return a balanced dataframe by undersampling majority classes to the minority count.
11
 
12
  If the dataframe is already balanced (all classes equal), it's returned unchanged.
13
+ Args:
14
+ df (pd.DataFrame): The input dataframe to balance.
15
+ target_col (str, optional): The name of the target column. Defaults to "target".
16
+ random_state (int, optional): Random state for reproducibility. Defaults to 42.
17
+ Returns:
18
+ pd.DataFrame: A balanced dataframe.
19
  """
20
  counts = df[target_col].value_counts()
21
  if counts.nunique() == 1: