saherPervaiz committed on
Commit
a536b1b
·
verified ·
1 Parent(s): ceaabd5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -19
app.py CHANGED
@@ -9,6 +9,7 @@ from sklearn.neighbors import KNeighborsClassifier
9
  from sklearn.tree import DecisionTreeClassifier
10
  from sklearn.naive_bayes import GaussianNB
11
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 
12
  import matplotlib.pyplot as plt
13
  import seaborn as sns
14
 
@@ -28,13 +29,65 @@ if uploaded_file is not None:
28
  if df.empty:
29
  st.warning("The dataset is empty. Please upload a valid CSV file.")
30
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  target = st.selectbox("Select Target Variable", df.columns)
32
  features = [col for col in df.columns if col != target]
33
  X = df[features]
34
  y = df[target]
35
 
36
- # Determine if the target is continuous or categorical
37
- is_classification = y.dtype == 'object' or len(y.unique()) <= 10 # If target is categorical or has few unique values, treat as classification
 
 
 
 
 
 
 
 
 
38
 
39
  # Ensure there is enough data before proceeding with train-test split
40
  if len(X) == 0 or len(y) == 0:
@@ -164,20 +217,4 @@ if uploaded_file is not None:
164
  table_data = metrics_df.values
165
  table_columns = metrics_df.columns.tolist()
166
 
167
- table = ax.table(cellText=table_data, colLabels=table_columns, loc='center', cellLoc='center', colLoc='center')
168
- table.auto_set_font_size(False)
169
- table.set_fontsize(10)
170
- table.scale(1.2, 1.2) # Adjust the scale for better appearance
171
-
172
- # Save the table as a PNG file
173
- png_file = "model_report.png"
174
- fig.savefig(png_file, bbox_inches='tight', dpi=300)
175
-
176
- # Provide a download button for the PNG file
177
- with open(png_file, "rb") as file:
178
- st.download_button(
179
- label="Download as PNG",
180
- data=file,
181
- file_name="model_report.png",
182
- mime="image/png"
183
- )
 
9
  from sklearn.tree import DecisionTreeClassifier
10
  from sklearn.naive_bayes import GaussianNB
11
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
12
+ import numpy as np
13
  import matplotlib.pyplot as plt
14
  import seaborn as sns
15
 
 
29
  if df.empty:
30
  st.warning("The dataset is empty. Please upload a valid CSV file.")
31
  else:
32
+ # Handle Null Values (Missing Data)
33
+ st.write("Handling Missing (Null) Values:")
34
+ # Option to drop rows with null values or fill them
35
+ fill_method = st.selectbox("Choose how to handle missing values", ["Drop rows", "Fill with mean/median"])
36
+ if fill_method == "Drop rows":
37
+ df = df.dropna()
38
+ elif fill_method == "Fill with mean/median":
39
+ for col in df.columns:
40
+ if df[col].dtype in ['float64', 'int64']:
41
+ df[col].fillna(df[col].mean(), inplace=True) # For numeric columns, fill with mean
42
+ else:
43
+ df[col].fillna(df[col].mode()[0], inplace=True) # For categorical columns, fill with mode
44
+
45
+ # Handle Outliers using IQR method
46
+ st.write("Handling Outliers:")
47
+ # Define function to remove outliers using IQR
48
+ def remove_outliers_iqr(dataframe):
49
+ Q1 = dataframe.quantile(0.25)
50
+ Q3 = dataframe.quantile(0.75)
51
+ IQR = Q3 - Q1
52
+ # Filter out rows that are outside the IQR range
53
+ return dataframe[~((dataframe < (Q1 - 1.5 * IQR)) | (dataframe > (Q3 + 1.5 * IQR))).any(axis=1)]
54
+
55
+ # Remove outlier rows (note: the whole DataFrame is passed here, not only its numeric columns)
56
+ df = remove_outliers_iqr(df)
57
+
58
+ # Handle Extreme Values by Capping (Winsorization)
59
+ st.write("Handling Extreme Values (Capping):")
60
+ def cap_extreme_values(dataframe):
61
+ for col in dataframe.select_dtypes(include=[np.number]).columns:
62
+ # Define the thresholds for extreme values (95th percentile and 5th percentile)
63
+ lower_limit = dataframe[col].quantile(0.05)
64
+ upper_limit = dataframe[col].quantile(0.95)
65
+ # Cap the extreme values
66
+ dataframe[col] = np.clip(dataframe[col], lower_limit, upper_limit)
67
+ return dataframe
68
+
69
+ df = cap_extreme_values(df)
70
+
71
+ # Show cleaned dataset
72
+ st.write("Cleaned Dataset:")
73
+ st.dataframe(df)
74
+
75
  target = st.selectbox("Select Target Variable", df.columns)
76
  features = [col for col in df.columns if col != target]
77
  X = df[features]
78
  y = df[target]
79
 
80
+ # Label Encoding for categorical columns
81
+ label_encoder = LabelEncoder()
82
+
83
+ # Encode the target variable (if it's categorical)
84
+ if y.dtype == 'object' or len(y.unique()) <= 10: # If the target variable is categorical
85
+ y = label_encoder.fit_transform(y)
86
+
87
+ # Encode categorical feature columns (if any)
88
+ for col in X.columns:
89
+ if X[col].dtype == 'object' or len(X[col].unique()) <= 10: # If the column is categorical
90
+ X[col] = label_encoder.fit_transform(X[col])
91
 
92
  # Ensure there is enough data before proceeding with train-test split
93
  if len(X) == 0 or len(y) == 0:
 
217
  table_data = metrics_df.values
218
  table_columns = metrics_df.columns.tolist()
219
 
220
+ table = ax.table(cellText=table_data, colLabels=table_columns, loc='center', cellLoc='center',