Spaces:

saherPervaiz
/

ModelTrain

Running

App Files Community

saherPervaiz committed on Jan 12, 2025

Commit

a536b1b

verified ·

1 Parent(s): ceaabd5

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -19

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from sklearn.neighbors import KNeighborsClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.naive_bayes import GaussianNB
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 import matplotlib.pyplot as plt
 import seaborn as sns
@@ -28,13 +29,65 @@ if uploaded_file is not None:
     if df.empty:
         st.warning("The dataset is empty. Please upload a valid CSV file.")
     else:
         target = st.selectbox("Select Target Variable", df.columns)
         features = [col for col in df.columns if col != target]
         X = df[features]
         y = df[target]
-        # Determine if the target is continuous or categorical
-        is_classification = y.dtype == 'object' or len(y.unique()) <= 10  # If target is categorical or has few unique values, treat as classification
         # Ensure there is enough data before proceeding with train-test split
         if len(X) == 0 or len(y) == 0:
@@ -164,20 +217,4 @@ if uploaded_file is not None:
             table_data = metrics_df.values
             table_columns = metrics_df.columns.tolist()
-            table = ax.table(cellText=table_data, colLabels=table_columns, loc='center', cellLoc='center', colLoc='center')
-            table.auto_set_font_size(False)
-            table.set_fontsize(10)
-            table.scale(1.2, 1.2)  # Adjust the scale for better appearance
-            # Save the table as a PNG file
-            png_file = "model_report.png"
-            fig.savefig(png_file, bbox_inches='tight', dpi=300)
-            # Provide a download button for the PNG file
-            with open(png_file, "rb") as file:
-                st.download_button(
-                    label="Download as PNG",
-                    data=file,
-                    file_name="model_report.png",
-                    mime="image/png"
-                )

 from sklearn.tree import DecisionTreeClassifier
 from sklearn.naive_bayes import GaussianNB
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
     if df.empty:
         st.warning("The dataset is empty. Please upload a valid CSV file.")
     else:
+        # Handle Null Values (Missing Data)
+        st.write("Handling Missing (Null) Values:")
+        # Option to drop rows with null values or fill them
+        fill_method = st.selectbox("Choose how to handle missing values", ["Drop rows", "Fill with mean/median"])
+        if fill_method == "Drop rows":
+            df = df.dropna()
+        elif fill_method == "Fill with mean/median":
+            for col in df.columns:
+                if df[col].dtype in ['float64', 'int64']:
+                    df[col].fillna(df[col].mean(), inplace=True)  # For numeric columns, fill with mean
+                else:
+                    df[col].fillna(df[col].mode()[0], inplace=True)  # For categorical columns, fill with mode
+        # Handle Outliers using IQR method
+        st.write("Handling Outliers:")
+        # Define function to remove outliers using IQR
+        def remove_outliers_iqr(dataframe):
+            Q1 = dataframe.quantile(0.25)
+            Q3 = dataframe.quantile(0.75)
+            IQR = Q3 - Q1
+            # Filter out rows that are outside the IQR range
+            return dataframe[~((dataframe < (Q1 - 1.5 * IQR)) | (dataframe > (Q3 + 1.5 * IQR))).any(axis=1)]
+        # Remove outliers from the numerical columns
+        df = remove_outliers_iqr(df)
+        # Handle Extreme Values by Capping (Winsorization)
+        st.write("Handling Extreme Values (Capping):")
+        def cap_extreme_values(dataframe):
+            for col in dataframe.select_dtypes(include=[np.number]).columns:
+                # Define the thresholds for extreme values (95th percentile and 5th percentile)
+                lower_limit = dataframe[col].quantile(0.05)
+                upper_limit = dataframe[col].quantile(0.95)
+                # Cap the extreme values
+                dataframe[col] = np.clip(dataframe[col], lower_limit, upper_limit)
+            return dataframe
+        df = cap_extreme_values(df)
+        # Show cleaned dataset
+        st.write("Cleaned Dataset:")
+        st.dataframe(df)
         target = st.selectbox("Select Target Variable", df.columns)
         features = [col for col in df.columns if col != target]
         X = df[features]
         y = df[target]
+        # Label Encoding for categorical columns
+        label_encoder = LabelEncoder()
+        # Encode the target variable (if it's categorical)
+        if y.dtype == 'object' or len(y.unique()) <= 10:  # If the target variable is categorical
+            y = label_encoder.fit_transform(y)
+        # Encode categorical feature columns (if any)
+        for col in X.columns:
+            if X[col].dtype == 'object' or len(X[col].unique()) <= 10:  # If the column is categorical
+                X[col] = label_encoder.fit_transform(X[col])
         # Ensure there is enough data before proceeding with train-test split
         if len(X) == 0 or len(y) == 0:
             table_data = metrics_df.values
             table_columns = metrics_df.columns.tolist()
+            table = ax.table(cellText=table_data, colLabels=table_columns, loc='center', cellLoc='center',