saherPervaiz committed on
Commit
488e1b9
·
verified ·
1 Parent(s): b0fc3db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -60
app.py CHANGED
@@ -1,46 +1,26 @@
1
  import streamlit as st
2
  import pandas as pd
3
- from utils.data_cleaning import handle_missing_values, remove_outliers_iqr, cap_extreme_values
4
- from utils.model_training import encode_categorical, train_and_evaluate
5
- from utils.visualizations import plot_correlation_heatmap, plot_model_performance, save_plot_as_png
6
- from io import BytesIO
7
- from utils.data_cleaning import handle_missing_values, remove_outliers_iqr, cap_extreme_values
8
- from utils.model_training import encode_categorical, train_and_evaluate
9
- from utils.visualizations import plot_correlation_heatmap, plot_model_performance, save_plot_as_png
10
- # Streamlit app title
11
- st.title("Model Training with Outlier Removal, Metrics, and Correlation Heatmap")
12
 
13
  # File uploader
 
14
  uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
15
 
16
  if uploaded_file is not None:
17
- # Read the uploaded CSV file
18
  df = pd.read_csv(uploaded_file)
19
 
20
- # Display the dataset
21
  st.write("Dataset:")
22
  st.dataframe(df)
23
 
24
- # Convert categorical (str) data to numerical
25
- df = encode_categorical(df)
26
-
27
- # Handle missing values
28
- st.write("Handling Missing (Null) Values:")
29
- fill_method = st.selectbox("Choose how to handle missing values", ["Drop rows", "Fill with mean/median"])
30
- df = handle_missing_values(df, method=fill_method)
31
-
32
- # Remove outliers using the IQR method
33
- st.write("Removing Outliers Using IQR:")
34
- numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
35
- for col in numeric_cols:
36
- df = remove_outliers_iqr(df, col)
37
-
38
- # Capping Extreme Values (based on 5% and 95% percentiles)
39
- st.write("Handling Extreme Values (Capping):")
40
  df = cap_extreme_values(df)
41
 
42
- # Display dataset after cleaning
43
- st.write("Dataset After Cleaning:")
44
  st.dataframe(df)
45
 
46
  # Add clean data download option
@@ -66,40 +46,18 @@ if uploaded_file is not None:
66
  mime="image/png"
67
  )
68
 
69
- # Select target variable
70
  target = st.selectbox("Select Target Variable", df.columns)
71
  features = [col for col in df.columns if col != target]
72
  X = df[features]
73
  y = df[target]
74
 
75
- # Model Training and Evaluation
76
- if len(y.unique()) > 1: # Ensure the target variable has at least two unique classes/values
77
- model_type = 'classification' if y.dtype == 'object' or len(y.unique()) <= 10 else 'regression'
78
- st.subheader(f"{model_type.title()} Model Training")
79
-
80
- train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
81
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
82
-
83
- metrics = train_and_evaluate(X_train, X_test, y_train, y_test, model_type=model_type)
84
-
85
- # Displaying model performance metrics
86
- metrics_df = pd.DataFrame(metrics)
87
- st.subheader(f"{model_type.title()} Model Performance Metrics")
88
  st.dataframe(metrics_df)
89
-
90
- # Model performance graph
91
- st.subheader(f"{model_type.title()} Model Performance Bar Graph")
92
- performance_graph = plot_model_performance(metrics_df)
93
- st.pyplot(performance_graph)
94
-
95
- # Save performance graph as PNG
96
- performance_graph_buf = save_plot_as_png(performance_graph)
97
- st.download_button(
98
- label=f"Download {model_type.title()} Performance Graph as PNG",
99
- data=performance_graph_buf,
100
- file_name=f"{model_type}_performance_graph.png",
101
- mime="image/png"
102
- )
103
-
104
- else:
105
- st.error("The target variable must contain at least two unique values for classification or regression.")
 
1
  import streamlit as st
2
  import pandas as pd
3
+ from data_cleaning import handle_missing_values, remove_outliers_iqr, cap_extreme_values
4
+ from model_training import train_classification_model, train_regression_model
5
+ from visualization import plot_correlation_heatmap, save_plot_as_png
 
 
 
 
 
 
6
 
7
  # File uploader
8
+ st.title("Model Training with Metrics and Correlation Heatmap")
9
  uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
10
 
11
  if uploaded_file is not None:
 
12
  df = pd.read_csv(uploaded_file)
13
 
14
+ # Show the dataset
15
  st.write("Dataset:")
16
  st.dataframe(df)
17
 
18
+ # Clean data: Missing values, outliers, and extreme values
19
+ df = handle_missing_values(df)
20
+ df = remove_outliers_iqr(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  df = cap_extreme_values(df)
22
 
23
+ st.write("Cleaned Dataset:")
 
24
  st.dataframe(df)
25
 
26
  # Add clean data download option
 
46
  mime="image/png"
47
  )
48
 
49
+ # Target and features selection
50
  target = st.selectbox("Select Target Variable", df.columns)
51
  features = [col for col in df.columns if col != target]
52
  X = df[features]
53
  y = df[target]
54
 
55
+ # Train and evaluate models
56
+ if y.dtype == 'object' or len(y.unique()) <= 10: # Classification
57
+ st.subheader("Classification Model Training")
58
+ metrics_df = train_classification_model(X, y)
 
 
 
 
 
 
 
 
 
59
  st.dataframe(metrics_df)
60
+ else: # Regression
61
+ st.subheader("Regression Model Training")
62
+ regression_metrics_df = train_regression_model(X, y)
63
+ st.dataframe(regression_metrics_df)