Spaces:

saherPervaiz
/

Depression

Sleeping

App Files Files Community

saherPervaiz committed on Jan 14, 2025

Commit

eee2b22

verified ·

1 Parent(s): 557bc92

Update utils/visualizations.py

Browse files

Files changed (1) hide show

utils/visualizations.py +60 -85

utils/visualizations.py CHANGED Viewed

@@ -1,86 +1,61 @@
-import seaborn as sns
-import matplotlib.pyplot as plt
 import pandas as pd
-# Correlation Heatmap
-def plot_correlation_heatmap(df):
-    """
-    Plot a correlation heatmap for numeric columns in the dataframe.
-    """
-    # Select only numeric columns
-    numeric_df = df.select_dtypes(include=['float64', 'int64'])
-    # Compute the correlation matrix
-    corr = numeric_df.corr()
-    # Plot the heatmap
-    plt.figure(figsize=(10, 8))
-    sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
-    plt.title("Correlation Heatmap")
-    return plt
-# Save plot as PNG image
-def save_plot_as_png(plot, filename="plot.png"):
-    """
-    Save a given plot as a PNG file.
-    """
-    plot.savefig(filename, format='png')
-    return filename
-# Distribution Plot (Histogram)
-def plot_histogram(df, column):
-    """
-    Plot a histogram for a specified column in the dataframe.
-    """
-    plt.figure(figsize=(8, 6))
-    sns.histplot(df[column], kde=True, color='blue', bins=20)
-    plt.title(f"Distribution of {column}")
-    plt.xlabel(column)
-    plt.ylabel("Frequency")
-    return plt
-# Box Plot (For Outliers)
-def plot_box_plot(df, column):
-    """
-    Plot a box plot for a specified column to visualize outliers.
-    """
-    plt.figure(figsize=(8, 6))
-    sns.boxplot(x=df[column], color='orange')
-    plt.title(f"Box Plot of {column}")
-    plt.xlabel(column)
-    return plt
-# Pair Plot (For Visualizing Relationships Between Features)
-def plot_pair_plot(df):
-    """
-    Plot a pair plot to visualize relationships between numeric columns in the dataframe.
-    """
-    numeric_df = df.select_dtypes(include=['float64', 'int64'])
-    pair_plot = sns.pairplot(numeric_df, hue='target', palette='coolwarm')  # Assuming 'target' is a column for classification
-    pair_plot.fig.set_size_inches(10, 8)
-    return pair_plot
-# Scatter Plot (For Visualizing Relationship Between Two Features)
-def plot_scatter_plot(df, x_column, y_column):
-    """
-    Plot a scatter plot to visualize the relationship between two features.
-    """
-    plt.figure(figsize=(8, 6))
-    sns.scatterplot(x=df[x_column], y=df[y_column], color='green')
-    plt.title(f"Scatter Plot between {x_column} and {y_column}")
-    plt.xlabel(x_column)
-    plt.ylabel(y_column)
-    return plt
-# Bar Plot (For Comparing Categorical Data)
-def plot_bar_plot(df, column):
-    """
-    Plot a bar plot for a categorical column.
-    """
-    plt.figure(figsize=(8, 6))
-    sns.countplot(x=df[column], palette='viridis')
-    plt.title(f"Bar Plot of {column}")
-    plt.xlabel(column)
-    plt.ylabel("Count")
-    return plt

+import streamlit as st
 import pandas as pd
+from utils.visualizations import plot_correlation_heatmap, save_plot_as_png
+# File uploader
+st.title("Model Training with Metrics and Correlation Heatmap")
+uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
+if uploaded_file is not None:
+    df = pd.read_csv(uploaded_file)
+    # Show the dataset
+    st.write("Dataset:")
+    st.dataframe(df)
+    # Clean data: Missing values, outliers, and extreme values (You can add the functions like handle_missing_values, etc.)
+    # df = handle_missing_values(df)  # Un-comment when cleaning functions are added
+    # df = remove_outliers_iqr(df)    # Un-comment when cleaning functions are added
+    # df = cap_extreme_values(df)     # Un-comment when cleaning functions are added
+    st.write("Cleaned Dataset (after applying any cleaning steps):")
+    st.dataframe(df)
+    # Add clean data download option
+    st.subheader("Download Cleaned Dataset")
+    st.download_button(
+        label="Download Cleaned Dataset (CSV)",
+        data=df.to_csv(index=False),
+        file_name="cleaned_dataset.csv",
+        mime="text/csv"
+    )
+    # Correlation Heatmap
+    st.subheader("Correlation Heatmap")
+    corr_plot = plot_correlation_heatmap(df)
+    st.pyplot(corr_plot)  # Display the heatmap in Streamlit
+    # Save heatmap as PNG and allow download
+    heatmap_buf = save_plot_as_png(corr_plot)
+    st.download_button(
+        label="Download Correlation Heatmap as PNG",
+        data=heatmap_buf,
+        file_name="correlation_heatmap.png",
+        mime="image/png"
+    )
+    # Target and features selection
+    target = st.selectbox("Select Target Variable", df.columns)
+    features = [col for col in df.columns if col != target]
+    X = df[features]
+    y = df[target]
+    # Assuming model training and evaluation functions (train_classification_model, etc.) are implemented and imported
+    if y.dtype == 'object' or len(y.unique()) <= 10:  # Classification
+        st.subheader("Classification Model Training")
+        # Example: metrics_df = train_classification_model(X, y)
+        # st.dataframe(metrics_df)
+    else:  # Regression
+        st.subheader("Regression Model Training")
+        # Example: regression_metrics_df = train_regression_model(X, y)
+        # st.dataframe(regression_metrics_df)