# eda.py
"""Streamlit page for exploratory data analysis of the waste classification dataset."""

# Import necessary libraries
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st

# Category names as used in the sample image file names
# ('./visualization/<category> (<n>).jpg').
SAMPLE_CATEGORIES = [
    'cardboard', 'food_organics', 'glass', 'metal', 'misc',
    'paper', 'plastic', 'textile', 'vegetation',
]

# Number of sample images displayed side by side for each category.
SAMPLES_PER_CATEGORY = 3


# Function to load data with caching for performance
@st.cache_data
def load_data():
    """Return the per-class image counts of the dataset as a DataFrame.

    The frame has two columns, 'Class' and 'Number of Images', with one row
    per waste category. The values are hard-coded in this function.
    """
    return pd.DataFrame({
        'Class': ['Plastic', 'Metal', 'Paper', 'Miscellaneous Trash', 'Cardboard',
                  'Vegetation', 'Glass', 'Food Organics', 'Textile Trash'],
        'Number of Images': [921, 790, 500, 495, 461, 436, 420, 411, 318],
    })


def _show_category_images(category):
    """Render a bold heading and one row of sample images for *category*.

    A missing image file produces a warning in its column instead of an
    exception, so one absent sample does not take down the whole page.
    """
    st.write(f"**{category.capitalize()}**")
    columns = st.columns(SAMPLES_PER_CATEGORY)
    for index, column in enumerate(columns, start=1):
        # Relative path: resolved against the process working directory.
        img_path = f'./visualization/{category} ({index}).jpg'
        with column:
            if Path(img_path).is_file():
                st.image(img_path, caption=f'{category.capitalize()} ({index})',
                         use_column_width=True)
            else:
                st.warning(f"Sample image not found: {img_path}")


def _render_distribution(data):
    """Render the dataset table (optional), bar chart and pie chart for *data*."""
    st.subheader("Dataset Information and Distribution")

    # Add checkbox for showing dataset information
    show_dataset_info = st.checkbox("Show Dataset Information", value=True)
    if show_dataset_info:
        st.write(data)
        st.write("The dataset shows an uneven distribution across the nine waste categories. "
                 "This imbalance may impact model performance and will need to be addressed during the model training phase.")

    # Bar chart
    fig_bar = px.bar(data, x='Class', y='Number of Images', color='Class',
                     title='Number of Images per Waste Category')
    st.plotly_chart(fig_bar, use_container_width=True)

    # Pie chart
    fig_pie = px.pie(data, values='Number of Images', names='Class',
                     title='Proportion of Images per Waste Category')
    st.plotly_chart(fig_pie, use_container_width=True)

    st.write("These charts show the distribution of images across different waste categories. \n"
             "Plastic and Metal categories have significantly more images, which could lead to bias in the model.")


def _render_sample_images():
    """Render sample images for a chosen category and, optionally, all others."""
    st.subheader("Sample Images")
    st.write("Here are sample images from each waste category:")

    # Create a selectbox for choosing a specific category
    selected_category = st.selectbox("Select a waste category:", SAMPLE_CATEGORIES)
    _show_category_images(selected_category)

    st.write("These sample images provide a visual representation of the selected waste category in our dataset.")

    # Add an option to view all categories
    if st.checkbox("View all categories"):
        for category in SAMPLE_CATEGORIES:
            # The selected category is already shown above; skip it here.
            if category == selected_category:
                continue
            _show_category_images(category)
            st.markdown("---")  # Add a horizontal line after each category


# Main function to run the Streamlit app
def run():
    """Render the EDA page: title, view selector and the chosen visualization."""
    st.title('📊 Exploratory Data Analysis - Waste Classification')

    # Load the data
    data = load_data()

    # Create a selectbox for users to choose visualization
    visualization_option = st.selectbox(
        "Choose a visualization:",
        ("Dataset Information and Distribution", "Sample Images"),
    )

    if visualization_option == "Dataset Information and Distribution":
        _render_distribution(data)
    elif visualization_option == "Sample Images":
        _render_sample_images()


# Entry point of the script
if __name__ == "__main__":
    run()