# Uploaded by: amariayudha
# Commit message: Upload 37 files
# Commit: c9c73fb (verified)
# eda.py
# Import necessary libraries
import streamlit as st
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Dataset summary loader; wrapped in st.cache_data so Streamlit can reuse the result
@st.cache_data
def load_data():
    """Return the number of images available for each waste class.

    Returns:
        pandas.DataFrame: one row per waste class with the columns
        'Class' (class name) and 'Number of Images' (image count),
        listed from the largest class to the smallest.
    """
    # (class name, image count) pairs, hard-coded summary of the dataset
    class_counts = [
        ('Plastic', 921),
        ('Metal', 790),
        ('Paper', 500),
        ('Miscellaneous Trash', 495),
        ('Cardboard', 461),
        ('Vegetation', 436),
        ('Glass', 420),
        ('Food Organics', 411),
        ('Textile Trash', 318),
    ]
    return pd.DataFrame(class_counts, columns=['Class', 'Number of Images'])
def _show_sample_images(category, samples_per_category=3):
    """Render the heading and one row of sample images for a waste category.

    Images are read from './visualization/<category> (<i>).jpg' for
    i = 1..samples_per_category, one image per column.

    Args:
        category: Category key used in the image file names (e.g. 'glass').
        samples_per_category: Number of images (and columns) to show.
    """
    # Function-scope import so this helper does not depend on the file header.
    from pathlib import Path

    label = category.capitalize()
    st.write(f"**{label}**")
    columns = st.columns(samples_per_category)
    for index, column in enumerate(columns, start=1):
        img_path = f'./visualization/{category} ({index}).jpg'
        with column:
            if Path(img_path).is_file():
                # NOTE(review): use_column_width is reported as deprecated in
                # recent Streamlit releases; confirm the pinned version before
                # switching to the newer width arguments.
                st.image(img_path, caption=f'{label} ({index})', use_column_width=True)
            else:
                # Handing a missing local file to st.image would presumably
                # abort rendering of the rest of the page; report the missing
                # sample in its slot instead.
                st.warning(f"Sample image not found: {img_path}")


# Main function to run the Streamlit app
def run():
    """Render the exploratory data analysis page.

    Shows a selectbox with two views:
      * "Dataset Information and Distribution": the class-count table
        (toggled by a checkbox) plus a bar chart and a pie chart of the
        number of images per class.
      * "Sample Images": three sample images for a chosen category, with an
        optional checkbox to also show every other category.
    """
    st.title('📊 Exploratory Data Analysis - Waste Classification')

    # Load the data
    data = load_data()

    # Create a selectbox for users to choose visualization
    visualization_option = st.selectbox(
        "Choose a visualization:",
        ("Dataset Information and Distribution", "Sample Images")
    )

    if visualization_option == "Dataset Information and Distribution":
        st.subheader("Dataset Information and Distribution")

        # Checkbox controls only the raw table and its description.
        # NOTE(review): nesting was reconstructed from an unindented source;
        # the charts below are assumed to render regardless of this checkbox.
        show_dataset_info = st.checkbox("Show Dataset Information", value=True)
        if show_dataset_info:
            st.write(data)
            st.write("The dataset shows an uneven distribution across the nine waste categories. "
                     "This imbalance may impact model performance and will need to be addressed during the model training phase.")

        # Bar chart
        fig_bar = px.bar(data, x='Class', y='Number of Images', color='Class',
                         title='Number of Images per Waste Category')
        st.plotly_chart(fig_bar, use_container_width=True)

        # Pie chart
        fig_pie = px.pie(data, values='Number of Images', names='Class',
                         title='Proportion of Images per Waste Category')
        st.plotly_chart(fig_pie, use_container_width=True)

        st.write("These charts show the distribution of images across different waste categories. "
                 "Plastic and Metal categories have significantly more images, which could lead to bias in the model.")

    elif visualization_option == "Sample Images":
        st.subheader("Sample Images")
        st.write("Here are sample images from each waste category:")

        # Keys match the image file names under ./visualization
        categories = ['cardboard', 'food_organics', 'glass', 'metal', 'misc', 'paper', 'plastic', 'textile', 'vegetation']

        # Create a selectbox for choosing a specific category
        selected_category = st.selectbox("Select a waste category:", categories)
        _show_sample_images(selected_category)

        st.write("These sample images provide a visual representation of the selected waste category in our dataset.")

        # Add an option to view all categories
        if st.checkbox("View all categories"):
            for category in categories:
                # The selected category is already displayed above.
                if category == selected_category:
                    continue
                _show_sample_images(category)
                # NOTE(review): divider assumed to follow each displayed
                # category only; confirm against the original nesting.
                st.markdown("---")  # Add a horizontal line after each category
# Start the app only when this file is executed directly, not when it is imported
if __name__ == "__main__":
    run()