Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| from PIL import Image | |
| from datasets import load_dataset | |
| import random | |
def _open_image(record):
    """Open one entry of the ``image`` column as a PIL image.

    Args:
        record: Mapping stored in the ``image`` column. The ``'path'`` key is
            always read as a fallback; an optional ``'bytes'`` key is
            preferred when it holds data.
            NOTE(review): assumes the cell is a dict shaped like
            ``{'bytes': ..., 'path': ...}`` - confirm against the dataset.

    Returns:
        The opened ``PIL.Image.Image``; the caller is responsible for closing
        it (it can be used as a context manager).
    """
    import io  # local import so the file-level import block stays untouched

    raw = record.get("bytes")
    if raw:
        # Embedded bytes do not depend on the path existing on this machine.
        return Image.open(io.BytesIO(raw))
    return Image.open(record["path"])


def _plot_class_distribution(dataset_df):
    """Build a count plot of ``label_name``, most frequent class first."""
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.countplot(
        data=dataset_df,
        x="label_name",
        order=dataset_df["label_name"].value_counts().index,
        ax=ax,
    )
    # Configure the owning Axes directly instead of pyplot's "current figure"
    # state, so the result does not depend on which figure was created last.
    ax.tick_params(axis="x", rotation=90)
    ax.set_title("Class Distribution")
    return fig


def _plot_class_samples(dataset_df, columns=3):
    """Build a grid showing one sample image per class.

    The number of rows is derived from the number of classes, so no class is
    dropped and cells without an image stay blank.
    """
    # Fixed seed: the same image is picked for each class on every rerun.
    samples = dataset_df.groupby("label_name").sample(1, random_state=42)

    rows = max(1, -(-len(samples) // columns))  # ceiling division
    fig, axes = plt.subplots(
        rows, columns, figsize=(4 * columns, 3 * rows), squeeze=False
    )
    cells = axes.flatten()

    # Hide every frame up front; unused trailing cells would otherwise be
    # rendered as empty axes with ticks.
    for cell in cells:
        cell.axis("off")

    for cell, (_, row) in zip(cells, samples.iterrows()):
        with _open_image(row["image"]) as img:
            cell.imshow(img)
        cell.set_title(row["label_name"])

    fig.tight_layout()
    return fig


def run():
    """Render the tomato-leaf EDA page in Streamlit.

    Loads the ``train`` split of ``Heizsenberg/leaf-image-dataset`` with
    ``load_dataset``, converts it to a DataFrame with an added ``label_name``
    column, and writes to the page: a random sample of rows, the image and
    class counts, a class-distribution plot, the size and mode of one random
    image, a grid with one image per class, and a closing insight text.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title('Tomato Leaf Health Classification')
    st.subheader("this page contains the EDA about tomato leaf health classification")
    st.write("the EDA will explore and analyse classifier tomato leaf health")

    # Fetch the dataset and attach human-readable class names.
    dataset_dict = load_dataset("Heizsenberg/leaf-image-dataset")
    label_names = dataset_dict["train"].features["label"].names
    dataset_df = dataset_dict['train'].to_pandas()
    dataset_df["label_name"] = dataset_df["label"].map(dict(enumerate(label_names)))

    st.write("sample from the dataframe")
    # Cap the sample size: DataFrame.sample raises when asked for more rows
    # than the frame holds.
    st.write(dataset_df.sample(min(15, len(dataset_df))))

    st.write("content of the dataframe")
    st.write("Total images:", len(dataset_df))
    st.write("Total classes:", dataset_df["label"].nunique())

    st.write("Tomato Leaf Training dataset class distribution")
    fig = _plot_class_distribution(dataset_df)
    st.pyplot(fig)
    # Streamlit reruns this function on every interaction; close the figure
    # so figures do not pile up in pyplot's registry.
    plt.close(fig)

    st.write("sample image size and mode")
    sample_record = random.choice(dataset_df["image"].values)
    with _open_image(sample_record) as img:
        st.write("Image size:", img.size)
        st.write("Image mode:", img.mode)

    st.write("sample from each classes")
    fig_samp = _plot_class_samples(dataset_df)
    # Show inside Streamlit
    st.pyplot(fig_samp)
    plt.close(fig_samp)

    st.write("""
## Insight
1. dataset contains around 16.011 in 10 classes
2. class distribution generally spread evenly with few exceptions on `tomato_tomato_mosaic_virus` has lowest samples and `Tomato_YellowLeaf_curl_virus` having the largest samples, showing complexity in detecting the diseases and easier detection of tomato mosaic virus
3. the dataset images is on size (256x256) which needs to be rescaled for lower GPU load
4. several samples is shown from 10 different classes, showing both healthy and disease afflicted leaves
""")
# Script entry point: build the page only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run()