# File size: 4,661 Bytes

# 7146f0f

#import libraries
import pandas as pd
import numpy as np
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import glob
import cv2
from skimage.feature import local_binary_pattern

import tensorflow as tf
from pathlib import Path

def _list_class_dirs(root):
    """Return the sorted names of the sub-directories found in ``root``.

    Plain files sitting next to the class folders are skipped, so every
    returned name can safely be passed to ``os.listdir``.
    """
    return sorted(
        name for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name))
    )


def _iter_images(class_dir, grayscale=False):
    """Yield every file in ``class_dir`` that OpenCV manages to decode.

    ``cv2.imread`` returns ``None`` for files it cannot read; those are
    silently skipped, exactly like the original ``if img is not None`` guard.
    """
    for img_name in sorted(os.listdir(class_dir)):
        img_path = os.path.join(class_dir, img_name)
        if grayscale:
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        else:
            img = cv2.imread(img_path)
        if img is not None:
            yield img


def _show_figure(fig):
    """Render ``fig`` inside the Streamlit page, then free it."""
    # plt.show() draws nothing in a Streamlit app; the figure has to be
    # handed to Streamlit explicitly.
    st.pyplot(fig)
    # Close the figure so repeated reruns of the page do not leak memory.
    plt.close(fig)


def _plot_color_histogram(class_dir, skin_type):
    """Plot the summed R/G/B histograms of all images of one skin type."""
    hist_r, hist_g, hist_b = np.zeros(256), np.zeros(256), np.zeros(256)

    for img in _iter_images(class_dir):
        # Channel indices 2/1/0 are treated as red/green/blue here.
        hist_r += cv2.calcHist([img], [2], None, [256], [0, 256]).flatten()
        hist_g += cv2.calcHist([img], [1], None, [256], [0, 256]).flatten()
        hist_b += cv2.calcHist([img], [0], None, [256], [0, 256]).flatten()

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.set_title(f'Color Histogram for {skin_type}')
    ax.plot(hist_r, color='r', label='Red')
    ax.plot(hist_g, color='g', label='Green')
    ax.plot(hist_b, color='b', label='Blue')
    ax.legend()
    _show_figure(fig)


def _plot_lbp_histogram(class_dir, skin_type, radius=3):
    """Plot the mean uniform-LBP histogram of all images of one skin type."""
    n_points = 8 * radius
    # Same bin edges as before: range(0, n_points + 3).
    bin_edges = range(0, n_points + 3)

    lbp_values = []
    for img in _iter_images(class_dir, grayscale=True):
        lbp = local_binary_pattern(img, n_points, radius, method='uniform')
        lbp_values.append(np.histogram(lbp.ravel(), bins=bin_edges)[0])

    if not lbp_values:
        # np.mean([]) would yield NaN and the bar plot would then fail.
        st.warning(f'No readable images found for {skin_type}')
        return

    avg_lbp = np.mean(lbp_values, axis=0)
    fig, ax = plt.subplots()
    ax.bar(range(len(avg_lbp)), avg_lbp)
    ax.set_title(f'Texture Analysis (LBP) for {skin_type}')
    ax.set_xlabel('LBP Value')
    ax.set_ylabel('Frequency')
    _show_figure(fig)


def _plot_spot_distribution(class_dir, skin_type):
    """Plot how many dark-spot contours each image of one skin type has."""
    spot_counts = []
    for img in _iter_images(class_dir, grayscale=True):
        # Inverse binary threshold at 50: dark pixels become foreground.
        _, thresh = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY_INV)
        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        # Every external contour counts as one spot.
        spot_counts.append(len(contours))

    if not spot_counts:
        st.warning(f'No readable images found for {skin_type}')
        return

    fig, ax = plt.subplots()
    ax.hist(spot_counts, bins=20)
    ax.set_title(f'Spot Distribution for {skin_type}')
    ax.set_xlabel('Number of Spots')
    ax.set_ylabel('Frequency')
    _show_figure(fig)


def run():
    """Render the "Exploratory Data Analysis of Skin Type" Streamlit page.

    The page shows an introduction and a banner image, then three analyses
    computed per class folder of the training set:

    1. summed colour histograms (one line per channel),
    2. mean uniform-LBP texture histogram,
    3. distribution of the number of dark-spot contours per image.

    Each analysis draws one figure per skin type and is followed by a short
    insight text. If the training folder does not exist, an error message is
    displayed and the analyses are skipped instead of crashing the page.
    """
    # Introduction
    st.title("Exploratory Data Analysis of Skin Type")
    st.write('This page contains Exploratory Data Analysis of Skin Type based on previous model')
    st.write('---')

    # Banner image
    link_gambar = ('https://i.ytimg.com/vi/vic-EMOivpA/maxresdefault.jpg')
    st.image(link_gambar, caption='What is your skin type?', use_container_width=True)

    # Dataset location (one sub-folder per skin type inside "train")
    st.write('## DataFrame')
    main_path = '/content/drive/MyDrive/Oily-Dry-Skin-Types/'
    train_path = os.path.join(main_path, 'train')

    if not os.path.isdir(train_path):
        st.error(f'Training data folder not found: {train_path}')
        return

    classes = _list_class_dirs(train_path)

    # EDA 1: one colour histogram figure per skin type
    st.write('EDA 1: Histogram Colour Analysis for Every Skin Type')
    for skin_type in classes:
        _plot_color_histogram(os.path.join(train_path, skin_type), skin_type)

    st.write("**Insight**")
    st.write("From the analysis above, we can see that there's no significant color distribution for specific skin type. This is perhaps because the data is not standardized (camera angel)")

    # EDA 2: one LBP texture figure per skin type
    st.write("EDA 2: Texture Analysis for Every Skin Type")
    for skin_type in classes:
        _plot_lbp_histogram(os.path.join(train_path, skin_type), skin_type, radius=3)

    st.write("***Insight***")
    st.write("From the analysis above, we can't barely see the difference of skin texture for every skin type. Again, perhaps due to the data is not standardized(no make up rule so the skin texture is clear)")

    # EDA 3: one spot-count distribution figure per skin type
    st.write("EDA 3: Pores and Spot Analysis for Every Skin Type")
    for skin_type in classes:
        _plot_spot_distribution(os.path.join(train_path, skin_type), skin_type)

    st.write("***Insight***")
    st.write("From the analysis above, we can see the difference of pores and spot for every skin type. The list of skin type that have the most pores and spot is as followed: normal skin, oily skin, dry skin")