# Import libraries
import glob
import os
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st
import tensorflow as tf
from skimage.feature import local_binary_pattern

# Default dataset root; `run()` reads the `train` sub-folder beneath it.
DEFAULT_MAIN_PATH = '/content/drive/MyDrive/Oily-Dry-Skin-Types/'


def _list_class_dirs(train_path):
    """Return the sorted names of the sub-directories of *train_path*."""
    # Plain files next to the class folders are skipped: listing them as if
    # they were folders would raise NotADirectoryError later on.
    return sorted(
        entry
        for entry in os.listdir(train_path)
        if os.path.isdir(os.path.join(train_path, entry))
    )


def _iter_images(folder, flags=cv2.IMREAD_COLOR):
    """Yield every file in *folder* that OpenCV manages to decode."""
    for img_name in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, img_name), flags)
        # cv2.imread returns None for anything it cannot decode.
        if img is not None:
            yield img


def _show(fig):
    """Render *fig* on the Streamlit page and release it."""
    # plt.show() draws nothing inside a Streamlit app; st.pyplot is what
    # actually puts the figure on the page.
    st.pyplot(fig)
    plt.close(fig)


def _plot_color_histograms(train_path, classes):
    """Plot the summed R/G/B histograms of every image of each class."""
    for skin_type in classes:
        hist_r, hist_g, hist_b = np.zeros(256), np.zeros(256), np.zeros(256)
        n_images = 0

        # Iterate over every image of the class
        for img in _iter_images(os.path.join(train_path, skin_type)):
            # OpenCV loads colour images as BGR: index 2 is red, 0 is blue.
            hist_r += cv2.calcHist([img], [2], None, [256], [0, 256]).flatten()
            hist_g += cv2.calcHist([img], [1], None, [256], [0, 256]).flatten()
            hist_b += cv2.calcHist([img], [0], None, [256], [0, 256]).flatten()
            n_images += 1

        if n_images == 0:
            st.warning(f'No readable images found for {skin_type}.')
            continue

        # Plot the colour histogram
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.set_title(f'Color Histogram for {skin_type}')
        ax.plot(hist_r, color='r', label='Red')
        ax.plot(hist_g, color='g', label='Green')
        ax.plot(hist_b, color='b', label='Blue')
        ax.legend()
        _show(fig)


def _plot_texture_histograms(train_path, classes, radius=3):
    """Plot the mean uniform-LBP histogram of each class."""
    # LBP parameters
    n_points = 8 * radius

    for skin_type in classes:
        folder = os.path.join(train_path, skin_type)
        lbp_values = []
        for img in _iter_images(folder, cv2.IMREAD_GRAYSCALE):
            lbp = local_binary_pattern(img, n_points, radius, method='uniform')
            lbp_values.append(
                np.histogram(lbp.ravel(), bins=range(0, n_points + 3))[0]
            )

        # np.mean of an empty list is a scalar NaN, which cannot be plotted.
        if not lbp_values:
            st.warning(f'No readable images found for {skin_type}.')
            continue

        # Plot the average LBP histogram on a fresh figure so that bars of
        # different classes do not accumulate on the same axes.
        avg_lbp = np.mean(lbp_values, axis=0)
        fig, ax = plt.subplots()
        ax.bar(range(len(avg_lbp)), avg_lbp)
        ax.set_title(f'Texture Analysis (LBP) for {skin_type}')
        ax.set_xlabel('LBP Value')
        ax.set_ylabel('Frequency')
        _show(fig)


def _plot_spot_distributions(train_path, classes):
    """Plot the distribution of per-image dark-contour counts of each class."""
    for skin_type in classes:
        folder = os.path.join(train_path, skin_type)
        spot_counts = []
        for img in _iter_images(folder, cv2.IMREAD_GRAYSCALE):
            # Preprocessing: inverse threshold, so pixels <= 50 become white.
            _, thresh = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY_INV)
            contours, _ = cv2.findContours(
                thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )
            # Count the spots (external contours)
            spot_counts.append(len(contours))

        if not spot_counts:
            st.warning(f'No readable images found for {skin_type}.')
            continue

        # Plot the distribution of spot counts on a fresh figure.
        fig, ax = plt.subplots()
        ax.hist(spot_counts, bins=20)
        ax.set_title(f'Spot Distribution for {skin_type}')
        ax.set_xlabel('Number of Spots')
        ax.set_ylabel('Frequency')
        _show(fig)


def run(main_path: str = DEFAULT_MAIN_PATH) -> None:
    """Render the skin-type Exploratory Data Analysis page in Streamlit.

    The page shows a title, an introduction and a banner image, then three
    analyses computed over the images of every class folder found in
    ``<main_path>/train``:

    1. summed per-channel colour histograms,
    2. the mean uniform Local Binary Pattern histogram (texture),
    3. the distribution of the number of dark contours per image.

    Each analysis is followed by a written insight.

    Args:
        main_path: Dataset root containing a ``train`` folder with one
            sub-folder per skin type. Defaults to ``DEFAULT_MAIN_PATH``.

    Returns:
        None. If the ``train`` folder does not exist, an error message is
        shown on the page and the analyses are skipped.
    """
    # Introduction
    st.title("Exploratory Data Analysis of Skin Type")

    # Markdown-style description
    st.write('This page contains Exploratory Data Analysis of Skin Type based on previous model')
    st.write('---')

    # Insert the banner image
    link_gambar = ('https://i.ytimg.com/vi/vic-EMOivpA/maxresdefault.jpg')
    st.image(link_gambar, caption='What is your skin type?', use_container_width=True)

    # Section heading
    st.write('## DataFrame')

    train_path = os.path.join(main_path, 'train')
    if not os.path.isdir(train_path):
        # Show a readable message instead of crashing with a traceback.
        st.error(f'Training folder not found: {train_path}')
        return

    classes = _list_class_dirs(train_path)

    # EDA 1
    st.write('EDA 1: Histogram Colour Analysis for Every Skin Type')
    _plot_color_histograms(train_path, classes)
    st.write("**Insight**")
    st.write("From the analysis above, we can see that there's no significant color distribution for specific skin type. This is perhaps because the data is not standardized (camera angel)")

    # EDA 2
    st.write("EDA 2: Texture Analysis for Every Skin Type")
    _plot_texture_histograms(train_path, classes)
    st.write("***Insight***")
    st.write("From the analysis above, we can't barely see the difference of skin texture for every skin type. Again, perhaps due to the data is not standardized(no make up rule so the skin texture is clear)")

    # EDA 3
    st.write("EDA 3: Pores and Spot Analysis for Every Skin Type")
    _plot_spot_distributions(train_path, classes)
    st.write("***Insight***")
    st.write("From the analysis above, we can see the difference of pores and spot for every skin type. The list of skin type that have the most pores and spot is as followed: normal skin, oily skin, dry skin")