Skin_Type / eda.py
dini15's picture
Upload eda.py
7146f0f verified
#import libraries
import pandas as pd
import numpy as np
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import glob
import cv2
from skimage.feature import local_binary_pattern
import tensorflow as tf
from pathlib import Path
def _list_class_dirs(root):
    """Return the sorted names of the sub-directories directly under *root*.

    Plain files (for example hidden OS metadata files) are skipped so that
    callers can safely ``os.listdir`` each returned entry.
    """
    return sorted(
        entry
        for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    )


def _read_images(folder, flags=None):
    """Yield every file in *folder* that OpenCV manages to decode.

    Args:
        folder: Directory whose entries are passed to ``cv2.imread``.
        flags: Optional ``cv2.imread`` flag (e.g. ``cv2.IMREAD_GRAYSCALE``).
            When ``None`` the OpenCV default is used.

    Files for which ``cv2.imread`` returns ``None`` are silently skipped,
    exactly as the original inline loops did.
    """
    for img_name in os.listdir(folder):
        img_path = os.path.join(folder, img_name)
        if flags is None:
            img = cv2.imread(img_path)
        else:
            img = cv2.imread(img_path, flags)
        if img is not None:
            yield img


def _render_figure(fig, skin_type):
    """Show *fig* in the Streamlit page and release it afterwards.

    ``plt.show()` does not display anything inside a Streamlit app, so the
    figure is handed to ``st.pyplot`` explicitly. The figure is closed right
    after rendering so figures do not pile up across Streamlit reruns.
    A ``None`` figure means the class folder held no readable image.
    """
    if fig is None:
        st.warning(f'No readable images found for {skin_type}')
        return
    st.pyplot(fig)
    plt.close(fig)


def _color_histogram_figure(skin_type, folder):
    """Build the summed per-channel colour histogram plot for one class.

    Returns the Matplotlib figure, or ``None`` when no image could be read.
    """
    hist_r, hist_g, hist_b = np.zeros(256), np.zeros(256), np.zeros(256)
    found = False
    for img in _read_images(folder):
        found = True
        # Channel indices 2 / 1 / 0 are red / green / blue respectively.
        hist_r += cv2.calcHist([img], [2], None, [256], [0, 256]).flatten()
        hist_g += cv2.calcHist([img], [1], None, [256], [0, 256]).flatten()
        hist_b += cv2.calcHist([img], [0], None, [256], [0, 256]).flatten()
    if not found:
        return None

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.set_title(f'Color Histogram for {skin_type}')
    ax.plot(hist_r, color='r', label='Red')
    ax.plot(hist_g, color='g', label='Green')
    ax.plot(hist_b, color='b', label='Blue')
    ax.legend()
    return fig


def _texture_figure(skin_type, folder, n_points, radius):
    """Build the mean LBP histogram bar chart for one class.

    Each grayscale image is converted with ``local_binary_pattern`` (method
    ``'uniform'``) and binned into ``n_points + 2`` integer bins; the bars
    show the per-bin mean over all images of the class.

    Returns the Matplotlib figure, or ``None`` when no image could be read
    (averaging an empty list would otherwise produce NaN).
    """
    lbp_values = []
    for img in _read_images(folder, cv2.IMREAD_GRAYSCALE):
        lbp = local_binary_pattern(img, n_points, radius, method='uniform')
        lbp_values.append(
            np.histogram(lbp.ravel(), bins=range(0, n_points + 3))[0]
        )
    if not lbp_values:
        return None

    avg_lbp = np.mean(lbp_values, axis=0)
    # A fresh figure per class; drawing on the implicit current axes would
    # stack every class's bars on top of each other.
    fig, ax = plt.subplots()
    ax.bar(range(len(avg_lbp)), avg_lbp)
    ax.set_title(f'Texture Analysis (LBP) for {skin_type}')
    ax.set_xlabel('LBP Value')
    ax.set_ylabel('Frequency')
    return fig


def _spot_figure(skin_type, folder):
    """Build the histogram of per-image dark-spot counts for one class.

    A "spot" is an external contour of the inverse-binary mask obtained with
    threshold 50, i.e. a connected region of pixels whose gray value is at
    most 50.

    Returns the Matplotlib figure, or ``None`` when no image could be read.
    """
    spot_counts = []
    for img in _read_images(folder, cv2.IMREAD_GRAYSCALE):
        _, thresh = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY_INV)
        contours, _ = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        spot_counts.append(len(contours))
    if not spot_counts:
        return None

    fig, ax = plt.subplots()
    ax.hist(spot_counts, bins=20)
    ax.set_title(f'Spot Distribution for {skin_type}')
    ax.set_xlabel('Number of Spots')
    ax.set_ylabel('Frequency')
    return fig


def run(main_path='/content/drive/MyDrive/Oily-Dry-Skin-Types/'):
    """Render the skin-type exploratory data analysis page in Streamlit.

    The page shows a title and banner image, then three analyses computed
    over every class folder found under ``<main_path>/train``:

    1. summed RGB colour histograms,
    2. mean Local Binary Pattern (texture) histograms,
    3. distribution of dark-spot (contour) counts per image.

    Args:
        main_path: Root folder of the dataset; it must contain a ``train``
            sub-folder with one directory per skin type. Defaults to the
            path the page was originally written against.

    Returns:
        None. If the ``train`` folder is missing, an error is shown on the
        page and the analyses are skipped instead of raising.
    """
    # Introduction
    st.title("Exploratory Data Analysis of Skin Type")
    st.write('This page contains Exploratory Data Analysis of Skin Type based on previous model')
    st.write('---')

    # Banner image
    link_gambar = ('https://i.ytimg.com/vi/vic-EMOivpA/maxresdefault.jpg')
    st.image(link_gambar, caption='What is your skin type?', use_container_width=True)

    st.write('## DataFrame')
    train_path = os.path.join(main_path, 'train')
    if not os.path.isdir(train_path):
        st.error(f'Training folder not found: {train_path}')
        return

    # One sub-directory per skin type; non-directory entries are ignored.
    classes = _list_class_dirs(train_path)

    # EDA 1
    st.write('EDA 1: Histogram Colour Analysis for Every Skin Type')
    for skin_type in classes:
        folder = os.path.join(train_path, skin_type)
        _render_figure(_color_histogram_figure(skin_type, folder), skin_type)
    st.write("**Insight**")
    st.write("From the analysis above, we can see that there's no significant color distribution for specific skin type. This is perhaps because the data is not standardized (camera angel)")

    # EDA 2
    st.write("EDA 2: Texture Analysis for Every Skin Type")
    # LBP parameters
    radius = 3
    n_points = 8 * radius
    for skin_type in classes:
        folder = os.path.join(train_path, skin_type)
        _render_figure(
            _texture_figure(skin_type, folder, n_points, radius), skin_type
        )
    st.write("***Insight***")
    st.write("From the analysis above, we can't barely see the difference of skin texture for every skin type. Again, perhaps due to the data is not standardized(no make up rule so the skin texture is clear)")

    # EDA 3
    st.write("EDA 3: Pores and Spot Analysis for Every Skin Type")
    for skin_type in classes:
        folder = os.path.join(train_path, skin_type)
        _render_figure(_spot_figure(skin_type, folder), skin_type)
    st.write("***Insight***")
    st.write("From the analysis above, we can see the difference of pores and spot for every skin type. The list of skin type that have the most pores and spot is as followed: normal skin, oily skin, dry skin")