# Skin_Types / eda.py
# dini15's picture
# Update eda.py
# cc02d66 verified
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import cv2
from skimage.feature import local_binary_pattern
# Dataset path validation helper
def validate_path(path):
    """Report whether *path* exists on disk.

    When the path is missing, an error message is shown on the Streamlit
    page and ``False`` is returned; otherwise ``True`` is returned and
    nothing is displayed.
    """
    if os.path.exists(path):
        return True
    st.error(f"Path {path} does not exist. Please check your dataset directory.")
    return False
# Per-channel color histogram helper
def calculate_color_histogram(image):
    """Return 256-bin intensity histograms for the red, green and blue channels.

    Channel indices 2, 1 and 0 are read as red, green and blue respectively,
    i.e. the image is treated as BGR-ordered.  Each histogram is flattened
    to a 1-D array and the three are returned as ``(red, green, blue)``.
    """
    def channel_histogram(channel_index):
        # One histogram over the value range [0, 256) with 256 bins, no mask.
        return cv2.calcHist([image], [channel_index], None, [256], [0, 256]).flatten()

    red, green, blue = (channel_histogram(index) for index in (2, 1, 0))
    return red, green, blue
# Local Binary Pattern (LBP) histogram helper, used by the texture plots
def calculate_lbp(image, radius, n_points):
    """Return the histogram of uniform LBP codes found in *image*.

    The LBP map is computed with ``method='uniform'`` using *n_points*
    sampling points on a circle of the given *radius*.  The codes are then
    counted into ``n_points + 2`` unit-wide bins (edges ``0 .. n_points + 2``).
    """
    pattern_codes = local_binary_pattern(image, n_points, radius, method='uniform')
    bin_edges = range(0, n_points + 3)
    counts = np.histogram(pattern_codes.ravel(), bins=bin_edges)[0]
    return counts
# Main entry point of the Streamlit EDA page, plus its private helpers

# The original code drew every plot on the 10x5 figure created for the color
# histograms, so the same size is used for all figures to keep the layout.
_FIGURE_SIZE = (10, 5)


def _iter_images(directory, flags):
    """Yield every entry of *directory* that OpenCV manages to decode.

    ``cv2.imread`` returns ``None`` for anything it cannot read (non-image
    files, sub-directories, corrupt images); those entries are skipped.
    """
    for file_name in os.listdir(directory):
        image = cv2.imread(os.path.join(directory, file_name), flags)
        if image is not None:
            yield image


def _show_and_close(fig):
    """Render *fig* on the page, then close it.

    An explicit figure is passed to ``st.pyplot`` instead of the global
    ``pyplot`` state, and closing it prevents figures from accumulating
    every time the script is re-run.
    """
    st.pyplot(fig)
    plt.close(fig)


def _warn_no_images(skin_type):
    """Tell the user that a class folder contained no readable image."""
    st.warning(f"No readable images found for class '{skin_type}'; skipping its plot.")


def _color_histogram_section(train_path, classes):
    """EDA 1: plot the summed per-channel color histograms of each class."""
    st.write("### EDA 1: Color Histogram Analysis")
    for skin_type in classes:
        class_dir = os.path.join(train_path, skin_type)
        hist_r, hist_g, hist_b = np.zeros(256), np.zeros(256), np.zeros(256)
        image_count = 0
        for image in _iter_images(class_dir, cv2.IMREAD_COLOR):
            r, g, b = calculate_color_histogram(image)
            hist_r += r
            hist_g += g
            hist_b += b
            image_count += 1
        if image_count == 0:
            _warn_no_images(skin_type)
            continue

        fig, ax = plt.subplots(figsize=_FIGURE_SIZE)
        ax.plot(hist_r, color='r', label='Red')
        ax.plot(hist_g, color='g', label='Green')
        ax.plot(hist_b, color='b', label='Blue')
        ax.set_title(f"Color Histogram for {skin_type}")
        ax.legend()
        _show_and_close(fig)
    st.write("Insight: From the results of the analysis above, we can conclude that there are no significant differences in the color spectrum that dominates a particular skin type. This could be due to non-standardized image capture so that much of the data is biased due to camera light")


def _texture_section(train_path, classes):
    """EDA 2: plot the mean LBP histogram of each class."""
    st.write("### EDA 2: Texture Analysis (LBP)")
    radius = 3
    n_points = 8 * radius
    for skin_type in classes:
        class_dir = os.path.join(train_path, skin_type)
        lbp_histograms = [
            calculate_lbp(image, radius, n_points)
            for image in _iter_images(class_dir, cv2.IMREAD_GRAYSCALE)
        ]
        # np.mean of an empty list is a NaN scalar, which has no len() and
        # would crash the bar plot below, so empty classes are skipped.
        if not lbp_histograms:
            _warn_no_images(skin_type)
            continue

        avg_hist = np.mean(lbp_histograms, axis=0)
        fig, ax = plt.subplots(figsize=_FIGURE_SIZE)
        ax.bar(range(len(avg_hist)), avg_hist)
        ax.set_title(f"LBP Histogram for {skin_type}")
        ax.set_xlabel("LBP Value")
        ax.set_ylabel("Frequency")
        _show_and_close(fig)
    st.write("Insight: From the results of the analysis above, it can be seen that there are almost no striking differences between each skin type. This could be because many of the respondents who were the data source in making this model used make up so that the original facial skin texture was not visible.")


def _spot_section(train_path, classes):
    """EDA 3: plot the distribution of dark-region counts of each class."""
    st.write("### EDA 3: Spot and Pores Analysis")
    for skin_type in classes:
        class_dir = os.path.join(train_path, skin_type)
        spot_counts = []
        for image in _iter_images(class_dir, cv2.IMREAD_GRAYSCALE):
            # Inverse binary threshold: pixels with intensity <= 50 become
            # foreground (255), so each external contour is one dark region.
            _, thresh = cv2.threshold(image, 50, 255, cv2.THRESH_BINARY_INV)
            # findContours returns 2 values in OpenCV 4 and 3 values in
            # OpenCV 3; the contour list is second-to-last in both.
            contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
            spot_counts.append(len(contours))
        if not spot_counts:
            _warn_no_images(skin_type)
            continue

        fig, ax = plt.subplots(figsize=_FIGURE_SIZE)
        ax.hist(spot_counts, bins=20, alpha=0.7, label=f"{skin_type}")
        ax.set_title(f"Spot Distribution for {skin_type}")
        ax.set_xlabel("Number of Spots")
        ax.set_ylabel("Frequency")
        _show_and_close(fig)
    st.write("Insight: From the results of the analysis above, it can be concluded that normal skin types tend to have more pores and spots compared to other skin types. The order of skin types that have lots of spots and pores: 1. Normal Skin, 2. Oily Skin, 3. Dry Skin")


def run():
    """Render the skin-type exploratory data analysis page.

    Shows the page header, then reads the class folders under
    ``./Oily-Dry-Skin-Types/train`` and renders three analyses per class:
    color histograms, LBP texture histograms and dark-spot counts.
    Returns early, after showing a message, when the training directory is
    missing or contains no class folders.
    """
    st.title("Exploratory Data Analysis of Skin Type")
    st.write("This page contains Exploratory Data Analysis of Skin Type based on the previous model.")
    st.write("---")
    st.image('https://i.ytimg.com/vi/vic-EMOivpA/maxresdefault.jpg', caption='What is your skin type?', use_container_width=True)
    st.write('This model is trained based on 3 classes: Normal Type, Dry Type, Oily Type. The model will predict skin type based on features in the uploaded image. The results will be assigned to the class that has the highest probability. Each class has a maximum probability of 33.37%')

    # Set the dataset path
    main_path = './Oily-Dry-Skin-Types/'
    train_path = os.path.join(main_path, 'train')

    # Validate the dataset path
    if not validate_path(train_path):
        return

    # Collect the class names from the sub-folders of the train directory.
    # os.listdir order is arbitrary, so sort for a stable display order.
    classes = sorted(
        entry for entry in os.listdir(train_path)
        if os.path.isdir(os.path.join(train_path, entry))
    )
    st.write("## Classes in Dataset:", classes)
    if not classes:
        # Without classes there is nothing to plot, and the insight texts
        # below would refer to analyses that were never shown.
        st.warning(f"No class folders found in {train_path}.")
        return

    _color_histogram_section(train_path, classes)
    _texture_section(train_path, classes)
    _spot_section(train_path, classes)
# Render the page only when this module is executed as a script,
# not when it is imported by another module.
if __name__ == "__main__":
    run()