Upload eda.py
Browse files
eda.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#import libraries
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import streamlit as st
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
import seaborn as sns
|
| 7 |
+
import plotly.express as px
|
| 8 |
+
import os
|
| 9 |
+
import glob
|
| 10 |
+
import cv2
|
| 11 |
+
from skimage.feature import local_binary_pattern
|
| 12 |
+
|
| 13 |
+
import tensorflow as tf
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
def _class_dirs(root):
    """Return the sorted names of the sub-directories of *root*.

    Plain files that sit next to the class folders are skipped so they are
    not mistaken for a class (calling ``os.listdir`` on them would raise).
    """
    return sorted(
        name for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name))
    )


def _image_paths(folder):
    """Yield the full path of every regular file directly inside *folder*."""
    for name in os.listdir(folder):
        full_path = os.path.join(folder, name)
        if os.path.isfile(full_path):
            yield full_path


def _render(fig):
    """Show *fig* on the Streamlit page, then close it to free its memory."""
    # plt.show() does not put anything on a Streamlit page; the figure has to
    # be handed to Streamlit explicitly.
    st.pyplot(fig)
    plt.close(fig)


def run(main_path='/content/drive/MyDrive/Oily-Dry-Skin-Types/'):
    """Render the "Exploratory Data Analysis of Skin Type" Streamlit page.

    The page shows three analyses, each computed per class folder found under
    ``<main_path>/train``:

    1. summed colour histograms (red, green, blue),
    2. the average Local Binary Pattern (LBP) histogram,
    3. the distribution of dark-spot (contour) counts.

    Args:
        main_path: Root folder of the dataset; it must contain a ``train``
            sub-folder with one folder per skin type. Defaults to the
            location the script was originally written for.

    Returns:
        None. All output is written to the Streamlit page. If the ``train``
        folder does not exist an error message is shown and the function
        returns early.
    """
    # Introduction
    st.title("Exploratory Data Analysis of Skin Type")
    st.write('This page contains Exploratory Data Analysis of Skin Type based on previous model')
    st.write('---')

    # Header image
    link_gambar = 'https://i.ytimg.com/vi/vic-EMOivpA/maxresdefault.jpg'
    st.image(link_gambar, caption='What is your skin type?', use_container_width=True)

    # Locate the dataset
    st.write('## DataFrame')
    train_path = os.path.join(main_path, 'train')
    if not os.path.isdir(train_path):
        st.error(f'Training folder not found: {train_path}')
        return
    classes = _class_dirs(train_path)

    # EDA 1: colour histograms
    st.write('EDA 1: Histogram Colour Analysis for Every Skin Type')
    for skin_type in classes:
        class_dir = os.path.join(train_path, skin_type)
        # Row index is the OpenCV channel index: 0 = blue, 1 = green, 2 = red.
        channel_totals = np.zeros((3, 256))

        for img_path in _image_paths(class_dir):
            img = cv2.imread(img_path)
            if img is None:  # unreadable / not an image
                continue
            for channel in range(3):
                channel_totals[channel] += cv2.calcHist(
                    [img], [channel], None, [256], [0, 256]
                ).flatten()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.set_title(f'Color Histogram for {skin_type}')
        ax.plot(channel_totals[2], color='r', label='Red')
        ax.plot(channel_totals[1], color='g', label='Green')
        ax.plot(channel_totals[0], color='b', label='Blue')
        ax.legend()
        _render(fig)

    st.write("**Insight**")
    st.write("From the analysis above, we can see that there's no significant color distribution for specific skin type. This is perhaps because the data is not standardized (camera angel)")

    # EDA 2: texture (LBP)
    st.write("EDA 2: Texture Analysis for Every Skin Type")
    # LBP parameters
    radius = 3
    n_points = 8 * radius
    # One unit-width bin per integer value 0 .. n_points + 1.
    lbp_bins = range(0, n_points + 3)

    for skin_type in classes:
        class_dir = os.path.join(train_path, skin_type)
        lbp_values = []

        for img_path in _image_paths(class_dir):
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            lbp = local_binary_pattern(img, n_points, radius, method='uniform')
            lbp_values.append(np.histogram(lbp.ravel(), bins=lbp_bins)[0])

        if not lbp_values:
            # The mean of an empty list is NaN and cannot be plotted.
            st.warning(f'No readable images found for {skin_type}')
            continue

        # Plot the average LBP histogram of the class on its own figure.
        avg_lbp = np.mean(lbp_values, axis=0)
        fig, ax = plt.subplots()
        ax.bar(range(len(avg_lbp)), avg_lbp)
        ax.set_title(f'Texture Analysis (LBP) for {skin_type}')
        ax.set_xlabel('LBP Value')
        ax.set_ylabel('Frequency')
        _render(fig)

    st.write("***Insight***")
    st.write("From the analysis above, we can't barely see the difference of skin texture for every skin type. Again, perhaps due to the data is not standardized(no make up rule so the skin texture is clear)")

    # EDA 3: pores and spots
    st.write("EDA 3: Pores and Spot Analysis for Every Skin Type")
    for skin_type in classes:
        class_dir = os.path.join(train_path, skin_type)
        spot_counts = []

        for img_path in _image_paths(class_dir):
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            # Inverted binary threshold at 50, then count the outer contours
            # of the resulting mask as "spots".
            _, thresh = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY_INV)
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            spot_counts.append(len(contours))

        if not spot_counts:
            st.warning(f'No readable images found for {skin_type}')
            continue

        # Plot the distribution of spot counts of the class on its own figure.
        fig, ax = plt.subplots()
        ax.hist(spot_counts, bins=20)
        ax.set_title(f'Spot Distribution for {skin_type}')
        ax.set_xlabel('Number of Spots')
        ax.set_ylabel('Frequency')
        _render(fig)

    st.write("***Insight***")
    st.write("From the analysis above, we can see the difference of pores and spot for every skin type. The list of skin type that have the most pores and spot is as followed: normal skin, oily skin, dry skin")
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
|