dini15 committed on
Commit
5cee8f3
·
verified ·
1 Parent(s): 2b79d8c

Update eda.py

Browse files
Files changed (1) hide show
  1. eda.py +125 -121
eda.py CHANGED
@@ -1,121 +1,125 @@
1
- #import libraries
2
- import pandas as pd
3
- import numpy as np
4
- import streamlit as st
5
- import matplotlib.pyplot as plt
6
- import seaborn as sns
7
- import plotly.express as px
8
- import os
9
- import glob
10
- import cv2
11
- from skimage.feature import local_binary_pattern
12
-
13
- import tensorflow as tf
14
- from pathlib import Path
15
-
16
def run(main_path='/content/drive/MyDrive/Oily-Dry-Skin-Types/'):
    """Render the skin-type exploratory data analysis page in Streamlit.

    Three analyses are produced for every class sub-folder of
    ``<main_path>/train``: an accumulated RGB colour histogram, a mean
    LBP (local binary pattern) texture histogram, and a histogram of
    per-image contour counts used as a proxy for spots/pores.

    Args:
        main_path: Dataset root that contains the ``train`` folder.
            Defaults to the path that used to be hard-coded here.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Introduction
    st.title("Exploratory Data Analysis of Skin Type")
    st.write('This page contains Exploratory Data Analysis of Skin Type based on previous model')
    st.write('---')

    # Header image
    image_url = 'https://i.ytimg.com/vi/vic-EMOivpA/maxresdefault.jpg'
    st.image(image_url, caption='What is your skin type?', use_container_width=True)

    # Dataset location
    st.write('## DataFrame')
    train_path = os.path.join(main_path, 'train')

    # Stop early with a visible message instead of letting os.listdir()
    # raise when the dataset is not mounted.
    if not os.path.isdir(train_path):
        st.error(f"Path {train_path} does not exist. Please check your dataset directory.")
        return

    # Only sub-directories are classes; stray files (e.g. .DS_Store) would
    # otherwise make the inner os.listdir() calls fail.
    classes = sorted(
        entry for entry in os.listdir(train_path)
        if os.path.isdir(os.path.join(train_path, entry))
    )

    # EDA 1: accumulated colour histogram per class
    st.write('EDA 1: Histogram Colour Analysis for Every Skin Type')
    for skin_type in classes:
        class_dir = os.path.join(train_path, skin_type)
        hist_r, hist_g, hist_b = np.zeros(256), np.zeros(256), np.zeros(256)

        for img_name in os.listdir(class_dir):
            img = cv2.imread(os.path.join(class_dir, img_name))
            if img is None:  # unreadable / non-image file
                continue
            # Channels 2, 1, 0 are read as red, green, blue (OpenCV BGR order).
            hist_r += cv2.calcHist([img], [2], None, [256], [0, 256]).flatten()
            hist_g += cv2.calcHist([img], [1], None, [256], [0, 256]).flatten()
            hist_b += cv2.calcHist([img], [0], None, [256], [0, 256]).flatten()

        fig, ax = plt.subplots(figsize=(10, 5))
        ax.set_title(f'Color Histogram for {skin_type}')
        ax.plot(hist_r, color='r', label='Red')
        ax.plot(hist_g, color='g', label='Green')
        ax.plot(hist_b, color='b', label='Blue')
        ax.legend()
        # plt.show() displays nothing inside a Streamlit app; the figure has
        # to be handed to st.pyplot, then closed so figures do not pile up.
        st.pyplot(fig)
        plt.close(fig)

    st.write("**Insight**")
    st.write("From the analysis above, we can see that there's no significant color distribution for specific skin type. This is perhaps because the data is not standardized (camera angel)")

    # EDA 2: texture analysis with local binary patterns
    st.write("EDA 2: Texture Analysis for Every Skin Type")
    radius = 3
    n_points = 8 * radius

    for skin_type in classes:
        class_dir = os.path.join(train_path, skin_type)
        lbp_values = []

        for img_name in os.listdir(class_dir):
            img = cv2.imread(os.path.join(class_dir, img_name), cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            lbp = local_binary_pattern(img, n_points, radius, method='uniform')
            lbp_values.append(np.histogram(lbp.ravel(), bins=range(0, n_points + 3))[0])

        # np.mean([]) yields a scalar NaN and len() on it raises, so a class
        # without readable images is reported and skipped.
        if not lbp_values:
            st.warning(f"No readable images found for {skin_type}.")
            continue

        avg_lbp = np.mean(lbp_values, axis=0)
        fig, ax = plt.subplots()
        ax.bar(range(len(avg_lbp)), avg_lbp)
        ax.set_title(f'Texture Analysis (LBP) for {skin_type}')
        ax.set_xlabel('LBP Value')
        ax.set_ylabel('Frequency')
        st.pyplot(fig)
        plt.close(fig)

    st.write("***Insight***")
    st.write("From the analysis above, we can't barely see the difference of skin texture for every skin type. Again, perhaps due to the data is not standardized(no make up rule so the skin texture is clear)")

    # EDA 3: spot / pore analysis via contour counting
    st.write("EDA 3: Pores and Spot Analysis for Every Skin Type")
    for skin_type in classes:
        class_dir = os.path.join(train_path, skin_type)
        spot_counts = []

        for img_name in os.listdir(class_dir):
            img = cv2.imread(os.path.join(class_dir, img_name), cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            # Inverse threshold: pixels darker than 50 become foreground.
            _, thresh = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY_INV)
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            # Each external contour counts as one spot.
            spot_counts.append(len(contours))

        fig, ax = plt.subplots()
        ax.hist(spot_counts, bins=20)
        ax.set_title(f'Spot Distribution for {skin_type}')
        ax.set_xlabel('Number of Spots')
        ax.set_ylabel('Frequency')
        st.pyplot(fig)
        plt.close(fig)

    st.write("***Insight***")
    st.write("From the analysis above, we can see the difference of pores and spot for every skin type. The list of skin type that have the most pores and spot is as followed: normal skin, oily skin, dry skin")
118
-
119
-
120
-
121
-
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import os
6
+ import cv2
7
+ from skimage.feature import local_binary_pattern
8
+
9
+ # Fungsi untuk validasi path
10
+ def validate_path(path):
11
+ if not os.path.exists(path):
12
+ st.error(f"Path {path} does not exist. Please check your dataset directory.")
13
+ return False
14
+ return True
15
+
16
+ # Fungsi untuk menghitung histogram warna
17
+ def calculate_color_histogram(image):
18
+ hist_r = cv2.calcHist([image], [2], None, [256], [0, 256]).flatten() # Red
19
+ hist_g = cv2.calcHist([image], [1], None, [256], [0, 256]).flatten() # Green
20
+ hist_b = cv2.calcHist([image], [0], None, [256], [0, 256]).flatten() # Blue
21
+ return hist_r, hist_g, hist_b
22
+
23
+ # Fungsi untuk plotting LBP
24
+ def calculate_lbp(image, radius, n_points):
25
+ lbp = local_binary_pattern(image, n_points, radius, method='uniform')
26
+ hist, _ = np.histogram(lbp.ravel(), bins=range(0, n_points + 3))
27
+ return hist
28
+
29
+ # Fungsi utama aplikasi Streamlit
30
+ def run():
31
+ st.title("Exploratory Data Analysis of Skin Type")
32
+ st.write("This page contains Exploratory Data Analysis of Skin Type based on the previous model.")
33
+ st.write("---")
34
+
35
+ st.image('https://i.ytimg.com/vi/vic-EMOivpA/maxresdefault.jpg', caption='What is your skin type?', use_container_width=True)
36
+
37
+ # Set dataset path
38
+ main_path = "path/to/your/dataset"
39
+ train_path = os.path.join(main_path, 'train')
40
+
41
+ # Validasi path dataset
42
+ if not validate_path(train_path):
43
+ return
44
+
45
+ # Ambil daftar kelas dari folder train
46
+ classes = [d for d in os.listdir(train_path) if os.path.isdir(os.path.join(train_path, d))]
47
+ st.write("## Classes in Dataset:", classes)
48
+
49
+ # EDA 1: Histogram Warna
50
+ st.write("### EDA 1: Color Histogram Analysis")
51
+ for skin_type in classes:
52
+ path = os.path.join(train_path, skin_type)
53
+ hist_r, hist_g, hist_b = np.zeros(256), np.zeros(256), np.zeros(256)
54
+ for img_name in os.listdir(path):
55
+ img_path = os.path.join(path, img_name)
56
+ img = cv2.imread(img_path)
57
+ if img is not None:
58
+ r, g, b = calculate_color_histogram(img)
59
+ hist_r += r
60
+ hist_g += g
61
+ hist_b += b
62
+
63
+ # Plot histogram warna
64
+ plt.figure(figsize=(10, 5))
65
+ plt.plot(hist_r, color='r', label='Red')
66
+ plt.plot(hist_g, color='g', label='Green')
67
+ plt.plot(hist_b, color='b', label='Blue')
68
+ plt.title(f"Color Histogram for {skin_type}")
69
+ plt.legend()
70
+ st.pyplot(plt)
71
+ plt.clf()
72
+
73
+ st.write("Insight: No significant color difference was found across skin types.")
74
+
75
+ # EDA 2: Texture Analysis (LBP)
76
+ st.write("### EDA 2: Texture Analysis (LBP)")
77
+ radius = 3
78
+ n_points = 8 * radius
79
+
80
+ for skin_type in classes:
81
+ path = os.path.join(train_path, skin_type)
82
+ lbp_histograms = []
83
+ for img_name in os.listdir(path):
84
+ img_path = os.path.join(path, img_name)
85
+ img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
86
+ if img is not None:
87
+ hist = calculate_lbp(img, radius, n_points)
88
+ lbp_histograms.append(hist)
89
+
90
+ # Plot rata-rata histogram LBP
91
+ avg_hist = np.mean(lbp_histograms, axis=0)
92
+ plt.bar(range(len(avg_hist)), avg_hist)
93
+ plt.title(f"LBP Histogram for {skin_type}")
94
+ plt.xlabel("LBP Value")
95
+ plt.ylabel("Frequency")
96
+ st.pyplot(plt)
97
+ plt.clf()
98
+
99
+ st.write("Insight: Texture differences are not significant due to unstandardized data.")
100
+
101
+ # EDA 3: Spot/Pores Analysis
102
+ st.write("### EDA 3: Spot and Pores Analysis")
103
+ for skin_type in classes:
104
+ path = os.path.join(train_path, skin_type)
105
+ spot_counts = []
106
+ for img_name in os.listdir(path):
107
+ img_path = os.path.join(path, img_name)
108
+ img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
109
+ if img is not None:
110
+ _, thresh = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY_INV)
111
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
112
+ spot_counts.append(len(contours))
113
+
114
+ # Plot distribusi jumlah spot
115
+ plt.hist(spot_counts, bins=20, alpha=0.7, label=f"{skin_type}")
116
+ plt.title(f"Spot Distribution for {skin_type}")
117
+ plt.xlabel("Number of Spots")
118
+ plt.ylabel("Frequency")
119
+ st.pyplot(plt)
120
+ plt.clf()
121
+
122
+ st.write("Insight: Pores and spot distribution varies across skin types.")
123
+
124
# Render the page when this file is executed directly rather than imported.
if __name__ == "__main__":
    run()