EzekielMW committed on
Commit
78ee49c
·
verified ·
1 Parent(s): bc4f170

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -78
app.py CHANGED
@@ -7,119 +7,202 @@ from sklearn.decomposition import PCA
7
  from scipy.signal import savgol_filter
8
  from math import pi
9
 
 
10
  plt.switch_backend('agg')
11
 
12
  # Load dataset
13
  df = pd.read_csv("milk_absorbance.csv")
14
  df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
15
 
16
- # Main plot generator
17
  def plot_all():
18
  plots = []
19
 
20
- # --- Previous 6 plots (shortened for clarity) ---
21
- # [Same code from the earlier version to generate 6 plots]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- # ---------- New Plot Group 1: Score + Loadings (2 Subplots) ----------
24
  fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
25
- wavelengths = df.columns[1:]
26
- labels = df['Label']
27
  data = df.iloc[:, 1:].values.astype(float)
28
-
29
- deriv = np.diff(data, axis=1)
30
  scaler = StandardScaler()
31
- norm_deriv = scaler.fit_transform(deriv)
32
- deriv_cols = [f'Der_{w1}-{w2}' for w1, w2 in zip(wavelengths[:-1], wavelengths[1:])]
33
- processed_df = pd.DataFrame(norm_deriv, columns=deriv_cols)
34
  processed_df.insert(0, 'Label', labels)
35
-
36
- X_proc = processed_df.drop('Label', axis=1)
37
- y_proc = processed_df['Label']
38
-
39
  pca = PCA(n_components=2)
40
- pcs = pca.fit_transform(X_proc)
41
- pca_df = pd.DataFrame(pcs, columns=['PC1', 'PC2'])
42
- pca_df['Label'] = y_proc.reset_index(drop=True)
43
-
44
- cmap = plt.cm.get_cmap('tab10', len(pca_df['Label'].unique()))
45
- for i, target in enumerate(pca_df['Label'].unique()):
46
  idx = pca_df['Label'] == target
47
- axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'], color=cmap(i), label=f"Label {target}", s=40)
48
- axs[0].set_title("Score Plot: PC1 vs PC2")
49
- axs[0].set_xlabel("PC1")
50
- axs[0].set_ylabel("PC2")
51
  axs[0].legend()
52
  axs[0].grid()
53
-
54
  loadings = pca.components_.T
55
- axs[1].plot(range(len(X_proc.columns)), loadings[:, 0], label='PC1 Loadings')
56
- axs[1].plot(range(len(X_proc.columns)), loadings[:, 1], label='PC2 Loadings', color='black')
57
- axs[1].set_title("Loadings Plot")
58
- axs[1].set_xlabel("Feature Index")
59
- axs[1].set_ylabel("Loading Value")
60
  axs[1].legend()
61
  axs[1].grid()
62
  plt.tight_layout()
63
  plots.append(fig7)
64
 
65
- # ---------- New Plot Group 2: 3x2 PCA Analysis ----------
66
  fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
67
-
68
- raw = data
69
- raw_scaled = scaler.fit_transform(raw)
70
- der_scaled = scaler.fit_transform(deriv)
71
-
72
  pca_raw = PCA(n_components=10)
73
  pca_raw_scores = pca_raw.fit_transform(raw_scaled)
74
- exp_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
75
-
76
  pca_der = PCA(n_components=10)
77
- pca_der_scores = pca_der.fit_transform(der_scaled)
78
- exp_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
79
-
80
- for i, target in enumerate(np.unique(labels)):
 
81
  idx = labels == target
82
- axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1], label=f'Milk {target}', color=cmap(i))
83
- axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1], label=f'Milk {target}', color=cmap(i))
84
-
85
- axs[0, 0].set_title("Raw PCA Score Plot")
86
- axs[0, 1].set_title("1st Derivative PCA Score Plot")
87
- axs[1, 0].plot(range(len(wavelengths)), pca_raw.components_[0], label='PC1')
88
- axs[1, 0].plot(range(len(wavelengths)), pca_raw.components_[1], label='PC2')
89
- axs[1, 1].plot(range(len(deriv_cols)), pca_der.components_[0], label='PC1')
90
- axs[1, 1].plot(range(len(deriv_cols)), pca_der.components_[1], label='PC2')
91
- axs[2, 0].plot(range(1, 11), exp_var_raw, marker='o')
92
- axs[2, 1].plot(range(1, 11), exp_var_der, marker='o')
93
-
94
- for ax in axs.flat:
95
- ax.grid(True)
96
-
97
- axs[0, 0].legend()
98
- axs[0, 1].legend()
99
- axs[1, 0].set_title("Raw Loadings")
100
- axs[1, 1].set_title("Derivative Loadings")
101
- axs[2, 0].set_title("Raw Scree")
102
- axs[2, 1].set_title("Derivative Scree")
103
  plt.tight_layout()
104
  plots.append(fig8)
105
 
106
  return plots
107
 
108
- # Gradio UI with tabs
109
  with gr.Blocks() as demo:
110
  gr.Markdown("# 🧪 Dataset Description")
111
- with gr.Tab("Preview Raw Data"):
112
- gr.DataFrame(df.head(50), label="Milk Absorbance Data")
113
- with gr.Tab("Visualizations"):
114
- plot_btn = gr.Button("Generate All Visualizations")
115
- plot1 = gr.Plot()
116
- plot2 = gr.Plot()
117
- plot3 = gr.Plot()
118
- plot4 = gr.Plot()
119
- plot5 = gr.Plot()
120
- plot6 = gr.Plot()
121
- plot7 = gr.Plot()
122
- plot8 = gr.Plot()
123
- plot_btn.click(plot_all, inputs=[], outputs=[plot1, plot2, plot3, plot4, plot5, plot6, plot7, plot8])
124
 
125
  demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
 
7
  from scipy.signal import savgol_filter
8
  from math import pi
9
 
10
# Agg is a non-interactive backend: figures are rendered off-screen only.
plt.switch_backend('agg')

# Load the dataset and name its first column 'Label'.
df = pd.read_csv("milk_absorbance.csv")
df = df.rename(columns={df.columns[0]: 'Label'})
16
 
17
def plot_all():
    """Build the eight spectroscopy figures wired to the Gradio plot outputs.

    Reads the module-level ``df`` (first column ``'Label'``; every other
    column is treated as an absorbance value keyed by its wavelength) and
    draws, in order:

    1. mean spectrum per label,
    2. the same mean spectra with a vertical offset per label,
    3. a radar plot of the mean spectra at every 20th wavelength column,
    4. cumulative explained variance of a PCA on the standardized spectra,
    5. Savitzky-Golay first-derivative spectra, min-max scaled per spectrum,
    6. the same first-derivative spectra without scaling,
    7. PCA score and loadings plots of the standardized first differences,
    8. a 3x2 grid comparing PCA on the raw spectra and on the first
       differences (scores, loadings, cumulative explained variance).

    Returns:
        list: the eight matplotlib figures, in the order listed above.
    """
    # pyplot keeps every figure it creates alive until it is closed, so drop
    # the figures left over from the previous call before drawing new ones.
    plt.close('all')

    plots = []

    # Plot 1: Mean Spectra per Class
    fig1 = plt.figure(figsize=(12, 6))
    for label in df['Label'].unique():
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum,
                 label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig1)

    # Plot 2: Offset Mean Spectra
    fig2 = plt.figure(figsize=(12, 6))
    offset_step = 0.1
    for i, label in enumerate(df['Label'].unique()):
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        # Shift each class upwards so overlapping curves stay distinguishable.
        offset = i * offset_step
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum + offset,
                 label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class (with Offset)')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance (Offset Applied)')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig2)

    # Plot 3: Radar Plot
    fig3 = plt.figure(figsize=(8, 8))
    ax = plt.subplot(111, polar=True)
    subset_cols = df.columns[1:][::20]
    labels = df['Label'].unique()
    N = len(subset_cols)
    # The trailing 0 repeats the first angle so every polygon is closed.
    angles = [n / float(N) * 2 * pi for n in range(N)] + [0]
    for label in labels:
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df[subset_cols].mean().values
        values = mean_spectrum.tolist() + [mean_spectrum[0]]
        ax.plot(angles, values, label=f'Label {label}')
        ax.fill(angles, values, alpha=0.1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(subset_cols.astype(int))
    plt.title('Radar Plot of Mean Spectra (Subset Wavelengths)')
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.tight_layout()
    plots.append(fig3)

    # Plot 4: Cumulative PCA Explained Variance
    fig4 = plt.figure(figsize=(8, 5))
    X = df.iloc[:, 1:].values
    X_scaled = StandardScaler().fit_transform(X)
    # PCA cannot extract more components than min(n_samples, n_features);
    # clamp so small datasets do not raise.
    n_components = min(20, *X_scaled.shape)
    pca = PCA(n_components=n_components)
    pca.fit(X_scaled)
    explained = np.cumsum(pca.explained_variance_ratio_)
    plt.plot(range(1, n_components + 1), explained, marker='o')
    plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
    plt.title('Cumulative Explained Variance by PCA')
    plt.xlabel('Number of Principal Components')
    plt.ylabel('Cumulative Variance')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig4)

    # Plot 5: Derivative + Normalized Spectra
    fig5 = plt.figure(figsize=(16, 8))
    y_vals = df['Label'].values
    wavelengths = df.columns[1:].astype(float)
    X = df.iloc[:, 1:].values
    X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
    scaler = MinMaxScaler()
    # Each spectrum (row) is rescaled on its own, hence the per-row fit.
    X_deriv_norm = np.array([
        scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv
    ])
    unique_labels = np.unique(y_vals)
    colors = plt.cm.tab10(np.linspace(0, 1, len(unique_labels)))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            # Only the first curve of a group gets a legend entry.
            plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3,
                     label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative + Normalization")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("Normalized First Derivative")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig5)

    # Plot 6: Derivative Only (No Norm)
    fig6 = plt.figure(figsize=(16, 8))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            plt.plot(wavelengths, X_deriv[i], color=color, alpha=0.3,
                     label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative (No Normalization)")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("First Derivative Absorbance")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig6)

    # Plot 7: Score + Loadings (side-by-side)
    fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
    wavelength_columns = df.columns[1:]
    labels = df.iloc[:, 0]
    data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(data, axis=1)
    scaler = StandardScaler()
    normalized_derivative_data = scaler.fit_transform(derivative_data)
    derivative_wavelength_columns = [
        f'Der_{w1}-{w2}'
        for w1, w2 in zip(wavelength_columns[:-1], wavelength_columns[1:])
    ]
    processed_df = pd.DataFrame(normalized_derivative_data,
                                columns=derivative_wavelength_columns)
    processed_df.insert(0, 'Label', labels)
    processed_df['Label'] = processed_df['Label'].astype(int)
    X_processed = processed_df.drop('Label', axis=1)
    y_processed = processed_df['Label']
    pca = PCA(n_components=2)
    principal_components = pca.fit_transform(X_processed)
    pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
    pca_df['Label'] = y_processed.reset_index(drop=True)
    targets = y_processed.unique()
    # pyplot.get_cmap accepts the number of colours (lut) as second argument;
    # the colormap registry's get_cmap takes the name only and would raise.
    cmap = plt.get_cmap('tab10', len(targets))
    for i, target in enumerate(targets):
        idx = pca_df['Label'] == target
        axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'],
                       color=cmap(i), label=f'Label {target}')
    axs[0].set_title('Score Plot: PC1 vs. PC2')
    axs[0].legend()
    axs[0].grid()
    loadings = pca.components_.T
    axs[1].plot(loadings[:, 0], label='PC1 Loadings')
    axs[1].plot(loadings[:, 1], label='PC2 Loadings', color='black')
    axs[1].set_title('Loadings Plot')
    axs[1].legend()
    axs[1].grid()
    plt.tight_layout()
    plots.append(fig7)

    # Plot 8: 3x2 PCA Summary
    fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
    raw_data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(raw_data, axis=1)
    scaler = StandardScaler()
    raw_scaled = scaler.fit_transform(raw_data)
    derivative_scaled = scaler.fit_transform(derivative_data)
    # Same clamp as in Plot 4, computed separately because the differenced
    # data has one feature fewer than the raw data.
    n_raw = min(10, *raw_scaled.shape)
    n_der = min(10, *derivative_scaled.shape)
    pca_raw = PCA(n_components=n_raw)
    pca_raw_scores = pca_raw.fit_transform(raw_scaled)
    explained_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
    pca_der = PCA(n_components=n_der)
    pca_der_scores = pca_der.fit_transform(derivative_scaled)
    explained_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
    targets = np.unique(labels)
    cmap = plt.get_cmap('tab10', len(targets))
    for i, target in enumerate(targets):
        # Plain boolean array so it can index the NumPy score matrices.
        idx = (labels == target).to_numpy()
        axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1],
                          s=40, label=f'Milk {target}', color=cmap(i))
        axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1],
                          s=40, label=f'Milk {target}', color=cmap(i))
    axs[0, 0].set_title('Raw Data: PCA Score Plot')
    axs[0, 1].set_title('1st Derivative: PCA Score Plot')
    axs[1, 0].plot(pca_raw.components_[0], label='PC1')
    axs[1, 0].plot(pca_raw.components_[1], label='PC2')
    axs[1, 1].plot(pca_der.components_[0], label='PC1')
    axs[1, 1].plot(pca_der.components_[1], label='PC2')
    axs[2, 0].plot(range(1, n_raw + 1), explained_var_raw, marker='o')
    axs[2, 1].plot(range(1, n_der + 1), explained_var_der, marker='o')
    axs[0, 0].legend()
    axs[0, 1].legend()
    axs[1, 0].legend()
    axs[1, 1].legend()
    axs[2, 0].set_ylim(0, 105)
    axs[2, 1].set_ylim(0, 105)
    axs[2, 0].set_title('Raw Data: Scree Plot')
    axs[2, 1].set_title('1st Derivative: Scree Plot')
    plt.tight_layout()
    plots.append(fig8)

    return plots
196
 
197
# Gradio UI: one tab previews the data, the other renders the figures.
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Dataset Description")
    with gr.Tabs():
        with gr.Tab("Preview Raw Data"):
            # Show only the first 50 rows of the loaded dataset.
            gr.DataFrame(value=df.head(50), label="Preview of Raw Data")
        with gr.Tab("Visualizations"):
            plot_button = gr.Button("Generate Spectroscopy Visualizations")
            # One Plot component per figure returned by plot_all().
            out_gallery = []
            for _ in range(8):
                out_gallery.append(gr.Plot())
            plot_button.click(plot_all, inputs=[], outputs=out_gallery)

demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)