EzekielMW committed on
Commit
9e8b97d
·
verified ·
1 Parent(s): 76f2067

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -203
app.py CHANGED
@@ -4,226 +4,182 @@ import numpy as np
4
  import matplotlib.pyplot as plt
5
  from sklearn.preprocessing import StandardScaler, MinMaxScaler
6
  from sklearn.decomposition import PCA
 
 
 
 
7
  from scipy.signal import savgol_filter
 
 
 
 
 
 
 
8
  from math import pi
9
- from matplotlib.cm import get_cmap # ✅ Import corrected colormap function
 
 
10
 
11
- # Ensure interactive backend for plotting
12
  plt.switch_backend('agg')
13
 
14
  # Load dataset
15
  df = pd.read_csv("milk_absorbance.csv")
16
  df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
17
 
18
- def plot_all():
19
- plots = []
20
-
21
- # Plot 1: Mean Spectra per Class
22
- fig1 = plt.figure(figsize=(12, 6))
23
- for label in df['Label'].unique():
24
- class_df = df[df['Label'] == label]
25
- mean_spectrum = class_df.iloc[:, 1:].mean()
26
- plt.plot(mean_spectrum.index.astype(int), mean_spectrum, label=f'Label {label}')
27
- plt.title('Mean NIR Spectrum per Milk Ratio Class')
28
- plt.xlabel('Wavelength (nm)')
29
- plt.ylabel('Absorbance')
30
- plt.legend(title='Class (Milk Ratio)')
31
- plt.grid(True)
32
- plt.tight_layout()
33
- plots.append(fig1)
34
- plt.close(fig1)
35
-
36
- # Plot 2: Offset Mean Spectra
37
- fig2 = plt.figure(figsize=(12, 6))
38
- offset_step = 0.1
39
- for i, label in enumerate(df['Label'].unique()):
40
- class_df = df[df['Label'] == label]
41
- mean_spectrum = class_df.iloc[:, 1:].mean()
42
- offset = i * offset_step
43
- plt.plot(mean_spectrum.index.astype(int), mean_spectrum + offset, label=f'Label {label}')
44
- plt.title('Mean NIR Spectrum per Milk Ratio Class (with Offset)')
45
- plt.xlabel('Wavelength (nm)')
46
- plt.ylabel('Absorbance (Offset Applied)')
47
- plt.legend(title='Class (Milk Ratio)')
48
- plt.grid(True)
49
- plt.tight_layout()
50
- plots.append(fig2)
51
- plt.close(fig2)
52
-
53
- # Plot 3: Radar Plot
54
- fig3 = plt.figure(figsize=(8, 8))
55
- ax = plt.subplot(111, polar=True)
56
- subset_cols = df.columns[1:][::20]
57
- labels = df['Label'].unique()
58
- N = len(subset_cols)
59
- angles = [n / float(N) * 2 * pi for n in range(N)] + [0]
60
- for label in labels:
61
- class_df = df[df['Label'] == label]
62
- mean_spectrum = class_df[subset_cols].mean().values
63
- values = mean_spectrum.tolist() + [mean_spectrum[0]]
64
- ax.plot(angles, values, label=f'Label {label}')
65
- ax.fill(angles, values, alpha=0.1)
66
- ax.set_xticks(angles[:-1])
67
- ax.set_xticklabels(subset_cols.astype(int))
68
- plt.title('Radar Plot of Mean Spectra (Subset Wavelengths)')
69
- plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
70
- plt.tight_layout()
71
- plots.append(fig3)
72
- plt.close(fig3)
73
-
74
- # Plot 4: Cumulative PCA Explained Variance
75
- fig4 = plt.figure(figsize=(8, 5))
76
- X = df.iloc[:, 1:].values
77
- X_scaled = StandardScaler().fit_transform(X)
78
- pca = PCA(n_components=20)
79
- pca.fit(X_scaled)
80
- explained = np.cumsum(pca.explained_variance_ratio_)
81
- plt.plot(range(1, 21), explained, marker='o')
82
- plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
83
- plt.title('Cumulative Explained Variance by PCA')
84
- plt.xlabel('Number of Principal Components')
85
- plt.ylabel('Cumulative Variance')
86
- plt.legend()
87
- plt.grid(True)
88
- plt.tight_layout()
89
- plots.append(fig4)
90
- plt.close(fig4)
91
-
92
- # Plot 5: Derivative + Normalized Spectra
93
- fig5 = plt.figure(figsize=(16, 8))
94
- y_vals = df['Label'].values
95
- wavelengths = df.columns[1:].astype(float)
96
- X = df.iloc[:, 1:].values
97
- X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
98
- scaler = MinMaxScaler()
99
- X_deriv_norm = np.array([scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv])
100
- unique_labels = np.unique(y_vals)
101
- colors = get_cmap('tab10')(np.linspace(0, 1, len(unique_labels)))
102
- for label, color in zip(unique_labels, colors):
103
- indices = np.where(y_vals == label)[0]
104
- for i in indices:
105
- plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
106
- plt.title("All Spectra After First Derivative + Normalization")
107
- plt.xlabel("Wavelength (nm)")
108
- plt.ylabel("Normalized First Derivative")
109
- plt.legend(title="Group")
110
- plt.grid(True)
111
- plt.tight_layout()
112
- plots.append(fig5)
113
- plt.close(fig5)
114
-
115
- # Plot 6: Derivative Only (No Norm)
116
- fig6 = plt.figure(figsize=(16, 8))
117
- for label, color in zip(unique_labels, colors):
118
- indices = np.where(y_vals == label)[0]
119
- for i in indices:
120
- plt.plot(wavelengths, X_deriv[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
121
- plt.title("All Spectra After First Derivative (No Normalization)")
122
- plt.xlabel("Wavelength (nm)")
123
- plt.ylabel("First Derivative Absorbance")
124
- plt.legend(title="Group")
125
- plt.grid(True)
126
- plt.tight_layout()
127
- plots.append(fig6)
128
- plt.close(fig6)
129
-
130
- # Plot 7: Score + Loadings
131
- fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
132
- wavelength_columns = df.columns[1:]
133
- labels = df.iloc[:, 0]
134
- data = df.iloc[:, 1:].values.astype(float)
135
- derivative_data = np.diff(data, axis=1)
136
  scaler = StandardScaler()
137
- normalized_derivative_data = scaler.fit_transform(derivative_data)
138
- derivative_wavelength_columns = [f'Der_{w1}-{w2}' for w1, w2 in zip(wavelength_columns[:-1], wavelength_columns[1:])]
139
- processed_df = pd.DataFrame(normalized_derivative_data, columns=derivative_wavelength_columns)
140
- processed_df.insert(0, 'Label', labels)
141
- processed_df['Label'] = processed_df['Label'].astype(int)
142
- X_processed = processed_df.drop('Label', axis=1)
143
- y_processed = processed_df['Label']
144
  pca = PCA(n_components=2)
145
- principal_components = pca.fit_transform(X_processed)
146
- pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
147
- pca_df['Label'] = y_processed.reset_index(drop=True)
148
- targets = y_processed.unique()
149
- cmap = get_cmap('tab10')
150
- for i, target in enumerate(targets):
151
- idx = pca_df['Label'] == target
152
- axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'], color=cmap(i % cmap.N), label=f'Label {target}')
153
- axs[0].set_title('Score Plot: PC1 vs. PC2')
154
- axs[0].legend()
155
- axs[0].grid()
156
- loadings = pca.components_.T
157
- axs[1].plot(loadings[:, 0], label='PC1 Loadings')
158
- axs[1].plot(loadings[:, 1], label='PC2 Loadings', color='black')
159
- axs[1].set_title('Loadings Plot')
160
- axs[1].legend()
161
- axs[1].grid()
162
- plt.tight_layout()
163
- plots.append(fig7)
164
- plt.close(fig7)
165
-
166
- # Plot 8: 3x2 PCA Summary
167
- fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
168
- raw_data = df.iloc[:, 1:].values.astype(float)
169
- derivative_data = np.diff(raw_data, axis=1)
170
- scaler = StandardScaler()
171
- raw_scaled = scaler.fit_transform(raw_data)
172
- derivative_scaled = scaler.fit_transform(derivative_data)
173
- pca_raw = PCA(n_components=10)
174
- pca_raw_scores = pca_raw.fit_transform(raw_scaled)
175
- explained_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
176
- pca_der = PCA(n_components=10)
177
- pca_der_scores = pca_der.fit_transform(derivative_scaled)
178
- explained_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
179
- targets = np.unique(labels)
180
- cmap = get_cmap('tab10')
181
- for i, target in enumerate(targets):
182
- idx = labels == target
183
- axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
184
- axs[0, 0].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
185
- axs[0, 0].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
186
- axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
187
- axs[0, 1].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
188
- axs[0, 1].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
189
- axs[0, 0].set_title('Raw Data: PCA Score Plot')
190
- axs[0, 1].set_title('1st Derivative: PCA Score Plot')
191
-
192
-
193
- # Row 2: PCA Loadings for Raw and Derivative (with horizontal and vertical lines at 0)
194
- axs[1, 0].plot(pca_raw.components_[0], label='PC1')
195
- axs[1, 0].plot(pca_raw.components_[1], label='PC2')
196
- axs[1, 0].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
197
- axs[1, 0].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
198
-
199
- axs[1, 1].plot(pca_der.components_[0], label='PC1')
200
- axs[1, 1].plot(pca_der.components_[1], label='PC2')
201
- axs[1, 1].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
202
- axs[1, 1].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
203
-
204
- axs[2, 0].plot(range(1, 11), explained_var_raw, marker='o')
205
- axs[2, 1].plot(range(1, 11), explained_var_der, marker='o')
206
- axs[0, 0].legend(); axs[0, 1].legend()
207
- axs[1, 0].legend(); axs[1, 1].legend()
208
- axs[2, 0].set_ylim(0, 105)
209
- axs[2, 1].set_ylim(0, 105)
210
- axs[2, 0].set_title('Raw Data: Scree Plot')
211
- axs[2, 1].set_title('1st Derivative: Scree Plot')
212
- plt.tight_layout()
213
- plots.append(fig8)
214
- plt.close(fig8)
215
-
216
- return plots
217
-
218
- # Gradio UI
 
 
 
 
 
 
 
 
 
 
 
 
219
  with gr.Blocks() as demo:
220
- gr.Markdown("# 🧪 Dataset Description")
221
  with gr.Tabs():
222
- with gr.Tab("Preview Raw Data"):
223
  gr.DataFrame(df.head(50), label="Preview of Raw Data")
 
224
  with gr.Tab("Visualizations"):
225
  plot_button = gr.Button("Generate Spectroscopy Visualizations")
226
  out_gallery = [gr.Plot() for _ in range(8)]
227
  plot_button.click(fn=plot_all, inputs=[], outputs=out_gallery)
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
 
4
  import matplotlib.pyplot as plt
5
  from sklearn.preprocessing import StandardScaler, MinMaxScaler
6
  from sklearn.decomposition import PCA
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.ensemble import RandomForestClassifier
9
+ from sklearn.tree import DecisionTreeClassifier
10
+ from sklearn.metrics import accuracy_score, confusion_matrix
11
  from scipy.signal import savgol_filter
12
+ from tensorflow.keras.models import Sequential
13
+ from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
14
+ from tensorflow.keras.utils import to_categorical
15
+ from tensorflow.keras.callbacks import History
16
+ import seaborn as sns
17
+ import io
18
+ import os
19
  from math import pi
20
+ from matplotlib.cm import get_cmap
21
+ import warnings
22
+ warnings.filterwarnings("ignore")
23
 
 
24
  plt.switch_backend('agg')
25
 
26
  # Load dataset
27
  df = pd.read_csv("milk_absorbance.csv")
28
  df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
29
 
30
+ # ===================== Helper Functions =========================
31
def compute_pca_data(df, n_components=2):
    """Standard-scale the spectral columns of *df* and project them with PCA.

    Parameters
    ----------
    df : pandas.DataFrame
        First column is 'Label'; the remaining columns are numeric
        absorbance values (one wavelength per column).
    n_components : int, optional
        Number of principal components to keep. Defaults to 2, matching
        the previously hard-coded behaviour, but is now configurable.

    Returns
    -------
    tuple
        ``(pca_scores, labels)`` — the PCA score array of shape
        ``(n_samples, n_components)`` and the label vector from *df*.
    """
    scaler = StandardScaler()
    features = df.iloc[:, 1:].values.astype(float)
    features_scaled = scaler.fit_transform(features)
    pca = PCA(n_components=n_components)
    pca_data = pca.fit_transform(features_scaled)
    return pca_data, df['Label'].values
38
+
39
def train_model_on_pca(model_name):
    """Train a classical classifier on 2-component PCA features.

    Parameters
    ----------
    model_name : str
        Either ``"Random Forest"`` or ``"Decision Tree"``.

    Returns
    -------
    tuple
        ``(train_accuracies, test_accuracies, cm)`` — two 10-element
        accuracy lists (one entry per pseudo-"epoch", kept for plotting
        parity with the CNN tab) and the test-set confusion matrix.

    Raises
    ------
    ValueError
        If *model_name* is not a supported model. Previously an unknown
        name fell through both branches and raised a confusing
        ``NameError`` on the undefined ``model`` variable.
    """
    X, y = compute_pca_data(df)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
    if model_name == "Random Forest":
        model = RandomForestClassifier(n_estimators=100)
    elif model_name == "Decision Tree":
        model = DecisionTreeClassifier()
    else:
        raise ValueError(f"Unsupported model: {model_name!r}")
    # Tree models have no notion of training epochs: fit once instead of
    # refitting ten times in a loop, then replicate the scores so the
    # accuracy-vs-epoch plot keeps its 10-point shape.
    model.fit(X_train, y_train)
    train_acc = accuracy_score(y_train, model.predict(X_train))
    test_acc = accuracy_score(y_test, model.predict(X_test))
    train_accuracies = [train_acc] * 10
    test_accuracies = [test_acc] * 10
    cm = confusion_matrix(y_test, model.predict(X_test))
    return train_accuracies, test_accuracies, cm
55
+
56
def train_1d_cnn():
    """Train a small 1-D CNN on the raw spectra.

    Uses an 80/20 stratified split and a fixed 10-epoch run. Returns
    ``(train_accuracy_history, val_accuracy_history, confusion_matrix)``
    where the histories come from the Keras ``History`` object.
    """
    spectra = df.iloc[:, 1:].values.astype(float)
    labels = df['Label'].astype(int).values
    # Conv1D expects a trailing channel axis: (samples, timesteps, channels).
    spectra = spectra[:, :, np.newaxis]
    labels_onehot = to_categorical(labels)
    X_train, X_test, y_train, y_test = train_test_split(
        spectra, labels_onehot, test_size=0.2, stratify=labels)
    net = Sequential()
    net.add(Conv1D(32, kernel_size=5, activation='relu', input_shape=(spectra.shape[1], 1)))
    net.add(MaxPooling1D(pool_size=2))
    net.add(Flatten())
    net.add(Dense(64, activation='relu'))
    net.add(Dropout(0.3))
    net.add(Dense(labels_onehot.shape[1], activation='softmax'))
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    history = net.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, verbose=0)
    cm = confusion_matrix(np.argmax(y_test, axis=1), np.argmax(net.predict(X_test), axis=1))
    return history.history['accuracy'], history.history['val_accuracy'], cm
74
+
75
def create_plot(train_acc, test_acc):
    """Plot train vs. test accuracy per epoch and return the figure.

    Parameters
    ----------
    train_acc, test_acc : sequence of float
        Per-epoch accuracy values. The x-axis is derived from each
        sequence's length instead of a hard-coded ``range(1, 11)``, so
        curves of any length (e.g. a CNN run with a different epoch
        count) plot correctly instead of raising a length mismatch.
    """
    fig, ax = plt.subplots()
    ax.plot(range(1, len(train_acc) + 1), train_acc, label="Train Accuracy")
    ax.plot(range(1, len(test_acc) + 1), test_acc, label="Test Accuracy")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Accuracy")
    ax.set_title("Train vs Test Accuracy")
    ax.legend()
    return fig
84
+
85
def plot_confusion_matrix(cm):
    """Render *cm* as an annotated heatmap and return the figure.

    Parameters
    ----------
    cm : array-like
        A confusion matrix (integer counts), e.g. from
        ``sklearn.metrics.confusion_matrix``.
    """
    figure, axes = plt.subplots()
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=axes)
    axes.set(xlabel="Predicted", ylabel="True", title="Confusion Matrix")
    return figure
92
+
93
def predict_model(input_df, model_name):
    """Train the selected model on the full dataset and predict *input_df*.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Same layout as the training data: first column is a label (it is
        ignored for prediction), remaining columns are absorbance values.
        # NOTE(review): assumes input has the same wavelength columns as
        # the training CSV — confirm upstream validation.
    model_name : str
        One of ``"Random Forest"``, ``"Decision Tree"``, ``"1D CNN"``.

    Returns
    -------
    numpy.ndarray
        Predicted class labels, one per row of *input_df*.

    Raises
    ------
    ValueError
        For an unrecognised *model_name* (previously the function
        silently returned ``None``).
    """
    if model_name in ("Random Forest", "Decision Tree"):
        # BUG FIX: the previous code fitted a *new* scaler and PCA on the
        # input alone, which yields components in a different, incomparable
        # basis (and PCA cannot fit 2 components on a single-row input).
        # Fit on the training data, then transform the new samples with the
        # same fitted scaler and PCA.
        scaler = StandardScaler()
        train_features = df.iloc[:, 1:].values.astype(float)
        train_scaled = scaler.fit_transform(train_features)
        pca = PCA(n_components=2)
        X = pca.fit_transform(train_scaled)
        y = df['Label'].values
        if model_name == "Random Forest":
            model = RandomForestClassifier(n_estimators=100)
        else:
            model = DecisionTreeClassifier()
        model.fit(X, y)
        input_features = input_df.iloc[:, 1:].values.astype(float)
        input_pca = pca.transform(scaler.transform(input_features))
        return model.predict(input_pca)
    elif model_name == "1D CNN":
        # NOTE: trains a fresh CNN on every call (10 epochs) — expensive,
        # but preserved from the original design.
        X = df.iloc[:, 1:].values.astype(float)
        y = df['Label'].astype(int).values
        X = X[:, :, np.newaxis]  # add channel axis for Conv1D
        y_cat = to_categorical(y)
        model = Sequential([
            Conv1D(32, kernel_size=5, activation='relu', input_shape=(X.shape[1], 1)),
            MaxPooling1D(pool_size=2),
            Flatten(),
            Dense(64, activation='relu'),
            Dropout(0.3),
            Dense(y_cat.shape[1], activation='softmax')
        ])
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        model.fit(X, y_cat, epochs=10, verbose=0)
        input_data = input_df.iloc[:, 1:].values.astype(float)[:, :, np.newaxis]
        return np.argmax(model.predict(input_data), axis=1)
    raise ValueError(f"Unsupported model: {model_name!r}")
120
+
121
# ===================== Gradio UI =========================
# Top-level app layout: dataset preview, visualizations, model training
# tabs, and a prediction tab. Launched at the bottom of the file.
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Milk Spectroscopy Analysis App")
    with gr.Tabs():
        with gr.Tab("Dataset Description"):
            # Static preview of the first 50 rows of the raw spectra CSV.
            gr.DataFrame(df.head(50), label="Preview of Raw Data")

        with gr.Tab("Visualizations"):
            plot_button = gr.Button("Generate Spectroscopy Visualizations")
            out_gallery = [gr.Plot() for _ in range(8)]
            # NOTE(review): `plot_all` does not appear to be defined anywhere
            # in this file as committed — confirm it still exists, otherwise
            # clicking this button raises NameError at runtime.
            plot_button.click(fn=plot_all, inputs=[], outputs=out_gallery)

        with gr.Tab("Models"):
            with gr.Tabs():
                with gr.Tab("Random Forest"):
                    rf_btn = gr.Button("Train Random Forest")
                    rf_table = gr.Dataframe(headers=["Epoch", "Train Acc", "Test Acc"])
                    rf_plot = gr.Plot()
                    rf_cm = gr.Plot()
                    # Train RF on PCA features; return table + two figures.
                    def run_rf():
                        train_acc, test_acc, cm = train_model_on_pca("Random Forest")
                        table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
                        return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
                    rf_btn.click(fn=run_rf, inputs=[], outputs=[rf_table, rf_plot, rf_cm])

                with gr.Tab("Decision Tree"):
                    dt_btn = gr.Button("Train Decision Tree")
                    dt_table = gr.Dataframe(headers=["Epoch", "Train Acc", "Test Acc"])
                    dt_plot = gr.Plot()
                    dt_cm = gr.Plot()
                    # Same flow as run_rf but with a Decision Tree.
                    def run_dt():
                        train_acc, test_acc, cm = train_model_on_pca("Decision Tree")
                        table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
                        return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
                    dt_btn.click(fn=run_dt, inputs=[], outputs=[dt_table, dt_plot, dt_cm])

                with gr.Tab("1D CNN (Raw Data)"):
                    cnn_btn = gr.Button("Train 1D CNN")
                    cnn_table = gr.Dataframe(headers=["Epoch", "Train Acc", "Test Acc"])
                    cnn_plot = gr.Plot()
                    cnn_cm = gr.Plot()
                    # Train the CNN on raw spectra; histories come from Keras.
                    def run_cnn():
                        train_acc, test_acc, cm = train_1d_cnn()
                        table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
                        return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
                    cnn_btn.click(fn=run_cnn, inputs=[], outputs=[cnn_table, cnn_plot, cnn_cm])

        # NOTE(review): the diff does not preserve indentation, so whether
        # "Prediction" nests inside the Models sub-tabs or at the top level
        # is ambiguous — placed at the top level here; confirm intent.
        with gr.Tab("Prediction"):
            model_dropdown = gr.Dropdown(choices=["Random Forest", "Decision Tree", "1D CNN"], label="Select Model")
            input_type = gr.Radio(choices=["Single", "Multiple (CSV)"])
            csv_input = gr.File(file_types=[".csv"], label="Upload CSV")
            predict_btn = gr.Button("Predict")
            output_df = gr.DataFrame()

            # Predict on an uploaded CSV, or on one random training row
            # when "Single" is selected (dummy fallback).
            def predict_fn(model_name, type_sel, file):
                if type_sel == "Multiple (CSV)":
                    data = pd.read_csv(file.name)
                else:
                    data = df.sample(1)  # fallback dummy
                preds = predict_model(data, model_name)
                return pd.DataFrame({"Prediction": preds})

            predict_btn.click(fn=predict_fn, inputs=[model_dropdown, input_type, csv_input], outputs=output_df)

demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)