EzekielMW committed on
Commit
a6c3fc8
·
verified ·
1 Parent(s): 72e728b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +318 -143
app.py CHANGED
@@ -1,25 +1,23 @@
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
- from sklearn.preprocessing import StandardScaler, MinMaxScaler
6
- from sklearn.decomposition import PCA
7
- from sklearn.model_selection import train_test_split
8
  from sklearn.ensemble import RandomForestClassifier
9
  from sklearn.tree import DecisionTreeClassifier
 
 
10
  from sklearn.metrics import accuracy_score, confusion_matrix
11
  from scipy.signal import savgol_filter
12
- from tensorflow.keras.models import Sequential
13
- from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
14
- from tensorflow.keras.utils import to_categorical
15
- from tensorflow.keras.callbacks import History
16
- import seaborn as sns
17
- import io
18
- import os
19
  from math import pi
20
  from matplotlib.cm import get_cmap
21
- import warnings
22
- warnings.filterwarnings("ignore")
 
 
 
23
 
24
  plt.switch_backend('agg')
25
 
@@ -27,102 +25,280 @@ plt.switch_backend('agg')
27
  df = pd.read_csv("milk_absorbance.csv")
28
  df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
29
 
30
- # ===================== Helper Functions =========================
31
- def compute_pca_data(df):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  scaler = StandardScaler()
33
- features = df.iloc[:, 1:].values.astype(float)
34
- features_scaled = scaler.fit_transform(features)
 
 
 
 
 
35
  pca = PCA(n_components=2)
36
- pca_data = pca.fit_transform(features_scaled)
37
- return pca_data, df['Label'].values
38
-
39
- def train_model_on_pca(model_name):
40
- X, y = compute_pca_data(df)
41
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
42
- if model_name == "Random Forest":
43
- model = RandomForestClassifier(n_estimators=100)
44
- elif model_name == "Decision Tree":
45
- model = DecisionTreeClassifier()
46
- train_accuracies, test_accuracies = [], []
47
- for epoch in range(1, 11):
48
- model.fit(X_train, y_train)
49
- train_acc = accuracy_score(y_train, model.predict(X_train))
50
- test_acc = accuracy_score(y_test, model.predict(X_test))
51
- train_accuracies.append(train_acc)
52
- test_accuracies.append(test_acc)
53
- cm = confusion_matrix(y_test, model.predict(X_test))
54
- return train_accuracies, test_accuracies, cm
55
-
56
- def train_1d_cnn():
57
- X = df.iloc[:, 1:].values.astype(float)
58
- y = df['Label'].astype(int).values
59
- X = X[:, :, np.newaxis] # Shape for Conv1D
60
- y_cat = to_categorical(y)
61
- X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.2, stratify=y)
62
- model = Sequential([
63
- Conv1D(32, kernel_size=5, activation='relu', input_shape=(X.shape[1], 1)),
64
- MaxPooling1D(pool_size=2),
65
- Flatten(),
66
- Dense(64, activation='relu'),
67
- Dropout(0.3),
68
- Dense(y_cat.shape[1], activation='softmax')
69
- ])
70
- model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
71
- history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, verbose=0)
72
- cm = confusion_matrix(np.argmax(y_test, axis=1), np.argmax(model.predict(X_test), axis=1))
73
- return history.history['accuracy'], history.history['val_accuracy'], cm
74
-
75
- def create_plot(train_acc, test_acc):
76
- fig, ax = plt.subplots()
77
- ax.plot(range(1, 11), train_acc, label="Train Accuracy")
78
- ax.plot(range(1, 11), test_acc, label="Test Accuracy")
79
- ax.set_xlabel("Epoch")
80
- ax.set_ylabel("Accuracy")
81
- ax.set_title("Train vs Test Accuracy")
82
- ax.legend()
83
- return fig
84
-
85
- def plot_confusion_matrix(cm):
86
- fig, ax = plt.subplots()
87
- sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
88
- ax.set_xlabel("Predicted")
89
- ax.set_ylabel("True")
90
- ax.set_title("Confusion Matrix")
91
- return fig
92
-
93
- def predict_model(input_df, model_name):
94
- if model_name in ["Random Forest", "Decision Tree"]:
95
- X, y = compute_pca_data(df)
96
- if model_name == "Random Forest":
97
- model = RandomForestClassifier(n_estimators=100)
98
- else:
99
- model = DecisionTreeClassifier()
100
- model.fit(X, y)
101
- input_pca, _ = compute_pca_data(input_df)
102
- return model.predict(input_pca)
103
- elif model_name == "1D CNN":
104
- X = df.iloc[:, 1:].values.astype(float)
105
- y = df['Label'].astype(int).values
106
- X = X[:, :, np.newaxis]
107
- y_cat = to_categorical(y)
108
- model = Sequential([
109
- Conv1D(32, kernel_size=5, activation='relu', input_shape=(X.shape[1], 1)),
110
- MaxPooling1D(pool_size=2),
111
- Flatten(),
112
- Dense(64, activation='relu'),
113
- Dropout(0.3),
114
- Dense(y_cat.shape[1], activation='softmax')
115
- ])
116
- model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
117
- model.fit(X, y_cat, epochs=10, verbose=0)
118
- input_data = input_df.iloc[:, 1:].values.astype(float)[:, :, np.newaxis]
119
- return np.argmax(model.predict(input_data), axis=1)
120
-
121
- # ===================== Gradio UI =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  with gr.Blocks() as demo:
123
- gr.Markdown("# 🧪 Milk Spectroscopy Analysis App")
124
  with gr.Tabs():
125
- with gr.Tab("Dataset Description"):
126
  gr.DataFrame(df.head(50), label="Preview of Raw Data")
127
 
128
  with gr.Tab("Visualizations"):
@@ -133,53 +309,52 @@ with gr.Blocks() as demo:
133
  with gr.Tab("Models"):
134
  with gr.Tabs():
135
  with gr.Tab("Random Forest"):
136
- rf_btn = gr.Button("Train Random Forest")
137
- rf_table = gr.Dataframe(headers=["Epoch", "Train Acc", "Test Acc"])
138
- rf_plot = gr.Plot()
139
- rf_cm = gr.Plot()
140
- def run_rf():
141
- train_acc, test_acc, cm = train_model_on_pca("Random Forest")
142
- table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
143
- return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
144
- rf_btn.click(fn=run_rf, inputs=[], outputs=[rf_table, rf_plot, rf_cm])
145
 
146
  with gr.Tab("Decision Tree"):
147
- dt_btn = gr.Button("Train Decision Tree")
148
- dt_table = gr.Dataframe(headers=["Epoch", "Train Acc", "Test Acc"])
149
- dt_plot = gr.Plot()
150
- dt_cm = gr.Plot()
151
- def run_dt():
152
- train_acc, test_acc, cm = train_model_on_pca("Decision Tree")
153
- table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
154
- return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
155
- dt_btn.click(fn=run_dt, inputs=[], outputs=[dt_table, dt_plot, dt_cm])
156
 
157
  with gr.Tab("1D CNN (Raw Data)"):
158
- cnn_btn = gr.Button("Train 1D CNN")
159
- cnn_table = gr.Dataframe(headers=["Epoch", "Train Acc", "Test Acc"])
160
- cnn_plot = gr.Plot()
161
- cnn_cm = gr.Plot()
162
- def run_cnn():
163
- train_acc, test_acc, cm = train_1d_cnn()
164
- table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
165
- return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
166
- cnn_btn.click(fn=run_cnn, inputs=[], outputs=[cnn_table, cnn_plot, cnn_cm])
167
 
168
  with gr.Tab("Prediction"):
169
- model_dropdown = gr.Dropdown(choices=["Random Forest", "Decision Tree", "1D CNN"], label="Select Model")
170
- input_type = gr.Radio(choices=["Single", "Multiple (CSV)"])
171
- csv_input = gr.File(file_types=[".csv"], label="Upload CSV")
172
- predict_btn = gr.Button("Predict")
173
- output_df = gr.DataFrame()
174
 
175
- def predict_fn(model_name, type_sel, file):
176
- if type_sel == "Multiple (CSV)":
177
- data = pd.read_csv(file.name)
 
 
 
 
 
 
 
178
  else:
179
- data = df.sample(1) # fallback dummy
180
- preds = predict_model(data, model_name)
181
- return pd.DataFrame({"Prediction": preds})
 
182
 
183
- predict_btn.click(fn=predict_fn, inputs=[model_dropdown, input_type, csv_input], outputs=output_df)
 
184
 
 
185
  demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
 
1
+ # ✅ FULL INTEGRATED SCRIPT
2
+ # Includes your existing visualizations + new Models and Prediction tabs
3
+
4
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from math import pi
from matplotlib.cm import get_cmap
from scipy.signal import savgol_filter
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split  # was missing: used by the train/test splits below
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from torch.utils.data import DataLoader, Dataset
21
 
22
  plt.switch_backend('agg')
23
 
 
25
  df = pd.read_csv("milk_absorbance.csv")
26
  df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
27
 
28
+ # ---------- Plotting Function (Unchanged) ----------
29
+ def plot_all():
30
+ plots = []
31
+
32
+ # Plot 1: Mean Spectra per Class
33
+ fig1 = plt.figure(figsize=(12, 6))
34
+ for label in df['Label'].unique():
35
+ class_df = df[df['Label'] == label]
36
+ mean_spectrum = class_df.iloc[:, 1:].mean()
37
+ plt.plot(mean_spectrum.index.astype(int), mean_spectrum, label=f'Label {label}')
38
+ plt.title('Mean NIR Spectrum per Milk Ratio Class')
39
+ plt.xlabel('Wavelength (nm)')
40
+ plt.ylabel('Absorbance')
41
+ plt.legend(title='Class (Milk Ratio)')
42
+ plt.grid(True)
43
+ plt.tight_layout()
44
+ plots.append(fig1)
45
+ plt.close(fig1)
46
+
47
+ # Plot 2: Offset Mean Spectra
48
+ fig2 = plt.figure(figsize=(12, 6))
49
+ offset_step = 0.1
50
+ for i, label in enumerate(df['Label'].unique()):
51
+ class_df = df[df['Label'] == label]
52
+ mean_spectrum = class_df.iloc[:, 1:].mean()
53
+ offset = i * offset_step
54
+ plt.plot(mean_spectrum.index.astype(int), mean_spectrum + offset, label=f'Label {label}')
55
+ plt.title('Mean NIR Spectrum per Milk Ratio Class (with Offset)')
56
+ plt.xlabel('Wavelength (nm)')
57
+ plt.ylabel('Absorbance (Offset Applied)')
58
+ plt.legend(title='Class (Milk Ratio)')
59
+ plt.grid(True)
60
+ plt.tight_layout()
61
+ plots.append(fig2)
62
+ plt.close(fig2)
63
+
64
+ # Plot 3: Radar Plot
65
+ fig3 = plt.figure(figsize=(8, 8))
66
+ ax = plt.subplot(111, polar=True)
67
+ subset_cols = df.columns[1:][::20]
68
+ labels = df['Label'].unique()
69
+ N = len(subset_cols)
70
+ angles = [n / float(N) * 2 * pi for n in range(N)] + [0]
71
+ for label in labels:
72
+ class_df = df[df['Label'] == label]
73
+ mean_spectrum = class_df[subset_cols].mean().values
74
+ values = mean_spectrum.tolist() + [mean_spectrum[0]]
75
+ ax.plot(angles, values, label=f'Label {label}')
76
+ ax.fill(angles, values, alpha=0.1)
77
+ ax.set_xticks(angles[:-1])
78
+ ax.set_xticklabels(subset_cols.astype(int))
79
+ plt.title('Radar Plot of Mean Spectra (Subset Wavelengths)')
80
+ plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
81
+ plt.tight_layout()
82
+ plots.append(fig3)
83
+ plt.close(fig3)
84
+
85
+ # Plot 4: Cumulative PCA Explained Variance
86
+ fig4 = plt.figure(figsize=(8, 5))
87
+ X = df.iloc[:, 1:].values
88
+ X_scaled = StandardScaler().fit_transform(X)
89
+ pca = PCA(n_components=20)
90
+ pca.fit(X_scaled)
91
+ explained = np.cumsum(pca.explained_variance_ratio_)
92
+ plt.plot(range(1, 21), explained, marker='o')
93
+ plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
94
+ plt.title('Cumulative Explained Variance by PCA')
95
+ plt.xlabel('Number of Principal Components')
96
+ plt.ylabel('Cumulative Variance')
97
+ plt.legend()
98
+ plt.grid(True)
99
+ plt.tight_layout()
100
+ plots.append(fig4)
101
+ plt.close(fig4)
102
+
103
+ # Plot 5: Derivative + Normalized Spectra
104
+ fig5 = plt.figure(figsize=(16, 8))
105
+ y_vals = df['Label'].values
106
+ wavelengths = df.columns[1:].astype(float)
107
+ X = df.iloc[:, 1:].values
108
+ X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
109
+ scaler = MinMaxScaler()
110
+ X_deriv_norm = np.array([scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv])
111
+ unique_labels = np.unique(y_vals)
112
+ colors = get_cmap('tab10')(np.linspace(0, 1, len(unique_labels)))
113
+ for label, color in zip(unique_labels, colors):
114
+ indices = np.where(y_vals == label)[0]
115
+ for i in indices:
116
+ plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
117
+ plt.title("All Spectra After First Derivative + Normalization")
118
+ plt.xlabel("Wavelength (nm)")
119
+ plt.ylabel("Normalized First Derivative")
120
+ plt.legend(title="Group")
121
+ plt.grid(True)
122
+ plt.tight_layout()
123
+ plots.append(fig5)
124
+ plt.close(fig5)
125
+
126
+ # Plot 6: Derivative Only (No Norm)
127
+ fig6 = plt.figure(figsize=(16, 8))
128
+ for label, color in zip(unique_labels, colors):
129
+ indices = np.where(y_vals == label)[0]
130
+ for i in indices:
131
+ plt.plot(wavelengths, X_deriv[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
132
+ plt.title("All Spectra After First Derivative (No Normalization)")
133
+ plt.xlabel("Wavelength (nm)")
134
+ plt.ylabel("First Derivative Absorbance")
135
+ plt.legend(title="Group")
136
+ plt.grid(True)
137
+ plt.tight_layout()
138
+ plots.append(fig6)
139
+ plt.close(fig6)
140
+
141
+ # Plot 7: Score + Loadings
142
+ fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
143
+ wavelength_columns = df.columns[1:]
144
+ labels = df.iloc[:, 0]
145
+ data = df.iloc[:, 1:].values.astype(float)
146
+ derivative_data = np.diff(data, axis=1)
147
  scaler = StandardScaler()
148
+ normalized_derivative_data = scaler.fit_transform(derivative_data)
149
+ derivative_wavelength_columns = [f'Der_{w1}-{w2}' for w1, w2 in zip(wavelength_columns[:-1], wavelength_columns[1:])]
150
+ processed_df = pd.DataFrame(normalized_derivative_data, columns=derivative_wavelength_columns)
151
+ processed_df.insert(0, 'Label', labels)
152
+ processed_df['Label'] = processed_df['Label'].astype(int)
153
+ X_processed = processed_df.drop('Label', axis=1)
154
+ y_processed = processed_df['Label']
155
  pca = PCA(n_components=2)
156
+ principal_components = pca.fit_transform(X_processed)
157
+ pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
158
+ pca_df['Label'] = y_processed.reset_index(drop=True)
159
+ targets = y_processed.unique()
160
+ cmap = get_cmap('tab10')
161
+ for i, target in enumerate(targets):
162
+ idx = pca_df['Label'] == target
163
+ axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'], color=cmap(i % cmap.N), label=f'Label {target}')
164
+ axs[0].set_title('Score Plot: PC1 vs. PC2')
165
+ axs[0].legend()
166
+ axs[0].grid()
167
+ loadings = pca.components_.T
168
+ axs[1].plot(loadings[:, 0], label='PC1 Loadings')
169
+ axs[1].plot(loadings[:, 1], label='PC2 Loadings', color='black')
170
+ axs[1].set_title('Loadings Plot')
171
+ axs[1].legend()
172
+ axs[1].grid()
173
+ plt.tight_layout()
174
+ plots.append(fig7)
175
+ plt.close(fig7)
176
+
177
+ # Plot 8: 3x2 PCA Summary
178
+ fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
179
+ raw_data = df.iloc[:, 1:].values.astype(float)
180
+ derivative_data = np.diff(raw_data, axis=1)
181
+ scaler = StandardScaler()
182
+ raw_scaled = scaler.fit_transform(raw_data)
183
+ derivative_scaled = scaler.fit_transform(derivative_data)
184
+ pca_raw = PCA(n_components=10)
185
+ pca_raw_scores = pca_raw.fit_transform(raw_scaled)
186
+ explained_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
187
+ pca_der = PCA(n_components=10)
188
+ pca_der_scores = pca_der.fit_transform(derivative_scaled)
189
+ explained_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
190
+ targets = np.unique(labels)
191
+ cmap = get_cmap('tab10')
192
+ for i, target in enumerate(targets):
193
+ idx = labels == target
194
+ axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
195
+ axs[0, 0].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
196
+ axs[0, 0].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
197
+ axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
198
+ axs[0, 1].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
199
+ axs[0, 1].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
200
+ axs[0, 0].set_title('Raw Data: PCA Score Plot')
201
+ axs[0, 1].set_title('1st Derivative: PCA Score Plot')
202
+
203
+
204
+ # Row 2: PCA Loadings for Raw and Derivative (with horizontal and vertical lines at 0)
205
+ axs[1, 0].plot(pca_raw.components_[0], label='PC1')
206
+ axs[1, 0].plot(pca_raw.components_[1], label='PC2')
207
+ axs[1, 0].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
208
+ axs[1, 0].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
209
+
210
+ axs[1, 1].plot(pca_der.components_[0], label='PC1')
211
+ axs[1, 1].plot(pca_der.components_[1], label='PC2')
212
+ axs[1, 1].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
213
+ axs[1, 1].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
214
+
215
+ axs[2, 0].plot(range(1, 11), explained_var_raw, marker='o')
216
+ axs[2, 1].plot(range(1, 11), explained_var_der, marker='o')
217
+ axs[0, 0].legend(); axs[0, 1].legend()
218
+ axs[1, 0].legend(); axs[1, 1].legend()
219
+ axs[2, 0].set_ylim(0, 105)
220
+ axs[2, 1].set_ylim(0, 105)
221
+ axs[2, 0].set_title('Raw Data: Scree Plot')
222
+ axs[2, 1].set_title('1st Derivative: Scree Plot')
223
+ plt.tight_layout()
224
+ plots.append(fig8)
225
+ plt.close(fig8)
226
+
227
+ return plots
228
+
229
+
230
+ # ---------- Prepare Data for Modeling ----------
231
+ X = df.iloc[:, 1:].values
232
+ y = df['Label'].values
233
+ scaler = StandardScaler()
234
+ X_scaled = scaler.fit_transform(X)
235
+ pca = PCA(n_components=2)
236
+ X_pca = pca.fit_transform(X_scaled)
237
+ X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
238
+
239
+ # ---------- Train Random Forest ----------
240
+ rf = RandomForestClassifier(n_estimators=100, random_state=42)
241
+ rf.fit(X_train, y_train)
242
+
243
+ # ---------- Train Decision Tree ----------
244
+ dt = DecisionTreeClassifier(random_state=42)
245
+ dt.fit(X_train, y_train)
246
+
247
+ # ---------- CNN on Raw Data ----------
248
+ class MilkDataset(Dataset):
249
+ def __init__(self, X, y):
250
+ self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
251
+ self.y = torch.tensor(y, dtype=torch.long)
252
+ def __len__(self): return len(self.X)
253
+ def __getitem__(self, idx): return self.X[idx], self.y[idx]
254
+
255
+ X_raw_scaled = scaler.fit_transform(X)
256
+ X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(X_raw_scaled, y, test_size=0.2, random_state=42)
257
+ train_dataset = MilkDataset(X_train_raw, y_train_raw)
258
+ test_dataset = MilkDataset(X_test_raw, y_test_raw)
259
+ train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
260
+ test_loader = DataLoader(test_dataset, batch_size=16)
261
+
262
+ class CNN1D(nn.Module):
263
+ def __init__(self):
264
+ super().__init__()
265
+ self.net = nn.Sequential(
266
+ nn.Conv1d(1, 32, 3, padding=1), nn.ReLU(),
267
+ nn.Conv1d(32, 64, 3, padding=1), nn.ReLU(),
268
+ nn.AdaptiveAvgPool1d(1),
269
+ nn.Flatten(),
270
+ nn.Linear(64, len(np.unique(y)))
271
+ )
272
+ def forward(self, x): return self.net(x)
273
+
274
+ model = CNN1D()
275
+ criterion = nn.CrossEntropyLoss()
276
+ optimizer = optim.Adam(model.parameters(), lr=0.001)
277
+
278
+ train_acc_list, test_acc_list = [], []
279
+ for epoch in range(1, 11):
280
+ model.train()
281
+ for Xb, yb in train_loader:
282
+ optimizer.zero_grad()
283
+ loss = criterion(model(Xb), yb)
284
+ loss.backward()
285
+ optimizer.step()
286
+ model.eval()
287
+ with torch.no_grad():
288
+ train_preds = torch.argmax(model(torch.cat([X for X, _ in train_loader], 0)), dim=1)
289
+ test_preds = torch.argmax(model(torch.cat([X for X, _ in test_loader], 0)), dim=1)
290
+ y_train_all = torch.cat([y for _, y in train_loader])
291
+ y_test_all = torch.cat([y for _, y in test_loader])
292
+ train_acc = (train_preds == y_train_all).float().mean().item()
293
+ test_acc = (test_preds == y_test_all).float().mean().item()
294
+ train_acc_list.append(train_acc)
295
+ test_acc_list.append(test_acc)
296
+
297
+ # ---------- Gradio Interface ----------
298
  with gr.Blocks() as demo:
299
+ gr.Markdown("# 🧪 Dataset Description")
300
  with gr.Tabs():
301
+ with gr.Tab("Preview Raw Data"):
302
  gr.DataFrame(df.head(50), label="Preview of Raw Data")
303
 
304
  with gr.Tab("Visualizations"):
 
309
  with gr.Tab("Models"):
310
  with gr.Tabs():
311
  with gr.Tab("Random Forest"):
312
+ gr.Markdown(f"""Train Accuracy: {accuracy_score(y_train, rf.predict(X_train)):.2f} \
313
+ Test Accuracy: {accuracy_score(y_test, rf.predict(X_test)):.2f}""")
314
+ fig_rf = plt.figure()
315
+ sns.heatmap(confusion_matrix(y_test, rf.predict(X_test)), annot=True, fmt='d')
316
+ plt.title("Random Forest Confusion Matrix")
317
+ gr.Plot(fig_rf)
 
 
 
318
 
319
  with gr.Tab("Decision Tree"):
320
+ gr.Markdown(f"""Train Accuracy: {accuracy_score(y_train, dt.predict(X_train)):.2f} \
321
+ Test Accuracy: {accuracy_score(y_test, dt.predict(X_test)):.2f}""")
322
+ fig_dt = plt.figure()
323
+ sns.heatmap(confusion_matrix(y_test, dt.predict(X_test)), annot=True, fmt='d')
324
+ plt.title("Decision Tree Confusion Matrix")
325
+ gr.Plot(fig_dt)
 
 
 
326
 
327
  with gr.Tab("1D CNN (Raw Data)"):
328
+ gr.Markdown(f"""Train Accuracy: {train_acc:.2f} \
329
+ Test Accuracy: {test_acc:.2f}""")
330
+ fig_cnn = plt.figure()
331
+ sns.heatmap(confusion_matrix(y_test_all, test_preds), annot=True, fmt='d')
332
+ plt.title("1D CNN Confusion Matrix")
333
+ gr.Plot(fig_cnn)
 
 
 
334
 
335
  with gr.Tab("Prediction"):
336
+ model_dropdown = gr.Dropdown(choices=['Random Forest', 'Decision Tree', '1D CNN'], label="Choose Model")
337
+ input_file = gr.File(label="Upload CSV File (Same Format as Original Data)")
338
+ output_df = gr.DataFrame(label="Predicted Labels")
 
 
339
 
340
+ def predict(file, model_name):
341
+ test_df = pd.read_csv(file.name)
342
+ if 'Label' in test_df.columns:
343
+ test_df = test_df.drop(columns=['Label'])
344
+ X_input = test_df.values
345
+ if model_name == '1D CNN':
346
+ X_scaled = scaler.transform(X_input)
347
+ X_tensor = torch.tensor(X_scaled, dtype=torch.float32).unsqueeze(1)
348
+ with torch.no_grad():
349
+ preds = torch.argmax(model(X_tensor), dim=1).numpy()
350
  else:
351
+ X_pca_input = pca.transform(scaler.transform(X_input))
352
+ preds = rf.predict(X_pca_input) if model_name == 'Random Forest' else dt.predict(X_pca_input)
353
+ test_df['Predicted Label'] = preds
354
+ return test_df
355
 
356
+ predict_btn = gr.Button("Predict")
357
+ predict_btn.click(fn=predict, inputs=[input_file, model_dropdown], outputs=[output_df])
358
 
359
+ # Run app
360
  demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)