Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,25 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
-
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
| 6 |
-
from sklearn.decomposition import PCA
|
| 7 |
-
from sklearn.model_selection import train_test_split
|
| 8 |
from sklearn.ensemble import RandomForestClassifier
|
| 9 |
from sklearn.tree import DecisionTreeClassifier
|
|
|
|
|
|
|
| 10 |
from sklearn.metrics import accuracy_score, confusion_matrix
|
| 11 |
from scipy.signal import savgol_filter
|
| 12 |
-
from tensorflow.keras.models import Sequential
|
| 13 |
-
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
|
| 14 |
-
from tensorflow.keras.utils import to_categorical
|
| 15 |
-
from tensorflow.keras.callbacks import History
|
| 16 |
-
import seaborn as sns
|
| 17 |
-
import io
|
| 18 |
-
import os
|
| 19 |
from math import pi
|
| 20 |
from matplotlib.cm import get_cmap
|
| 21 |
-
import
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
plt.switch_backend('agg')
|
| 25 |
|
|
@@ -27,102 +25,280 @@ plt.switch_backend('agg')
|
|
| 27 |
df = pd.read_csv("milk_absorbance.csv")
|
| 28 |
df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
|
| 29 |
|
| 30 |
-
#
|
| 31 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
scaler = StandardScaler()
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
pca = PCA(n_components=2)
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
with gr.Blocks() as demo:
|
| 123 |
-
gr.Markdown("# 🧪
|
| 124 |
with gr.Tabs():
|
| 125 |
-
with gr.Tab("
|
| 126 |
gr.DataFrame(df.head(50), label="Preview of Raw Data")
|
| 127 |
|
| 128 |
with gr.Tab("Visualizations"):
|
|
@@ -133,53 +309,52 @@ with gr.Blocks() as demo:
|
|
| 133 |
with gr.Tab("Models"):
|
| 134 |
with gr.Tabs():
|
| 135 |
with gr.Tab("Random Forest"):
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
|
| 143 |
-
return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
|
| 144 |
-
rf_btn.click(fn=run_rf, inputs=[], outputs=[rf_table, rf_plot, rf_cm])
|
| 145 |
|
| 146 |
with gr.Tab("Decision Tree"):
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
|
| 154 |
-
return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
|
| 155 |
-
dt_btn.click(fn=run_dt, inputs=[], outputs=[dt_table, dt_plot, dt_cm])
|
| 156 |
|
| 157 |
with gr.Tab("1D CNN (Raw Data)"):
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
|
| 165 |
-
return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
|
| 166 |
-
cnn_btn.click(fn=run_cnn, inputs=[], outputs=[cnn_table, cnn_plot, cnn_cm])
|
| 167 |
|
| 168 |
with gr.Tab("Prediction"):
|
| 169 |
-
model_dropdown = gr.Dropdown(choices=[
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
predict_btn = gr.Button("Predict")
|
| 173 |
-
output_df = gr.DataFrame()
|
| 174 |
|
| 175 |
-
def
|
| 176 |
-
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
else:
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
|
|
|
| 182 |
|
| 183 |
-
predict_btn
|
|
|
|
| 184 |
|
|
|
|
| 185 |
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
|
|
|
|
| 1 |
+
# ✅ FULL INTEGRATED SCRIPT
|
| 2 |
+
# Includes your existing visualizations + new Models and Prediction tabs
|
| 3 |
+
|
| 4 |
import gradio as gr
|
| 5 |
import pandas as pd
|
| 6 |
import numpy as np
|
| 7 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
| 8 |
from sklearn.ensemble import RandomForestClassifier
|
| 9 |
from sklearn.tree import DecisionTreeClassifier
|
| 10 |
+
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
| 11 |
+
from sklearn.decomposition import PCA
|
| 12 |
from sklearn.metrics import accuracy_score, confusion_matrix
|
| 13 |
from scipy.signal import savgol_filter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
from math import pi
|
| 15 |
from matplotlib.cm import get_cmap
|
| 16 |
+
import seaborn as sns
|
| 17 |
+
import torch
|
| 18 |
+
import torch.nn as nn
|
| 19 |
+
import torch.optim as optim
|
| 20 |
+
from torch.utils.data import DataLoader, Dataset
|
| 21 |
|
| 22 |
plt.switch_backend('agg')
|
| 23 |
|
|
|
|
| 25 |
df = pd.read_csv("milk_absorbance.csv")
|
| 26 |
df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
|
| 27 |
|
| 28 |
+
# ---------- Plotting Function (Unchanged) ----------
|
| 29 |
+
def _plot_mean_spectra(title, ylabel, offset_step=0.0):
    """Draw the mean spectrum of every class found in ``df['Label']``.

    Class ``i`` (in order of first appearance) is shifted upwards by
    ``i * offset_step`` so overlapping curves can be told apart; the default
    of ``0.0`` leaves the curves where they are.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    for i, label in enumerate(df['Label'].unique()):
        mean_spectrum = df[df['Label'] == label].iloc[:, 1:].mean()
        ax.plot(mean_spectrum.index.astype(int), mean_spectrum + i * offset_step,
                label=f'Label {label}')
    ax.set_title(title)
    ax.set_xlabel('Wavelength (nm)')
    ax.set_ylabel(ylabel)
    ax.legend(title='Class (Milk Ratio)')
    ax.grid(True)
    fig.tight_layout()
    return fig


def _plot_radar(step=20):
    """Draw a polar plot of per-class mean values at every ``step``-th spectral column."""
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111, polar=True)
    subset_cols = df.columns[1:][::step]
    n_axes = len(subset_cols)
    # The trailing 0 (and the repeated first value below) closes each polygon.
    angles = [k / float(n_axes) * 2 * pi for k in range(n_axes)] + [0]
    for label in df['Label'].unique():
        means = df.loc[df['Label'] == label, subset_cols].mean().values
        values = means.tolist() + [means[0]]
        ax.plot(angles, values, label=f'Label {label}')
        ax.fill(angles, values, alpha=0.1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(subset_cols.astype(int))
    ax.set_title('Radar Plot of Mean Spectra (Subset Wavelengths)')
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    fig.tight_layout()
    return fig


def _plot_cumulative_variance(spectra, max_components=20):
    """Plot cumulative PCA explained variance of the standardized spectra."""
    fig, ax = plt.subplots(figsize=(8, 5))
    scaled = StandardScaler().fit_transform(spectra)
    # PCA cannot extract more components than min(n_samples, n_features).
    n_components = min(max_components, *scaled.shape)
    explained = np.cumsum(PCA(n_components=n_components).fit(scaled).explained_variance_ratio_)
    ax.plot(range(1, len(explained) + 1), explained, marker='o')
    ax.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
    ax.set_title('Cumulative Explained Variance by PCA')
    ax.set_xlabel('Number of Principal Components')
    ax.set_ylabel('Cumulative Variance')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    return fig


def _plot_spectra_by_class(wavelengths, spectra, y_vals, title, ylabel):
    """Overlay every row of ``spectra``, coloured by its class in ``y_vals``."""
    fig, ax = plt.subplots(figsize=(16, 8))
    unique_labels = np.unique(y_vals)
    colors = plt.get_cmap('tab10')(np.linspace(0, 1, len(unique_labels)))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            # Only the first curve of each class carries a label, so the
            # legend shows one entry per class rather than one per sample.
            ax.plot(wavelengths, spectra[i], color=color, alpha=0.3,
                    label=f'Milk {label}' if i == indices[0] else '')
    ax.set_title(title)
    ax.set_xlabel("Wavelength (nm)")
    ax.set_ylabel(ylabel)
    ax.legend(title="Group")
    ax.grid(True)
    fig.tight_layout()
    return fig


def _plot_scores_and_loadings(spectra, labels):
    """Plot PC1/PC2 scores and loadings of the standardized first differences."""
    fig, axs = plt.subplots(1, 2, figsize=(14, 5))
    derivative = StandardScaler().fit_transform(np.diff(spectra, axis=1))
    class_ids = labels.astype(int).reset_index(drop=True)
    pca = PCA(n_components=2)
    scores = pca.fit_transform(derivative)
    cmap = plt.get_cmap('tab10')
    for i, target in enumerate(class_ids.unique()):
        mask = (class_ids == target).values
        axs[0].scatter(scores[mask, 0], scores[mask, 1],
                       color=cmap(i % cmap.N), label=f'Label {target}')
    axs[0].set_title('Score Plot: PC1 vs. PC2')
    axs[0].legend()
    axs[0].grid()
    loadings = pca.components_.T
    axs[1].plot(loadings[:, 0], label='PC1 Loadings')
    axs[1].plot(loadings[:, 1], label='PC2 Loadings', color='black')
    axs[1].set_title('Loadings Plot')
    axs[1].legend()
    axs[1].grid()
    fig.tight_layout()
    return fig


def _plot_pca_summary(spectra, labels, max_components=10):
    """Draw a 3x2 grid: scores, loadings and scree plot for raw and differenced data."""
    fig, axs = plt.subplots(3, 2, figsize=(16, 14))
    label_values = np.asarray(labels)
    targets = np.unique(label_values)
    cmap = plt.get_cmap('tab10')
    panels = (
        ('Raw Data', spectra),
        ('1st Derivative', np.diff(spectra, axis=1)),
    )
    for col, (name, data) in enumerate(panels):
        scaled = StandardScaler().fit_transform(data)
        # Clamp so small datasets do not make PCA raise.
        pca = PCA(n_components=min(max_components, *scaled.shape))
        scores = pca.fit_transform(scaled)
        explained = np.cumsum(pca.explained_variance_ratio_) * 100
        score_ax, loading_ax, scree_ax = axs[:, col]

        for i, target in enumerate(targets):
            mask = label_values == target
            score_ax.scatter(scores[mask, 0], scores[mask, 1], s=40,
                             label=f'Milk {target}', color=cmap(i % cmap.N))
        loading_ax.plot(pca.components_[0], label='PC1')
        loading_ax.plot(pca.components_[1], label='PC2')

        # Zero reference lines, drawn once per axes (not once per class).
        for ax in (score_ax, loading_ax):
            ax.axhline(0, color='gray', linestyle='--', linewidth=2)
            ax.axvline(0, color='gray', linestyle='--', linewidth=2)
            ax.legend()

        scree_ax.plot(range(1, len(explained) + 1), explained, marker='o')
        scree_ax.set_ylim(0, 105)
        score_ax.set_title(f'{name}: PCA Score Plot')
        scree_ax.set_title(f'{name}: Scree Plot')
    fig.tight_layout()
    return fig


def plot_all():
    """Build every exploratory figure for the module-level ``df``.

    Returns:
        A list of eight closed matplotlib figures, in this order: mean
        spectra, offset mean spectra, radar plot, cumulative PCA variance,
        normalized first derivative, first derivative, score/loadings plot,
        and the 3x2 PCA summary.
    """
    labels = df.iloc[:, 0]
    y_vals = df['Label'].values
    spectra = df.iloc[:, 1:].values.astype(float)
    wavelengths = df.columns[1:].astype(float)

    # Smoothed first derivative along the wavelength axis, then a per-row
    # min-max rescale so every spectrum spans the same range.
    first_derivative = savgol_filter(spectra, window_length=25, polyorder=5, deriv=1, axis=1)
    row_scaler = MinMaxScaler()
    normalized_derivative = np.array(
        [row_scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in first_derivative]
    )

    plots = [
        _plot_mean_spectra('Mean NIR Spectrum per Milk Ratio Class', 'Absorbance'),
        _plot_mean_spectra('Mean NIR Spectrum per Milk Ratio Class (with Offset)',
                           'Absorbance (Offset Applied)', offset_step=0.1),
        _plot_radar(),
        _plot_cumulative_variance(spectra),
        _plot_spectra_by_class(wavelengths, normalized_derivative, y_vals,
                               "All Spectra After First Derivative + Normalization",
                               "Normalized First Derivative"),
        _plot_spectra_by_class(wavelengths, first_derivative, y_vals,
                               "All Spectra After First Derivative (No Normalization)",
                               "First Derivative Absorbance"),
        _plot_scores_and_loadings(spectra, labels),
        _plot_pca_summary(spectra, labels),
    ]
    # Detach the figures from pyplot's registry so they are not kept alive
    # there; the Figure objects themselves remain usable by the caller.
    for fig in plots:
        plt.close(fig)
    return plots
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
# ---------- Prepare Data for Modeling ----------
|
| 231 |
+
# train_test_split is called below but is not among the imports visible at the
# top of this revision of the file; import it here so start-up does not fail
# with a NameError.
from sklearn.model_selection import train_test_split

# Feature matrix: every column after the first; targets: the 'Label' column.
X = df.iloc[:, 1:].values
y = df['Label'].values

# Standardize the spectra and project them onto two principal components;
# the tree-based models below are trained on this 2-D representation.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Fixed seed keeps the 80/20 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

# ---------- Train Random Forest ----------
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# ---------- Train Decision Tree ----------
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
|
| 246 |
+
|
| 247 |
+
# ---------- CNN on Raw Data ----------
|
| 248 |
+
class MilkDataset(Dataset):
    """Tensor-backed dataset pairing each feature row with its label.

    ``X`` is stored as float32 with an extra axis inserted at position 1,
    and ``y`` is stored as int64 (``torch.long``).
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        # Add a singleton axis after the sample axis: (n, length) -> (n, 1, length).
        self.X = features.unsqueeze(1)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target
|
| 254 |
+
|
| 255 |
+
# Standardize the full-width spectra for the CNN. This re-fits the shared
# module-level `scaler` on X.
X_raw_scaled = scaler.fit(X).transform(X)

# 80/20 split with a fixed seed.
_raw_split = train_test_split(X_raw_scaled, y, test_size=0.2, random_state=42)
X_train_raw, X_test_raw, y_train_raw, y_test_raw = _raw_split

train_dataset = MilkDataset(X_train_raw, y_train_raw)
test_dataset = MilkDataset(X_test_raw, y_test_raw)

# Training batches are reshuffled on every pass; test batches keep their order.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16)
|
| 261 |
+
|
| 262 |
+
class CNN1D(nn.Module):
    """Small 1-D convolutional classifier for single-channel sequences.

    Two ``Conv1d`` + ReLU layers are followed by adaptive average pooling to
    length 1, so inputs of any length are accepted, and a linear layer that
    emits one logit per class.

    Args:
        num_classes: Size of the output layer. When omitted, it is taken from
            the number of distinct values in the module-level ``y`` array,
            which is what the original hard-coded behaviour did.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: infer from the training labels.
            num_classes = len(np.unique(y))
        self.net = nn.Sequential(
            nn.Conv1d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv1d(32, 64, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),  # collapse the length axis to 1
            nn.Flatten(),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch shaped ``(batch, 1, length)``."""
        return self.net(x)
|
| 273 |
+
|
| 274 |
+
def _predict_loader(net, loader):
    """Return ``(predictions, labels)`` gathered in a single pass over ``loader``.

    Collecting both in the same pass keeps them aligned even when the loader
    reshuffles its data on every iteration.
    """
    batch_preds, batch_labels = [], []
    with torch.no_grad():
        for xb, yb in loader:
            batch_preds.append(torch.argmax(net(xb), dim=1))
            batch_labels.append(yb)
    return torch.cat(batch_preds), torch.cat(batch_labels)


model = CNN1D()
# NOTE(review): CrossEntropyLoss expects targets in 0..num_classes-1; this
# assumes the labels in the CSV already have that form - confirm.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Per-epoch accuracies; the values from the final epoch (train_acc, test_acc,
# test_preds, y_test_all) are read later when the interface is built.
train_acc_list, test_acc_list = [], []
for epoch in range(1, 11):
    model.train()
    for Xb, yb in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(Xb), yb)
        loss.backward()
        optimizer.step()

    model.eval()
    # train_loader shuffles, so predictions and labels must come from the
    # same pass; two separate passes would pair them up in different orders.
    train_preds, y_train_all = _predict_loader(model, train_loader)
    test_preds, y_test_all = _predict_loader(model, test_loader)
    train_acc = (train_preds == y_train_all).float().mean().item()
    test_acc = (test_preds == y_test_all).float().mean().item()
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
|
| 296 |
+
|
| 297 |
+
# ---------- Gradio Interface ----------
|
| 298 |
with gr.Blocks() as demo:
|
| 299 |
+
gr.Markdown("# 🧪 Dataset Description")
|
| 300 |
with gr.Tabs():
|
| 301 |
+
with gr.Tab("Preview Raw Data"):
|
| 302 |
gr.DataFrame(df.head(50), label="Preview of Raw Data")
|
| 303 |
|
| 304 |
with gr.Tab("Visualizations"):
|
|
|
|
| 309 |
with gr.Tab("Models"):
|
| 310 |
with gr.Tabs():
|
| 311 |
with gr.Tab("Random Forest"):
|
| 312 |
+
gr.Markdown(f"""Train Accuracy: {accuracy_score(y_train, rf.predict(X_train)):.2f} \
|
| 313 |
+
Test Accuracy: {accuracy_score(y_test, rf.predict(X_test)):.2f}""")
|
| 314 |
+
fig_rf = plt.figure()
|
| 315 |
+
sns.heatmap(confusion_matrix(y_test, rf.predict(X_test)), annot=True, fmt='d')
|
| 316 |
+
plt.title("Random Forest Confusion Matrix")
|
| 317 |
+
gr.Plot(fig_rf)
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
with gr.Tab("Decision Tree"):
|
| 320 |
+
gr.Markdown(f"""Train Accuracy: {accuracy_score(y_train, dt.predict(X_train)):.2f} \
|
| 321 |
+
Test Accuracy: {accuracy_score(y_test, dt.predict(X_test)):.2f}""")
|
| 322 |
+
fig_dt = plt.figure()
|
| 323 |
+
sns.heatmap(confusion_matrix(y_test, dt.predict(X_test)), annot=True, fmt='d')
|
| 324 |
+
plt.title("Decision Tree Confusion Matrix")
|
| 325 |
+
gr.Plot(fig_dt)
|
|
|
|
|
|
|
|
|
|
| 326 |
|
| 327 |
with gr.Tab("1D CNN (Raw Data)"):
|
| 328 |
+
gr.Markdown(f"""Train Accuracy: {train_acc:.2f} \
|
| 329 |
+
Test Accuracy: {test_acc:.2f}""")
|
| 330 |
+
fig_cnn = plt.figure()
|
| 331 |
+
sns.heatmap(confusion_matrix(y_test_all, test_preds), annot=True, fmt='d')
|
| 332 |
+
plt.title("1D CNN Confusion Matrix")
|
| 333 |
+
gr.Plot(fig_cnn)
|
|
|
|
|
|
|
|
|
|
| 334 |
|
| 335 |
with gr.Tab("Prediction"):
|
| 336 |
+
model_dropdown = gr.Dropdown(choices=['Random Forest', 'Decision Tree', '1D CNN'], label="Choose Model")
|
| 337 |
+
input_file = gr.File(label="Upload CSV File (Same Format as Original Data)")
|
| 338 |
+
output_df = gr.DataFrame(label="Predicted Labels")
|
|
|
|
|
|
|
| 339 |
|
| 340 |
+
def predict(file, model_name):
    """Predict a label for every row of an uploaded CSV file.

    Args:
        file: The upload from the Gradio ``File`` component: either a path
            string or an object exposing the path as ``.name``.
        model_name: ``'1D CNN'`` or ``'Random Forest'``; any other value
            (including no selection) uses the decision tree.

    Returns:
        The uploaded feature table with an added ``'Predicted Label'`` column.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file is None:
        raise gr.Error("Please upload a CSV file first.")

    # Depending on the Gradio version/configuration the component passes a
    # plain path or a temp-file wrapper.
    csv_path = file if isinstance(file, str) else file.name
    test_df = pd.read_csv(csv_path)

    if 'Label' in test_df.columns:
        test_df = test_df.drop(columns=['Label'])
    elif test_df.shape[1] == scaler.n_features_in_ + 1:
        # Same layout as the training file: exactly one extra leading column,
        # i.e. the label column under a name other than 'Label'.
        test_df = test_df.drop(columns=[test_df.columns[0]])

    X_input = test_df.values
    scaled = scaler.transform(X_input)

    if model_name == '1D CNN':
        X_tensor = torch.tensor(scaled, dtype=torch.float32).unsqueeze(1)
        model.eval()
        with torch.no_grad():
            preds = torch.argmax(model(X_tensor), dim=1).numpy()
    else:
        X_pca_input = pca.transform(scaled)
        classifier = rf if model_name == 'Random Forest' else dt
        preds = classifier.predict(X_pca_input)

    test_df['Predicted Label'] = preds
    return test_df
|
| 355 |
|
| 356 |
+
predict_btn = gr.Button("Predict")
|
| 357 |
+
predict_btn.click(fn=predict, inputs=[input_file, model_dropdown], outputs=[output_df])
|
| 358 |
|
| 359 |
+
# Run app
|
| 360 |
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
|