# Spectroscopy / app.py
# Origin: EzekielMW's Hugging Face Space (commit 7eff709)
# ✅ FULL INTEGRATED SCRIPT
# Includes your existing visualizations + new Models and Prediction tabs
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler,LabelEncoder
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.signal import savgol_filter
from math import pi
from matplotlib.cm import get_cmap
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
# Headless backend: figures are rendered into Gradio components, never shown
# interactively, so 'agg' avoids needing any display server.
plt.switch_backend('agg')

# ---- Load dataset ----
# First CSV column carries the class label; the remaining columns are
# wavelength readings (absorbance values).
df = pd.read_csv("milk_absorbance.csv")
df = df.rename(columns={df.columns[0]: 'Label'})

# ---- Label encoding ----
# Map the raw class labels onto consecutive integers 0..n_classes-1 for the
# classifiers below.
le = LabelEncoder()
y = le.fit_transform(df['Label'].values)
# ---------- Plotting Function (Unchanged) ----------
def plot_all() -> list:
    """Build the eight exploratory figures for the spectroscopy dataset.

    Reads the module-level DataFrame ``df`` (column 0 = 'Label', remaining
    columns = wavelengths). Returns the figures as a list so the Gradio
    click handler can route one figure per ``gr.Plot`` output.
    """
    plots = []
    # Plot 1: Mean Spectra per Class
    fig1 = plt.figure(figsize=(12, 6))
    for label in df['Label'].unique():
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        # column names double as wavelengths; assumes they parse as ints
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum, label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig1)
    plt.close(fig1)  # close so pyplot does not keep/re-render the figure
    # Plot 2: Offset Mean Spectra (same curves, vertically separated)
    fig2 = plt.figure(figsize=(12, 6))
    offset_step = 0.1  # vertical shift applied per class for readability
    for i, label in enumerate(df['Label'].unique()):
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        offset = i * offset_step
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum + offset, label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class (with Offset)')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance (Offset Applied)')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig2)
    plt.close(fig2)
    # Plot 3: Radar Plot over a thinned set of wavelengths
    fig3 = plt.figure(figsize=(8, 8))
    ax = plt.subplot(111, polar=True)
    subset_cols = df.columns[1:][::20]  # every 20th wavelength keeps the radar legible
    labels = df['Label'].unique()
    N = len(subset_cols)
    # one angle per wavelength, plus angle 0 repeated to close the polygon
    angles = [n / float(N) * 2 * pi for n in range(N)] + [0]
    for label in labels:
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df[subset_cols].mean().values
        values = mean_spectrum.tolist() + [mean_spectrum[0]]  # close the loop
        ax.plot(angles, values, label=f'Label {label}')
        ax.fill(angles, values, alpha=0.1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(subset_cols.astype(int))
    plt.title('Radar Plot of Mean Spectra (Subset Wavelengths)')
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.tight_layout()
    plots.append(fig3)
    plt.close(fig3)
    # Plot 4: Cumulative PCA Explained Variance (first 20 components)
    fig4 = plt.figure(figsize=(8, 5))
    X = df.iloc[:, 1:].values
    X_scaled = StandardScaler().fit_transform(X)
    pca = PCA(n_components=20)
    pca.fit(X_scaled)
    explained = np.cumsum(pca.explained_variance_ratio_)
    plt.plot(range(1, 21), explained, marker='o')
    plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
    plt.title('Cumulative Explained Variance by PCA')
    plt.xlabel('Number of Principal Components')
    plt.ylabel('Cumulative Variance')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig4)
    plt.close(fig4)
    # Plot 5: Savitzky-Golay first derivative + per-spectrum min-max normalization
    fig5 = plt.figure(figsize=(16, 8))
    y_vals = df['Label'].values
    wavelengths = df.columns[1:].astype(float)
    X = df.iloc[:, 1:].values
    # smooth + differentiate each spectrum along the wavelength axis
    X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
    scaler = MinMaxScaler()
    # normalize every spectrum independently (row-wise min-max to [0, 1])
    X_deriv_norm = np.array([scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv])
    unique_labels = np.unique(y_vals)
    colors = get_cmap('tab10')(np.linspace(0, 1, len(unique_labels)))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            # only the first curve of each class gets a legend label
            plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative + Normalization")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("Normalized First Derivative")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig5)
    plt.close(fig5)
    # Plot 6: Same derivative spectra without normalization
    fig6 = plt.figure(figsize=(16, 8))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            plt.plot(wavelengths, X_deriv[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative (No Normalization)")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("First Derivative Absorbance")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig6)
    plt.close(fig6)
    # Plot 7: PCA Score plot + Loadings on standardized finite-difference derivatives
    fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
    wavelength_columns = df.columns[1:]
    labels = df.iloc[:, 0]  # NOTE: rebinds `labels`; Plot 8 below reuses this Series
    data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(data, axis=1)  # simple adjacent-difference derivative
    scaler = StandardScaler()
    normalized_derivative_data = scaler.fit_transform(derivative_data)
    derivative_wavelength_columns = [f'Der_{w1}-{w2}' for w1, w2 in zip(wavelength_columns[:-1], wavelength_columns[1:])]
    processed_df = pd.DataFrame(normalized_derivative_data, columns=derivative_wavelength_columns)
    processed_df.insert(0, 'Label', labels)
    # assumes raw labels are integer-like strings/numbers — TODO confirm with CSV
    processed_df['Label'] = processed_df['Label'].astype(int)
    X_processed = processed_df.drop('Label', axis=1)
    y_processed = processed_df['Label']
    pca = PCA(n_components=2)
    principal_components = pca.fit_transform(X_processed)
    pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
    pca_df['Label'] = y_processed.reset_index(drop=True)
    targets = y_processed.unique()
    cmap = get_cmap('tab10')
    for i, target in enumerate(targets):
        idx = pca_df['Label'] == target
        axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'], color=cmap(i % cmap.N), label=f'Label {target}')
    axs[0].set_title('Score Plot: PC1 vs. PC2')
    axs[0].legend()
    axs[0].grid()
    loadings = pca.components_.T  # (n_features, 2): per-wavelength PC weights
    axs[1].plot(loadings[:, 0], label='PC1 Loadings')
    axs[1].plot(loadings[:, 1], label='PC2 Loadings', color='black')
    axs[1].set_title('Loadings Plot')
    axs[1].legend()
    axs[1].grid()
    plt.tight_layout()
    plots.append(fig7)
    plt.close(fig7)
    # Plot 8: 3x2 PCA summary — scores, loadings, scree for raw vs. derivative data
    fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
    raw_data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(raw_data, axis=1)
    scaler = StandardScaler()
    raw_scaled = scaler.fit_transform(raw_data)
    derivative_scaled = scaler.fit_transform(derivative_data)
    pca_raw = PCA(n_components=10)
    pca_raw_scores = pca_raw.fit_transform(raw_scaled)
    explained_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
    pca_der = PCA(n_components=10)
    pca_der_scores = pca_der.fit_transform(derivative_scaled)
    explained_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
    targets = np.unique(labels)  # `labels` Series from the Plot 7 section
    cmap = get_cmap('tab10')
    # Row 1: score plots (axhline/axvline redrawn each iteration — harmless)
    for i, target in enumerate(targets):
        idx = labels == target
        axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
        axs[0, 0].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
        axs[0, 0].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
        axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
        axs[0, 1].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
        axs[0, 1].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[0, 0].set_title('Raw Data: PCA Score Plot')
    axs[0, 1].set_title('1st Derivative: PCA Score Plot')
    # Row 2: PCA Loadings for Raw and Derivative (with horizontal and vertical lines at 0)
    axs[1, 0].plot(pca_raw.components_[0], label='PC1')
    axs[1, 0].plot(pca_raw.components_[1], label='PC2')
    axs[1, 0].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[1, 0].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[1, 1].plot(pca_der.components_[0], label='PC1')
    axs[1, 1].plot(pca_der.components_[1], label='PC2')
    axs[1, 1].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[1, 1].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    # Row 3: scree plots (cumulative % variance over 10 components)
    axs[2, 0].plot(range(1, 11), explained_var_raw, marker='o')
    axs[2, 1].plot(range(1, 11), explained_var_der, marker='o')
    axs[0, 0].legend(); axs[0, 1].legend()
    axs[1, 0].legend(); axs[1, 1].legend()
    axs[2, 0].set_ylim(0, 105)
    axs[2, 1].set_ylim(0, 105)
    axs[2, 0].set_title('Raw Data: Scree Plot')
    axs[2, 1].set_title('1st Derivative: Scree Plot')
    plt.tight_layout()
    plots.append(fig8)
    plt.close(fig8)
    return plots
# === Features for the classical models ===
# NOTE: the labels were already encoded at load time (module-level `le`/`y`);
# the duplicate LabelEncoder fit that used to live here was redundant and has
# been removed — `y` is unchanged.
X = df.iloc[:, 1:].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # also reused below by the CNN pipeline

# === PCA reduction ===
# Project onto the first two principal components before fitting the trees.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Fixed seed keeps the split reproducible across app restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X_pca, y, test_size=0.2, random_state=42
)

# === Models ===
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
# === CNN ===
class MilkDataset(Dataset):
    """Wrap (X, y) arrays as a PyTorch dataset of single-channel spectra."""

    def __init__(self, X, y):
        # Insert a channel axis so every sample is shaped (1, n_wavelengths),
        # matching Conv1d's expected (N, C, L) layout once batched.
        self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Number of spectra held by the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (spectrum, label) pair at position ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target
# Separate split on the full scaled spectra (no PCA) for the CNN; the same
# seed/ratio as the classical-model split keeps the partitions comparable.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)
train_dataset = MilkDataset(X_train_raw, y_train_raw)
test_dataset = MilkDataset(X_test_raw, y_test_raw)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)
class CNN1D(nn.Module):
    """Small 1-D CNN classifier for NIR spectra.

    Two conv+ReLU stages feed a global average pool, so the network accepts
    spectra of any length.

    Args:
        num_classes: width of the output (logit) layer. Defaults to the
            number of distinct classes in the module-level label vector
            ``y``, preserving the original zero-argument construction.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: infer from the global labels.
            num_classes = len(np.unique(y))
        self.net = nn.Sequential(
            nn.Conv1d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv1d(32, 64, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),  # global average pool -> (N, 64, 1)
            nn.Flatten(),             # -> (N, 64)
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        """Map a (N, 1, L) float tensor to (N, num_classes) logits."""
        return self.net(x)
# Train the CNN once at startup; the resulting accuracies are computed but
# only the pre-rendered images are shown in the UI below.
model = CNN1D()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Fixed 10-epoch training loop over mini-batches of raw scaled spectra.
for epoch in range(10):
    model.train()
    for Xb, yb in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(Xb), yb)
        loss.backward()
        optimizer.step()
# Single full-batch evaluation on both splits (no grad tracking needed).
model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test_raw, dtype=torch.float32).unsqueeze(1)
    test_preds = model(X_test_tensor).argmax(dim=1)
    test_acc = (test_preds == torch.tensor(y_test_raw)).float().mean().item()
    X_train_tensor = torch.tensor(X_train_raw, dtype=torch.float32).unsqueeze(1)
    train_preds = model(X_train_tensor).argmax(dim=1)
    train_acc = (train_preds == torch.tensor(y_train_raw)).float().mean().item()
# ---- Gradio UI: tabs for raw data preview, plots, model outputs, takeaways ----
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 SPECTROSCOPY - YOUR HEALTH OUR CONCERN!!!")
    with gr.Tabs():
        with gr.Tab("Preview Raw Data"):
            gr.DataFrame(df.head(50), label="Preview of Raw Data")
        with gr.Tab("Visualizations"):
            plot_button = gr.Button("Generate Spectroscopy Visualizations")
            # Eight Plot outputs — must match the number of figures plot_all returns.
            out_gallery = [gr.Plot() for _ in range(8)]
            plot_button.click(fn=plot_all, inputs=[], outputs=out_gallery)
        with gr.Tab("Models"):
            with gr.Tabs():
                # Model results are served as pre-rendered image files.
                with gr.Tab("Random Forest"):
                    gr.Image(value="rf.png", label="Random Forest Output")
                with gr.Tab("Decision Tree"):
                    gr.Markdown("**Confusion Matrix**")
                    gr.Image(value="tree_cm.png", label="Confusion Matrix")
                    gr.Markdown("**Decision Tree Visualization**")
                    gr.Image(value="tree.png", label="Tree Structure")
                with gr.Tab("1D CNN (Raw Data)"):
                    gr.Image(value="1d.png", label="1D CNN Output")
        with gr.Tab("Takeaways"):
            gr.Markdown("## 🌿 Why Spectroscopy Matters in the Dairy Ecosystem")
            gr.Markdown("### 👨‍🌾 Farmers")
            gr.Markdown("""
- ✅ Enables **quick, non-destructive testing** of milk quality at the source.
- ⚠️ Allows **early detection** of spoilage, contamination, or adulteration.
- 💰 Supports **transparent and fair pricing** in cooperative and local markets.
""")
            gr.Markdown("### 🏛️ Government & Regulators")
            gr.Markdown("""
- 🛡️ Reinforces **food safety and public health** monitoring systems.
- 📊 Ensures **consistency and traceability** across the dairy supply chain.
- 🚀 Encourages **innovation in agricultural technologies** and rural development.
""")
            gr.Markdown("### 🏭 Businesses & Cooperatives")
            gr.Markdown("""
- ⏱️ Facilitates **real-time quality control** during production and logistics.
- 💡 Reduces dependency on slow, expensive lab tests.
- 🤝 Builds **consumer trust** through transparency and quality assurance.
""")
            gr.Markdown("---")
            gr.Markdown("## 🧬 Parting Thought: Healthy Living Starts with Smart Choices")
            gr.Markdown("""
> “Milk is nature’s first food – and spectroscopy helps us keep it honest, pure, and nutritious.”
>
> Embrace technology. Protect health.
> Let's make every drop of milk safe and reliable – for everyone.
""")
# Bind to all interfaces on port 7860 (standard Hugging Face Spaces setup).
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)