# Spectroscopy / app.py
# Origin: EzekielMW's Hugging Face Space (commit 7eff709)
# ✅ FULL INTEGRATED SCRIPT
# Includes your existing visualizations + new Models and Prediction tabs
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler,LabelEncoder
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.signal import savgol_filter
from math import pi
from matplotlib.cm import get_cmap
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
# Headless backend: figures are rendered into Gradio components, never shown
# interactively, so 'agg' avoids needing any display server.
plt.switch_backend('agg')

# ---- Load dataset ----
# First CSV column carries the class label; the remaining columns are
# wavelength readings (absorbance values).
df = pd.read_csv("milk_absorbance.csv")
df = df.rename(columns={df.columns[0]: 'Label'})

# ---- Label encoding ----
# Map the raw class labels onto consecutive integers 0..n_classes-1 for the
# classifiers below.
le = LabelEncoder()
y = le.fit_transform(df['Label'].values)
# ---------- Plotting Function (Unchanged) ----------
def plot_all() -> list:
    """Build the eight exploratory figures for the spectroscopy dataset.

    Reads the module-level DataFrame ``df`` (column 0 = 'Label', remaining
    columns = wavelengths). Returns the figures as a list so the Gradio
    click handler can route one figure per ``gr.Plot`` output.
    """
    plots = []
    # Plot 1: Mean Spectra per Class
    fig1 = plt.figure(figsize=(12, 6))
    for label in df['Label'].unique():
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        # column names double as wavelengths; assumes they parse as ints
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum, label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig1)
    plt.close(fig1)  # close so pyplot does not keep/re-render the figure
    # Plot 2: Offset Mean Spectra (same curves, vertically separated)
    fig2 = plt.figure(figsize=(12, 6))
    offset_step = 0.1  # vertical shift applied per class for readability
    for i, label in enumerate(df['Label'].unique()):
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        offset = i * offset_step
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum + offset, label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class (with Offset)')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance (Offset Applied)')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig2)
    plt.close(fig2)
    # Plot 3: Radar Plot over a thinned set of wavelengths
    fig3 = plt.figure(figsize=(8, 8))
    ax = plt.subplot(111, polar=True)
    subset_cols = df.columns[1:][::20]  # every 20th wavelength keeps the radar legible
    labels = df['Label'].unique()
    N = len(subset_cols)
    # one angle per wavelength, plus angle 0 repeated to close the polygon
    angles = [n / float(N) * 2 * pi for n in range(N)] + [0]
    for label in labels:
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df[subset_cols].mean().values
        values = mean_spectrum.tolist() + [mean_spectrum[0]]  # close the loop
        ax.plot(angles, values, label=f'Label {label}')
        ax.fill(angles, values, alpha=0.1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(subset_cols.astype(int))
    plt.title('Radar Plot of Mean Spectra (Subset Wavelengths)')
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.tight_layout()
    plots.append(fig3)
    plt.close(fig3)
    # Plot 4: Cumulative PCA Explained Variance (first 20 components)
    fig4 = plt.figure(figsize=(8, 5))
    X = df.iloc[:, 1:].values
    X_scaled = StandardScaler().fit_transform(X)
    pca = PCA(n_components=20)
    pca.fit(X_scaled)
    explained = np.cumsum(pca.explained_variance_ratio_)
    plt.plot(range(1, 21), explained, marker='o')
    plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
    plt.title('Cumulative Explained Variance by PCA')
    plt.xlabel('Number of Principal Components')
    plt.ylabel('Cumulative Variance')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig4)
    plt.close(fig4)
    # Plot 5: Savitzky-Golay first derivative + per-spectrum min-max normalization
    fig5 = plt.figure(figsize=(16, 8))
    y_vals = df['Label'].values
    wavelengths = df.columns[1:].astype(float)
    X = df.iloc[:, 1:].values
    # smooth + differentiate each spectrum along the wavelength axis
    X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
    scaler = MinMaxScaler()
    # normalize every spectrum independently (row-wise min-max to [0, 1])
    X_deriv_norm = np.array([scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv])
    unique_labels = np.unique(y_vals)
    colors = get_cmap('tab10')(np.linspace(0, 1, len(unique_labels)))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            # only the first curve of each class gets a legend label
            plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative + Normalization")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("Normalized First Derivative")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig5)
    plt.close(fig5)
    # Plot 6: Same derivative spectra without normalization
    fig6 = plt.figure(figsize=(16, 8))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            plt.plot(wavelengths, X_deriv[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative (No Normalization)")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("First Derivative Absorbance")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig6)
    plt.close(fig6)
    # Plot 7: PCA Score plot + Loadings on standardized finite-difference derivatives
    fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
    wavelength_columns = df.columns[1:]
    labels = df.iloc[:, 0]  # NOTE: rebinds `labels`; Plot 8 below reuses this Series
    data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(data, axis=1)  # simple adjacent-difference derivative
    scaler = StandardScaler()
    normalized_derivative_data = scaler.fit_transform(derivative_data)
    derivative_wavelength_columns = [f'Der_{w1}-{w2}' for w1, w2 in zip(wavelength_columns[:-1], wavelength_columns[1:])]
    processed_df = pd.DataFrame(normalized_derivative_data, columns=derivative_wavelength_columns)
    processed_df.insert(0, 'Label', labels)
    # assumes raw labels are integer-like strings/numbers — TODO confirm with CSV
    processed_df['Label'] = processed_df['Label'].astype(int)
    X_processed = processed_df.drop('Label', axis=1)
    y_processed = processed_df['Label']
    pca = PCA(n_components=2)
    principal_components = pca.fit_transform(X_processed)
    pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
    pca_df['Label'] = y_processed.reset_index(drop=True)
    targets = y_processed.unique()
    cmap = get_cmap('tab10')
    for i, target in enumerate(targets):
        idx = pca_df['Label'] == target
        axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'], color=cmap(i % cmap.N), label=f'Label {target}')
    axs[0].set_title('Score Plot: PC1 vs. PC2')
    axs[0].legend()
    axs[0].grid()
    loadings = pca.components_.T  # (n_features, 2): per-wavelength PC weights
    axs[1].plot(loadings[:, 0], label='PC1 Loadings')
    axs[1].plot(loadings[:, 1], label='PC2 Loadings', color='black')
    axs[1].set_title('Loadings Plot')
    axs[1].legend()
    axs[1].grid()
    plt.tight_layout()
    plots.append(fig7)
    plt.close(fig7)
    # Plot 8: 3x2 PCA summary — scores, loadings, scree for raw vs. derivative data
    fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
    raw_data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(raw_data, axis=1)
    scaler = StandardScaler()
    raw_scaled = scaler.fit_transform(raw_data)
    derivative_scaled = scaler.fit_transform(derivative_data)
    pca_raw = PCA(n_components=10)
    pca_raw_scores = pca_raw.fit_transform(raw_scaled)
    explained_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
    pca_der = PCA(n_components=10)
    pca_der_scores = pca_der.fit_transform(derivative_scaled)
    explained_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
    targets = np.unique(labels)  # `labels` Series from the Plot 7 section
    cmap = get_cmap('tab10')
    # Row 1: score plots (axhline/axvline redrawn each iteration — harmless)
    for i, target in enumerate(targets):
        idx = labels == target
        axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
        axs[0, 0].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
        axs[0, 0].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
        axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
        axs[0, 1].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
        axs[0, 1].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[0, 0].set_title('Raw Data: PCA Score Plot')
    axs[0, 1].set_title('1st Derivative: PCA Score Plot')
    # Row 2: PCA Loadings for Raw and Derivative (with horizontal and vertical lines at 0)
    axs[1, 0].plot(pca_raw.components_[0], label='PC1')
    axs[1, 0].plot(pca_raw.components_[1], label='PC2')
    axs[1, 0].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[1, 0].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[1, 1].plot(pca_der.components_[0], label='PC1')
    axs[1, 1].plot(pca_der.components_[1], label='PC2')
    axs[1, 1].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[1, 1].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    # Row 3: scree plots (cumulative % variance over 10 components)
    axs[2, 0].plot(range(1, 11), explained_var_raw, marker='o')
    axs[2, 1].plot(range(1, 11), explained_var_der, marker='o')
    axs[0, 0].legend(); axs[0, 1].legend()
    axs[1, 0].legend(); axs[1, 1].legend()
    axs[2, 0].set_ylim(0, 105)
    axs[2, 1].set_ylim(0, 105)
    axs[2, 0].set_title('Raw Data: Scree Plot')
    axs[2, 1].set_title('1st Derivative: Scree Plot')
    plt.tight_layout()
    plots.append(fig8)
    plt.close(fig8)
    return plots
# === Features for the classical models ===
# NOTE: the labels were already encoded at load time (module-level `le`/`y`);
# the duplicate LabelEncoder fit that used to live here was redundant and has
# been removed — `y` is unchanged.
X = df.iloc[:, 1:].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # also reused below by the CNN pipeline

# === PCA reduction ===
# Project onto the first two principal components before fitting the trees.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Fixed seed keeps the split reproducible across app restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X_pca, y, test_size=0.2, random_state=42
)

# === Models ===
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
# === CNN ===
class MilkDataset(Dataset):
    """Wrap (X, y) arrays as a PyTorch dataset of single-channel spectra."""

    def __init__(self, X, y):
        # Insert a channel axis so every sample is shaped (1, n_wavelengths),
        # matching Conv1d's expected (N, C, L) layout once batched.
        self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Number of spectra held by the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (spectrum, label) pair at position ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target
# Separate split on the full scaled spectra (no PCA) for the CNN; the same
# seed/ratio as the classical-model split keeps the partitions comparable.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)
train_dataset = MilkDataset(X_train_raw, y_train_raw)
test_dataset = MilkDataset(X_test_raw, y_test_raw)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)
class CNN1D(nn.Module):
    """Small 1-D CNN classifier for NIR spectra.

    Two conv+ReLU stages feed a global average pool, so the network accepts
    spectra of any length.

    Args:
        num_classes: width of the output (logit) layer. Defaults to the
            number of distinct classes in the module-level label vector
            ``y``, preserving the original zero-argument construction.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: infer from the global labels.
            num_classes = len(np.unique(y))
        self.net = nn.Sequential(
            nn.Conv1d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv1d(32, 64, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),  # global average pool -> (N, 64, 1)
            nn.Flatten(),             # -> (N, 64)
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        """Map a (N, 1, L) float tensor to (N, num_classes) logits."""
        return self.net(x)
# Train the CNN once at startup; the resulting accuracies are computed but
# only the pre-rendered images are shown in the UI below.
model = CNN1D()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Fixed 10-epoch training loop over mini-batches of raw scaled spectra.
for epoch in range(10):
    model.train()
    for Xb, yb in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(Xb), yb)
        loss.backward()
        optimizer.step()
# Single full-batch evaluation on both splits (no grad tracking needed).
model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test_raw, dtype=torch.float32).unsqueeze(1)
    test_preds = model(X_test_tensor).argmax(dim=1)
    test_acc = (test_preds == torch.tensor(y_test_raw)).float().mean().item()
    X_train_tensor = torch.tensor(X_train_raw, dtype=torch.float32).unsqueeze(1)
    train_preds = model(X_train_tensor).argmax(dim=1)
    train_acc = (train_preds == torch.tensor(y_train_raw)).float().mean().item()
# ---- Gradio UI: tabs for raw data preview, plots, model outputs, takeaways ----
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 SPECTROSCOPY - YOUR HEALTH OUR CONCERN!!!")
    with gr.Tabs():
        with gr.Tab("Preview Raw Data"):
            gr.DataFrame(df.head(50), label="Preview of Raw Data")
        with gr.Tab("Visualizations"):
            plot_button = gr.Button("Generate Spectroscopy Visualizations")
            # Eight Plot outputs — must match the number of figures plot_all returns.
            out_gallery = [gr.Plot() for _ in range(8)]
            plot_button.click(fn=plot_all, inputs=[], outputs=out_gallery)
        with gr.Tab("Models"):
            with gr.Tabs():
                # Model results are served as pre-rendered image files.
                with gr.Tab("Random Forest"):
                    gr.Image(value="rf.png", label="Random Forest Output")
                with gr.Tab("Decision Tree"):
                    gr.Markdown("**Confusion Matrix**")
                    gr.Image(value="tree_cm.png", label="Confusion Matrix")
                    gr.Markdown("**Decision Tree Visualization**")
                    gr.Image(value="tree.png", label="Tree Structure")
                with gr.Tab("1D CNN (Raw Data)"):
                    gr.Image(value="1d.png", label="1D CNN Output")
        with gr.Tab("Takeaways"):
            gr.Markdown("## 🌿 Why Spectroscopy Matters in the Dairy Ecosystem")
            gr.Markdown("### 👨‍🌾 Farmers")
            gr.Markdown("""
- ✅ Enables **quick, non-destructive testing** of milk quality at the source.
- ⚠️ Allows **early detection** of spoilage, contamination, or adulteration.
- 💰 Supports **transparent and fair pricing** in cooperative and local markets.
""")
            gr.Markdown("### 🏛️ Government & Regulators")
            gr.Markdown("""
- 🛡️ Reinforces **food safety and public health** monitoring systems.
- 📊 Ensures **consistency and traceability** across the dairy supply chain.
- 🚀 Encourages **innovation in agricultural technologies** and rural development.
""")
            gr.Markdown("### 🏭 Businesses & Cooperatives")
            gr.Markdown("""
- ⏱️ Facilitates **real-time quality control** during production and logistics.
- 💡 Reduces dependency on slow, expensive lab tests.
- 🤝 Builds **consumer trust** through transparency and quality assurance.
""")
            gr.Markdown("---")
            gr.Markdown("## 🧬 Parting Thought: Healthy Living Starts with Smart Choices")
            gr.Markdown("""
> “Milk is nature’s first food – and spectroscopy helps us keep it honest, pure, and nutritious.”
>
> Embrace technology. Protect health.
> Let's make every drop of milk safe and reliable – for everyone.
""")
# Bind to all interfaces on port 7860 (standard Hugging Face Spaces setup).
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)