# βœ… FULL INTEGRATED SCRIPT
# Includes your existing visualizations + new Models and Prediction tabs
"""NIR milk-spectroscopy dashboard.

Loads ``milk_absorbance.csv`` (first column = class label, remaining columns =
absorbance per wavelength), renders eight exploratory plots on demand, trains
RandomForest / DecisionTree baselines on a 2-component PCA projection plus a
small 1D CNN on the scaled raw spectra, and serves everything through a
Gradio tabbed UI.

Run directly; the app listens on 0.0.0.0:7860.
"""

import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix  # noqa: F401 (kept for parity with original)
from scipy.signal import savgol_filter
from math import pi
# NOTE(fix): `from matplotlib.cm import get_cmap` was removed here —
# matplotlib.cm.get_cmap was deprecated in 3.7 and REMOVED in matplotlib 3.9,
# so that import crashes on current installs. plt.get_cmap is used instead.
import seaborn as sns  # noqa: F401 (kept for parity with original)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

# Headless backend: figures are rendered server-side for Gradio, never shown.
plt.switch_backend('agg')

# ---------- Data loading ----------
df = pd.read_csv("milk_absorbance.csv")
df.rename(columns={df.columns[0]: 'Label'}, inplace=True)

# Label encoding (done once; the original script repeated this block verbatim).
le = LabelEncoder()
y = le.fit_transform(df['Label'].values)


# ---------- Plotting Function ----------
def plot_all():
    """Build and return the eight exploratory matplotlib figures.

    Returns:
        list[matplotlib.figure.Figure]: figures in fixed order —
        mean spectra, offset mean spectra, radar plot, PCA cumulative
        variance, normalized first-derivative spectra, raw first-derivative
        spectra, PCA score+loadings, and a 3x2 raw-vs-derivative PCA summary.
    """
    plots = []

    # Plot 1: Mean Spectra per Class
    fig1 = plt.figure(figsize=(12, 6))
    for label in df['Label'].unique():
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum,
                 label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig1)
    plt.close(fig1)

    # Plot 2: Offset Mean Spectra (vertical offset separates overlapping curves)
    fig2 = plt.figure(figsize=(12, 6))
    offset_step = 0.1
    for i, label in enumerate(df['Label'].unique()):
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        offset = i * offset_step
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum + offset,
                 label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class (with Offset)')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance (Offset Applied)')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig2)
    plt.close(fig2)

    # Plot 3: Radar Plot over every 20th wavelength (full spectrum is too dense)
    fig3 = plt.figure(figsize=(8, 8))
    ax = plt.subplot(111, polar=True)
    subset_cols = df.columns[1:][::20]
    labels = df['Label'].unique()
    N = len(subset_cols)
    # Close the polygon by appending the starting angle (0) at the end.
    angles = [n / float(N) * 2 * pi for n in range(N)] + [0]
    for label in labels:
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df[subset_cols].mean().values
        values = mean_spectrum.tolist() + [mean_spectrum[0]]
        ax.plot(angles, values, label=f'Label {label}')
        ax.fill(angles, values, alpha=0.1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(subset_cols.astype(int))
    plt.title('Radar Plot of Mean Spectra (Subset Wavelengths)')
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.tight_layout()
    plots.append(fig3)
    plt.close(fig3)

    # Plot 4: Cumulative PCA Explained Variance (first 20 components)
    fig4 = plt.figure(figsize=(8, 5))
    X = df.iloc[:, 1:].values
    X_scaled = StandardScaler().fit_transform(X)
    pca = PCA(n_components=20)
    pca.fit(X_scaled)
    explained = np.cumsum(pca.explained_variance_ratio_)
    plt.plot(range(1, 21), explained, marker='o')
    plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
    plt.title('Cumulative Explained Variance by PCA')
    plt.xlabel('Number of Principal Components')
    plt.ylabel('Cumulative Variance')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig4)
    plt.close(fig4)

    # Plot 5: Savitzky-Golay first derivative + per-spectrum min-max normalization
    fig5 = plt.figure(figsize=(16, 8))
    y_vals = df['Label'].values
    wavelengths = df.columns[1:].astype(float)
    X = df.iloc[:, 1:].values
    X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
    scaler = MinMaxScaler()
    # Each spectrum is normalized independently (row-wise fit is intentional).
    X_deriv_norm = np.array(
        [scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv]
    )
    unique_labels = np.unique(y_vals)
    colors = plt.get_cmap('tab10')(np.linspace(0, 1, len(unique_labels)))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            # Label only the first spectrum of each class to keep the legend small.
            plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3,
                     label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative + Normalization")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("Normalized First Derivative")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig5)
    plt.close(fig5)

    # Plot 6: Derivative Only (No Normalization)
    fig6 = plt.figure(figsize=(16, 8))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            plt.plot(wavelengths, X_deriv[i], color=color, alpha=0.3,
                     label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative (No Normalization)")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("First Derivative Absorbance")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig6)
    plt.close(fig6)

    # Plot 7: PCA Score plot + Loadings on normalized finite-difference derivative
    fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
    wavelength_columns = df.columns[1:]
    labels = df.iloc[:, 0]
    data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(data, axis=1)
    scaler = StandardScaler()
    normalized_derivative_data = scaler.fit_transform(derivative_data)
    derivative_wavelength_columns = [
        f'Der_{w1}-{w2}'
        for w1, w2 in zip(wavelength_columns[:-1], wavelength_columns[1:])
    ]
    processed_df = pd.DataFrame(normalized_derivative_data,
                                columns=derivative_wavelength_columns)
    processed_df.insert(0, 'Label', labels)
    processed_df['Label'] = processed_df['Label'].astype(int)
    X_processed = processed_df.drop('Label', axis=1)
    y_processed = processed_df['Label']
    pca = PCA(n_components=2)
    principal_components = pca.fit_transform(X_processed)
    pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
    pca_df['Label'] = y_processed.reset_index(drop=True)
    targets = y_processed.unique()
    cmap = plt.get_cmap('tab10')
    for i, target in enumerate(targets):
        idx = pca_df['Label'] == target
        axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'],
                       color=cmap(i % cmap.N), label=f'Label {target}')
    axs[0].set_title('Score Plot: PC1 vs. PC2')
    axs[0].legend()
    axs[0].grid()
    loadings = pca.components_.T
    axs[1].plot(loadings[:, 0], label='PC1 Loadings')
    axs[1].plot(loadings[:, 1], label='PC2 Loadings', color='black')
    axs[1].set_title('Loadings Plot')
    axs[1].legend()
    axs[1].grid()
    plt.tight_layout()
    plots.append(fig7)
    plt.close(fig7)

    # Plot 8: 3x2 PCA Summary (raw vs. 1st derivative: scores / loadings / scree)
    fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
    raw_data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(raw_data, axis=1)
    scaler = StandardScaler()
    raw_scaled = scaler.fit_transform(raw_data)
    derivative_scaled = scaler.fit_transform(derivative_data)
    pca_raw = PCA(n_components=10)
    pca_raw_scores = pca_raw.fit_transform(raw_scaled)
    explained_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
    pca_der = PCA(n_components=10)
    pca_der_scores = pca_der.fit_transform(derivative_scaled)
    explained_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
    targets = np.unique(labels)  # `labels` carried over from Plot 7 on purpose
    cmap = plt.get_cmap('tab10')

    # Row 1: score plots
    for i, target in enumerate(targets):
        idx = labels == target
        axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1],
                          s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
        axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1],
                          s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
    # Zero-reference lines drawn once (the original redrew them per class).
    axs[0, 0].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[0, 0].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[0, 1].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[0, 1].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[0, 0].set_title('Raw Data: PCA Score Plot')
    axs[0, 1].set_title('1st Derivative: PCA Score Plot')

    # Row 2: PCA Loadings for Raw and Derivative (with zero reference lines)
    axs[1, 0].plot(pca_raw.components_[0], label='PC1')
    axs[1, 0].plot(pca_raw.components_[1], label='PC2')
    axs[1, 0].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[1, 0].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[1, 1].plot(pca_der.components_[0], label='PC1')
    axs[1, 1].plot(pca_der.components_[1], label='PC2')
    axs[1, 1].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[1, 1].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical

    # Row 3: scree (cumulative % variance) plots
    axs[2, 0].plot(range(1, 11), explained_var_raw, marker='o')
    axs[2, 1].plot(range(1, 11), explained_var_der, marker='o')
    axs[0, 0].legend(); axs[0, 1].legend()
    axs[1, 0].legend(); axs[1, 1].legend()
    axs[2, 0].set_ylim(0, 105)
    axs[2, 1].set_ylim(0, 105)
    axs[2, 0].set_title('Raw Data: Scree Plot')
    axs[2, 1].set_title('1st Derivative: Scree Plot')
    plt.tight_layout()
    plots.append(fig8)
    plt.close(fig8)

    return plots


# ---------- Classical models on a 2-component PCA projection ----------
# (Duplicate LabelEncoder block removed here: `y` is already encoded above.)
X = df.iloc[:, 1:].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# === PCA reduction ===
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
X_train, X_test, y_train, y_test = train_test_split(
    X_pca, y, test_size=0.2, random_state=42)

# === Models ===
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)


# === CNN ===
class MilkDataset(Dataset):
    """Wraps scaled spectra as (1, n_wavelengths) float tensors for Conv1d."""

    def __init__(self, X, y):
        # unsqueeze(1) adds the single input channel expected by Conv1d.
        self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42)
train_loader = DataLoader(MilkDataset(X_train_raw, y_train_raw),
                          batch_size=16, shuffle=True)
test_loader = DataLoader(MilkDataset(X_test_raw, y_test_raw), batch_size=16)


class CNN1D(nn.Module):
    """Two Conv1d layers + global average pooling + linear classifier head."""

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv1d(32, 64, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),   # collapse the wavelength axis
            nn.Flatten(),
            nn.Linear(64, len(np.unique(y)))
        )

    def forward(self, x):
        return self.net(x)


model = CNN1D()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    model.train()
    for Xb, yb in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(Xb), yb)
        loss.backward()
        optimizer.step()

model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test_raw, dtype=torch.float32).unsqueeze(1)
    test_preds = model(X_test_tensor).argmax(dim=1)
    test_acc = (test_preds == torch.tensor(y_test_raw)).float().mean().item()
    X_train_tensor = torch.tensor(X_train_raw, dtype=torch.float32).unsqueeze(1)
    train_preds = model(X_train_tensor).argmax(dim=1)
    train_acc = (train_preds == torch.tensor(y_train_raw)).float().mean().item()
# NOTE(review): train_acc / test_acc (and the fitted rf/dt models) are computed
# but never surfaced in the UI below — the Models tab shows static images.
# Consider wiring these metrics into the Gradio tabs.

# ---------- Gradio UI ----------
with gr.Blocks() as demo:
    gr.Markdown("# πŸ§ͺ SPECTROSCOPY - YOUR HEALTH OUR CONCERN!!!")
    with gr.Tabs():
        with gr.Tab("Preview Raw Data"):
            gr.DataFrame(df.head(50), label="Preview of Raw Data")
        with gr.Tab("Visualizations"):
            plot_button = gr.Button("Generate Spectroscopy Visualizations")
            out_gallery = [gr.Plot() for _ in range(8)]
            plot_button.click(fn=plot_all, inputs=[], outputs=out_gallery)
        with gr.Tab("Models"):
            with gr.Tabs():
                with gr.Tab("Random Forest"):
                    gr.Image(value="rf.png", label="Random Forest Output")
                with gr.Tab("Decision Tree"):
                    gr.Markdown("**Confusion Matrix**")
                    gr.Image(value="tree_cm.png", label="Confusion Matrix")
                    gr.Markdown("**Decision Tree Visualization**")
                    gr.Image(value="tree.png", label="Tree Structure")
                with gr.Tab("1D CNN (Raw Data)"):
                    gr.Image(value="1d.png", label="1D CNN Output")
        with gr.Tab("Takeaways"):
            gr.Markdown("## 🌿 Why Spectroscopy Matters in the Dairy Ecosystem")
            gr.Markdown("### πŸ‘¨β€πŸŒΎ Farmers")
            gr.Markdown("""
- βœ… Enables **quick, non-destructive testing** of milk quality at the source.
- ⚠️ Allows **early detection** of spoilage, contamination, or adulteration.
- πŸ’° Supports **transparent and fair pricing** in cooperative and local markets.
""")
            gr.Markdown("### πŸ›οΈ Government & Regulators")
            gr.Markdown("""
- πŸ›‘οΈ Reinforces **food safety and public health** monitoring systems.
- πŸ“Š Ensures **consistency and traceability** across the dairy supply chain.
- πŸš€ Encourages **innovation in agricultural technologies** and rural development.
""")
            gr.Markdown("### 🏭 Businesses & Cooperatives")
            gr.Markdown("""
- ⏱️ Facilitates **real-time quality control** during production and logistics.
- πŸ’‘ Reduces dependency on slow, expensive lab tests.
- 🀝 Builds **consumer trust** through transparency and quality assurance.
""")
            gr.Markdown("---")
            gr.Markdown("## 🧬 Parting Thought: Healthy Living Starts with Smart Choices")
            gr.Markdown("""
> β€œMilk is nature’s first food – and spectroscopy helps us keep it honest, pure, and nutritious.”
>
> Embrace technology. Protect health.
> Let's make every drop of milk safe and reliable – for everyone.
""")

demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)