# Milk NIR spectroscopy Gradio app (HuggingFace Space).
# ✅ FULL INTEGRATED SCRIPT
# Includes your existing visualizations + new Models and Prediction tabs
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.signal import savgol_filter
from math import pi
# NOTE(review): matplotlib.cm.get_cmap was removed in matplotlib 3.9 —
# pin matplotlib < 3.9, or migrate to matplotlib.colormaps if upgrading.
from matplotlib.cm import get_cmap
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

# Non-interactive backend: figures are rendered server-side for Gradio.
plt.switch_backend('agg')
# Load dataset: first column holds the class label, the remaining columns
# are absorbance readings keyed by wavelength (presumably nm — TODO confirm
# against milk_absorbance.csv headers).
df = pd.read_csv("milk_absorbance.csv")
df.rename(columns={df.columns[0]: 'Label'}, inplace=True)

# Label encoding: map the class labels to integer codes for the models.
le = LabelEncoder()
y = le.fit_transform(df['Label'].values)
# ---------- Plotting Function (Unchanged) ----------
def plot_all():
    """Build the eight spectroscopy figures shown in the Visualizations tab.

    Reads the module-level ``df`` ('Label' column + per-wavelength absorbance
    columns). Returns a list of eight closed matplotlib Figure objects, in
    the order the UI's eight gr.Plot outputs expect.
    """
    plots = []

    # Plot 1: Mean Spectra per Class
    fig1 = plt.figure(figsize=(12, 6))
    for label in df['Label'].unique():
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum, label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig1)
    plt.close(fig1)  # close so pyplot does not keep re-rendering it

    # Plot 2: Offset Mean Spectra — same curves as Plot 1, but each class is
    # shifted vertically so overlapping spectra can be told apart.
    fig2 = plt.figure(figsize=(12, 6))
    offset_step = 0.1
    for i, label in enumerate(df['Label'].unique()):
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        offset = i * offset_step
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum + offset, label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class (with Offset)')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance (Offset Applied)')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig2)
    plt.close(fig2)

    # Plot 3: Radar Plot over every 20th wavelength column.
    fig3 = plt.figure(figsize=(8, 8))
    ax = plt.subplot(111, polar=True)
    subset_cols = df.columns[1:][::20]
    labels = df['Label'].unique()
    N = len(subset_cols)
    # Evenly spaced angles around the circle; the trailing 0 closes the polygon.
    angles = [n / float(N) * 2 * pi for n in range(N)] + [0]
    for label in labels:
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df[subset_cols].mean().values
        # Repeat the first value so the plotted line returns to its start.
        values = mean_spectrum.tolist() + [mean_spectrum[0]]
        ax.plot(angles, values, label=f'Label {label}')
        ax.fill(angles, values, alpha=0.1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(subset_cols.astype(int))
    plt.title('Radar Plot of Mean Spectra (Subset Wavelengths)')
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.tight_layout()
    plots.append(fig3)
    plt.close(fig3)

    # Plot 4: Cumulative PCA Explained Variance on standardized spectra.
    fig4 = plt.figure(figsize=(8, 5))
    X = df.iloc[:, 1:].values
    X_scaled = StandardScaler().fit_transform(X)
    pca = PCA(n_components=20)
    pca.fit(X_scaled)
    explained = np.cumsum(pca.explained_variance_ratio_)
    plt.plot(range(1, 21), explained, marker='o')
    plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
    plt.title('Cumulative Explained Variance by PCA')
    plt.xlabel('Number of Principal Components')
    plt.ylabel('Cumulative Variance')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig4)
    plt.close(fig4)

    # Plot 5: Savitzky-Golay first derivative of every spectrum, then
    # per-spectrum min-max normalization to [0, 1].
    fig5 = plt.figure(figsize=(16, 8))
    y_vals = df['Label'].values
    wavelengths = df.columns[1:].astype(float)
    X = df.iloc[:, 1:].values
    X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
    scaler = MinMaxScaler()
    # Each row (spectrum) is scaled independently of the others.
    X_deriv_norm = np.array([scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv])
    unique_labels = np.unique(y_vals)
    colors = get_cmap('tab10')(np.linspace(0, 1, len(unique_labels)))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            # Only the first trace of each class carries a legend entry.
            plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative + Normalization")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("Normalized First Derivative")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig5)
    plt.close(fig5)

    # Plot 6: Derivative Only (No Norm) — same traces without rescaling.
    fig6 = plt.figure(figsize=(16, 8))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            plt.plot(wavelengths, X_deriv[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative (No Normalization)")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("First Derivative Absorbance")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig6)
    plt.close(fig6)

    # Plot 7: PCA Score plot (PC1 vs PC2) and Loadings plot, computed on the
    # standardized simple-difference derivative of the raw spectra.
    fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
    wavelength_columns = df.columns[1:]
    labels = df.iloc[:, 0]
    data = df.iloc[:, 1:].values.astype(float)
    # np.diff gives a discrete first derivative (one fewer column than data).
    derivative_data = np.diff(data, axis=1)
    scaler = StandardScaler()
    normalized_derivative_data = scaler.fit_transform(derivative_data)
    derivative_wavelength_columns = [f'Der_{w1}-{w2}' for w1, w2 in zip(wavelength_columns[:-1], wavelength_columns[1:])]
    processed_df = pd.DataFrame(normalized_derivative_data, columns=derivative_wavelength_columns)
    processed_df.insert(0, 'Label', labels)
    processed_df['Label'] = processed_df['Label'].astype(int)
    X_processed = processed_df.drop('Label', axis=1)
    y_processed = processed_df['Label']
    pca = PCA(n_components=2)
    principal_components = pca.fit_transform(X_processed)
    pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
    pca_df['Label'] = y_processed.reset_index(drop=True)
    targets = y_processed.unique()
    cmap = get_cmap('tab10')
    for i, target in enumerate(targets):
        idx = pca_df['Label'] == target
        axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'], color=cmap(i % cmap.N), label=f'Label {target}')
    axs[0].set_title('Score Plot: PC1 vs. PC2')
    axs[0].legend()
    axs[0].grid()
    loadings = pca.components_.T
    axs[1].plot(loadings[:, 0], label='PC1 Loadings')
    axs[1].plot(loadings[:, 1], label='PC2 Loadings', color='black')
    axs[1].set_title('Loadings Plot')
    axs[1].legend()
    axs[1].grid()
    plt.tight_layout()
    plots.append(fig7)
    plt.close(fig7)

    # Plot 8: 3x2 PCA summary — scores (row 1), loadings (row 2) and scree
    # plots (row 3) for raw vs first-derivative data side by side.
    fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
    raw_data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(raw_data, axis=1)
    scaler = StandardScaler()
    raw_scaled = scaler.fit_transform(raw_data)
    derivative_scaled = scaler.fit_transform(derivative_data)
    pca_raw = PCA(n_components=10)
    pca_raw_scores = pca_raw.fit_transform(raw_scaled)
    explained_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
    pca_der = PCA(n_components=10)
    pca_der_scores = pca_der.fit_transform(derivative_scaled)
    explained_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
    targets = np.unique(labels)
    cmap = get_cmap('tab10')
    for i, target in enumerate(targets):
        idx = labels == target
        axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
        axs[0, 0].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
        axs[0, 0].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
        axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
        axs[0, 1].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
        axs[0, 1].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[0, 0].set_title('Raw Data: PCA Score Plot')
    axs[0, 1].set_title('1st Derivative: PCA Score Plot')
    # Row 2: PCA Loadings for Raw and Derivative (with horizontal and vertical lines at 0)
    axs[1, 0].plot(pca_raw.components_[0], label='PC1')
    axs[1, 0].plot(pca_raw.components_[1], label='PC2')
    axs[1, 0].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[1, 0].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[1, 1].plot(pca_der.components_[0], label='PC1')
    axs[1, 1].plot(pca_der.components_[1], label='PC2')
    axs[1, 1].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[1, 1].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[2, 0].plot(range(1, 11), explained_var_raw, marker='o')
    axs[2, 1].plot(range(1, 11), explained_var_der, marker='o')
    axs[0, 0].legend(); axs[0, 1].legend()
    axs[1, 0].legend(); axs[1, 1].legend()
    axs[2, 0].set_ylim(0, 105)
    axs[2, 1].set_ylim(0, 105)
    axs[2, 0].set_title('Raw Data: Scree Plot')
    axs[2, 1].set_title('1st Derivative: Scree Plot')
    plt.tight_layout()
    plots.append(fig8)
    plt.close(fig8)

    return plots
# ---------- Feature preparation for the classical models ----------
# `le` and `y` were already fitted at load time from the same 'Label' column;
# re-instantiating and re-fitting the encoder here was pure duplication, so
# the existing encoding is reused as-is.
X = df.iloc[:, 1:].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# === PCA reduction === project the standardized spectra onto 2 components
# so the tree-based models train on a low-dimensional representation.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
# === Models === baseline classifiers trained on the 2-D PCA scores.
# sklearn estimators return self from fit(), so construction and training
# are chained into single expressions.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
| # === CNN === | |
class MilkDataset(Dataset):
    """Torch dataset wrapping spectra as (1, n_wavelengths) float tensors.

    The channel axis is inserted up front so each sample feeds directly
    into a Conv1d layer; labels are stored as long tensors for CrossEntropy.
    """

    def __init__(self, X, y):
        # unsqueeze(1) adds the single input channel expected by Conv1d.
        self.inputs = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
        self.labels = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.labels[idx]
# 80/20 split of the full standardized spectra (no PCA) for the CNN.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)
train_set = MilkDataset(X_train_raw, y_train_raw)
test_set = MilkDataset(X_test_raw, y_test_raw)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
test_loader = DataLoader(test_set, batch_size=16)
class CNN1D(nn.Module):
    """Small 1-D CNN classifier over raw standardized spectra.

    Two conv layers feed a global average pool and a linear head. The head
    size was previously hard-wired to the module-level global ``y``; it is
    now a parameter so the class is reusable and testable in isolation.

    Args:
        num_classes: number of output classes. Defaults to
            ``len(np.unique(y))`` (the original behavior) when omitted.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward compatible: size the head from the global labels.
            num_classes = len(np.unique(y))
        self.net = nn.Sequential(
            nn.Conv1d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv1d(32, 64, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),  # global average pool -> (N, 64, 1)
            nn.Flatten(),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        # x: (batch, 1, n_wavelengths) -> (batch, num_classes) logits
        return self.net(x)
# Train the CNN for a fixed 10 epochs with Adam and cross-entropy loss.
model = CNN1D()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    model.train()
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        criterion(model(batch_x), batch_y).backward()
        optimizer.step()

# Single evaluation pass over both splits; gradients are not needed.
model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test_raw, dtype=torch.float32).unsqueeze(1)
    test_preds = model(X_test_tensor).argmax(dim=1)
    test_acc = (test_preds == torch.tensor(y_test_raw)).float().mean().item()
    X_train_tensor = torch.tensor(X_train_raw, dtype=torch.float32).unsqueeze(1)
    train_preds = model(X_train_tensor).argmax(dim=1)
    train_acc = (train_preds == torch.tensor(y_train_raw)).float().mean().item()
# ---------- Gradio UI: four tabs (data preview, plots, models, takeaways) ----------
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 SPECTROSCOPY - YOUR HEALTH OUR CONCERN!!!")
    with gr.Tabs():
        # Tab 1: first 50 rows of the raw CSV.
        with gr.Tab("Preview Raw Data"):
            gr.DataFrame(df.head(50), label="Preview of Raw Data")
        # Tab 2: the eight figures produced on demand by plot_all().
        with gr.Tab("Visualizations"):
            plot_button = gr.Button("Generate Spectroscopy Visualizations")
            # Eight Plot slots, one per figure returned by plot_all().
            out_gallery = [gr.Plot() for _ in range(8)]
            plot_button.click(fn=plot_all, inputs=[], outputs=out_gallery)
        # Tab 3: pre-rendered model result images loaded from disk
        # (rf.png, tree_cm.png, tree.png, 1d.png must exist next to the app).
        with gr.Tab("Models"):
            with gr.Tabs():
                with gr.Tab("Random Forest"):
                    gr.Image(value="rf.png", label="Random Forest Output")
                with gr.Tab("Decision Tree"):
                    gr.Markdown("**Confusion Matrix**")
                    gr.Image(value="tree_cm.png", label="Confusion Matrix")
                    gr.Markdown("**Decision Tree Visualization**")
                    gr.Image(value="tree.png", label="Tree Structure")
                with gr.Tab("1D CNN (Raw Data)"):
                    gr.Image(value="1d.png", label="1D CNN Output")
        # Tab 4: narrative takeaways (static markdown only).
        with gr.Tab("Takeaways"):
            gr.Markdown("## 🌿 Why Spectroscopy Matters in the Dairy Ecosystem")
            gr.Markdown("### 👨🌾 Farmers")
            gr.Markdown("""
- ✅ Enables **quick, non-destructive testing** of milk quality at the source.
- ⚠️ Allows **early detection** of spoilage, contamination, or adulteration.
- 💰 Supports **transparent and fair pricing** in cooperative and local markets.
""")
            gr.Markdown("### 🏛️ Government & Regulators")
            gr.Markdown("""
- 🛡️ Reinforces **food safety and public health** monitoring systems.
- 📊 Ensures **consistency and traceability** across the dairy supply chain.
- 🚀 Encourages **innovation in agricultural technologies** and rural development.
""")
            gr.Markdown("### 🏭 Businesses & Cooperatives")
            gr.Markdown("""
- ⏱️ Facilitates **real-time quality control** during production and logistics.
- 💡 Reduces dependency on slow, expensive lab tests.
- 🤝 Builds **consumer trust** through transparency and quality assurance.
""")
            gr.Markdown("---")
            gr.Markdown("## 🧬 Parting Thought: Healthy Living Starts with Smart Choices")
            gr.Markdown("""
> “Milk is nature’s first food – and spectroscopy helps us keep it honest, pure, and nutritious.”
>
> Embrace technology. Protect health.
> Let's make every drop of milk safe and reliable – for everyone.
""")

# Bind to all interfaces on port 7860 (standard for HuggingFace Spaces).
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)