File size: 14,661 Bytes
a6c3fc8
 
 
bcca7d7
 
50fbe35
bcca7d7
9e8b97d
 
2772470
a6c3fc8
9e8b97d
50fbe35
 
9e8b97d
a6c3fc8
 
 
 
 
89e4eb2
 
50fbe35
bc4f170
50fbe35
 
 
 
 
0c8fcfa
 
 
a6c3fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc4f170
a6c3fc8
 
 
 
 
 
 
bc4f170
a6c3fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cb4d7a
 
 
 
a6c3fc8
 
 
5cb4d7a
 
a6c3fc8
 
 
 
5cb4d7a
a6c3fc8
 
 
 
 
 
5cb4d7a
a6c3fc8
 
 
 
 
 
 
5cb4d7a
 
 
a6c3fc8
 
 
 
 
 
 
 
 
75975c8
a6c3fc8
 
75975c8
a6c3fc8
 
 
 
5cb4d7a
a6c3fc8
 
 
 
 
 
5cb4d7a
 
 
 
 
 
 
 
 
 
 
50fbe35
1c0c1dd
5cb4d7a
78ee49c
a6c3fc8
3c7ea85
9e8b97d
78ee49c
 
3c7ea85
 
40ff2e9
7eff709
 
 
 
 
 
 
 
 
 
 
 
 
 
a01d3d8
7eff709
 
 
 
 
 
 
 
 
 
a01d3d8
7eff709
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c7ea85
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# ✅ FULL INTEGRATED SCRIPT
# Gradio dashboard with four tabs: raw-data preview, spectroscopy
# visualizations, model result images, and takeaways.

import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler,LabelEncoder
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.signal import savgol_filter
from math import pi
from matplotlib.cm import get_cmap
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split


# Select the non-interactive Agg backend (no GUI window is needed).
plt.switch_backend('agg')

# Load dataset; whatever the first CSV column is called, expose it as 'Label'.
df = pd.read_csv("milk_absorbance.csv")
df = df.rename(columns={df.columns[0]: 'Label'})

# Label encoding: map the class labels to consecutive integer ids.
le = LabelEncoder()
y = le.fit_transform(df['Label'].values)
# ---------- Plotting Function (Unchanged) ----------
def plot_all():
    """Build the eight exploratory figures for the "Visualizations" tab.

    Reads the module-level ``df``: column ``Label`` holds the class of each
    sample and every other column holds one absorbance reading, with the
    column name parsed as the wavelength.

    Figures, in order:
        1. mean spectrum per class
        2. mean spectrum per class, vertically offset
        3. radar plot of the mean spectra on every 20th wavelength column
        4. cumulative explained variance of a PCA on standardized spectra
        5. Savitzky-Golay first derivative, min-max normalized per spectrum
        6. Savitzky-Golay first derivative, not normalized
        7. PCA score and loadings plots of the standardized finite difference
        8. 3x2 summary (scores, loadings, cumulative variance) for raw vs.
           finite-difference data

    Returns:
        list: the eight matplotlib ``Figure`` objects, each already passed to
        ``plt.close``.
    """
    plots = []

    # Plot 1: Mean Spectra per Class
    fig1 = plt.figure(figsize=(12, 6))
    for label in df['Label'].unique():
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum, label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig1)
    plt.close(fig1)

    # Plot 2: Offset Mean Spectra (each class shifted up so curves do not overlap)
    fig2 = plt.figure(figsize=(12, 6))
    offset_step = 0.1
    for i, label in enumerate(df['Label'].unique()):
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df.iloc[:, 1:].mean()
        offset = i * offset_step
        plt.plot(mean_spectrum.index.astype(int), mean_spectrum + offset, label=f'Label {label}')
    plt.title('Mean NIR Spectrum per Milk Ratio Class (with Offset)')
    plt.xlabel('Wavelength (nm)')
    plt.ylabel('Absorbance (Offset Applied)')
    plt.legend(title='Class (Milk Ratio)')
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig2)
    plt.close(fig2)

    # Plot 3: Radar Plot
    fig3 = plt.figure(figsize=(8, 8))
    ax = plt.subplot(111, polar=True)
    subset_cols = df.columns[1:][::20]  # every 20th wavelength column
    labels = df['Label'].unique()
    N = len(subset_cols)
    # One angle per wavelength; the first angle is repeated to close the polygon.
    angles = [n / float(N) * 2 * pi for n in range(N)] + [0]
    for label in labels:
        class_df = df[df['Label'] == label]
        mean_spectrum = class_df[subset_cols].mean().values
        values = mean_spectrum.tolist() + [mean_spectrum[0]]
        ax.plot(angles, values, label=f'Label {label}')
        ax.fill(angles, values, alpha=0.1)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(subset_cols.astype(int))
    plt.title('Radar Plot of Mean Spectra (Subset Wavelengths)')
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.tight_layout()
    plots.append(fig3)
    plt.close(fig3)

    # Plot 4: Cumulative PCA Explained Variance
    fig4 = plt.figure(figsize=(8, 5))
    X = df.iloc[:, 1:].values
    X_scaled = StandardScaler().fit_transform(X)
    # PCA cannot extract more components than min(n_samples, n_features), so
    # cap the requested 20 instead of raising on small datasets.
    n_components = min(20, *X_scaled.shape)
    pca = PCA(n_components=n_components)
    pca.fit(X_scaled)
    explained = np.cumsum(pca.explained_variance_ratio_)
    plt.plot(range(1, n_components + 1), explained, marker='o')
    plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
    plt.title('Cumulative Explained Variance by PCA')
    plt.xlabel('Number of Principal Components')
    plt.ylabel('Cumulative Variance')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig4)
    plt.close(fig4)

    # Plot 5: Derivative + Normalized Spectra
    fig5 = plt.figure(figsize=(16, 8))
    y_vals = df['Label'].values
    wavelengths = df.columns[1:].astype(float)
    X = df.iloc[:, 1:].values
    X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
    scaler = MinMaxScaler()
    # The scaler is refitted on every row, so each spectrum is normalized on its own.
    X_deriv_norm = np.array([scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv])
    unique_labels = np.unique(y_vals)
    # plt.get_cmap replaces the deprecated matplotlib.cm.get_cmap.
    colors = plt.get_cmap('tab10')(np.linspace(0, 1, len(unique_labels)))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            # Only the first curve of a class gets a legend entry.
            plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative + Normalization")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("Normalized First Derivative")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig5)
    plt.close(fig5)

    # Plot 6: Derivative Only (No Norm)
    fig6 = plt.figure(figsize=(16, 8))
    for label, color in zip(unique_labels, colors):
        indices = np.where(y_vals == label)[0]
        for i in indices:
            plt.plot(wavelengths, X_deriv[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
    plt.title("All Spectra After First Derivative (No Normalization)")
    plt.xlabel("Wavelength (nm)")
    plt.ylabel("First Derivative Absorbance")
    plt.legend(title="Group")
    plt.grid(True)
    plt.tight_layout()
    plots.append(fig6)
    plt.close(fig6)

    # Plot 7: Score + Loadings
    fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
    wavelength_columns = df.columns[1:]
    labels = df.iloc[:, 0]
    data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(data, axis=1)  # finite difference between neighbouring columns
    scaler = StandardScaler()
    normalized_derivative_data = scaler.fit_transform(derivative_data)
    derivative_wavelength_columns = [f'Der_{w1}-{w2}' for w1, w2 in zip(wavelength_columns[:-1], wavelength_columns[1:])]
    processed_df = pd.DataFrame(normalized_derivative_data, columns=derivative_wavelength_columns)
    processed_df.insert(0, 'Label', labels)
    # NOTE(review): the int cast assumes integer-valued labels; fractional
    # labels would be truncated here - confirm against the dataset.
    processed_df['Label'] = processed_df['Label'].astype(int)
    X_processed = processed_df.drop('Label', axis=1)
    y_processed = processed_df['Label']
    pca = PCA(n_components=2)
    principal_components = pca.fit_transform(X_processed)
    pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
    pca_df['Label'] = y_processed.reset_index(drop=True)
    targets = y_processed.unique()
    cmap = plt.get_cmap('tab10')
    for i, target in enumerate(targets):
        idx = pca_df['Label'] == target
        axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'], color=cmap(i % cmap.N), label=f'Label {target}')
    axs[0].set_title('Score Plot: PC1 vs. PC2')
    axs[0].legend()
    axs[0].grid()
    loadings = pca.components_.T
    axs[1].plot(loadings[:, 0], label='PC1 Loadings')
    axs[1].plot(loadings[:, 1], label='PC2 Loadings', color='black')
    axs[1].set_title('Loadings Plot')
    axs[1].legend()
    axs[1].grid()
    plt.tight_layout()
    plots.append(fig7)
    plt.close(fig7)

    # Plot 8: 3x2 PCA Summary
    fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
    raw_data = df.iloc[:, 1:].values.astype(float)
    derivative_data = np.diff(raw_data, axis=1)
    scaler = StandardScaler()
    raw_scaled = scaler.fit_transform(raw_data)
    derivative_scaled = scaler.fit_transform(derivative_data)
    # Cap the component counts for the same reason as in Plot 4.
    n_raw = min(10, *raw_scaled.shape)
    n_der = min(10, *derivative_scaled.shape)
    pca_raw = PCA(n_components=n_raw)
    pca_raw_scores = pca_raw.fit_transform(raw_scaled)
    explained_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
    pca_der = PCA(n_components=n_der)
    pca_der_scores = pca_der.fit_transform(derivative_scaled)
    explained_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
    targets = np.unique(labels)
    cmap = plt.get_cmap('tab10')

    # Row 1: PCA scores for raw and derivative data, one colour per class.
    for i, target in enumerate(targets):
        # Plain boolean ndarray so NumPy indexing does not depend on pandas.
        idx = (labels == target).to_numpy()
        axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
        axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
    # Zero reference lines and titles do not depend on the class, so they are
    # drawn once instead of once per loop iteration.
    for score_ax in (axs[0, 0], axs[0, 1]):
        score_ax.axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
        score_ax.axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical
    axs[0, 0].set_title('Raw Data: PCA Score Plot')
    axs[0, 1].set_title('1st Derivative: PCA Score Plot')

    # Row 2: PCA Loadings for Raw and Derivative (with horizontal and vertical lines at 0)
    axs[1, 0].plot(pca_raw.components_[0], label='PC1')
    axs[1, 0].plot(pca_raw.components_[1], label='PC2')
    axs[1, 0].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[1, 0].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical

    axs[1, 1].plot(pca_der.components_[0], label='PC1')
    axs[1, 1].plot(pca_der.components_[1], label='PC2')
    axs[1, 1].axhline(0, color='gray', linestyle='--', linewidth=2)  # Horizontal
    axs[1, 1].axvline(0, color='gray', linestyle='--', linewidth=2)  # Vertical

    # Row 3: cumulative explained variance in percent.
    axs[2, 0].plot(range(1, n_raw + 1), explained_var_raw, marker='o')
    axs[2, 1].plot(range(1, n_der + 1), explained_var_der, marker='o')
    axs[0, 0].legend(); axs[0, 1].legend()
    axs[1, 0].legend(); axs[1, 1].legend()
    axs[2, 0].set_ylim(0, 105)
    axs[2, 1].set_ylim(0, 105)
    axs[2, 0].set_title('Raw Data: Scree Plot')
    axs[2, 1].set_title('1st Derivative: Scree Plot')
    plt.tight_layout()
    plots.append(fig8)
    plt.close(fig8)

    return plots


# Encode labels
le = LabelEncoder()
y = le.fit_transform(df['Label'].values)
X = df.iloc[:, 1:].values

# Choose the hold-out rows BEFORE fitting any preprocessing. Fitting the
# scaler/PCA on all rows would leak hold-out statistics into the training
# features. The split depends only on the row count, test_size and
# random_state, so another split of the same rows with the same settings
# is expected to select the same samples.
train_idx, test_idx = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)

# Standardize with training-row statistics only, then transform every row.
scaler = StandardScaler()
scaler.fit(X[train_idx])
X_scaled = scaler.transform(X)

# === PCA reduction ===
# Principal axes are likewise learned from the training rows only.
pca = PCA(n_components=2)
pca.fit(X_scaled[train_idx])
X_pca = pca.transform(X_scaled)
X_train, X_test = X_pca[train_idx], X_pca[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# === Models ===
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

# === CNN ===
class MilkDataset(Dataset):
    """In-memory dataset pairing each feature row with its integer label.

    Features are copied into a float32 tensor and given a singleton axis at
    position 1; labels are copied into an int64 tensor.
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        self.X = features.unsqueeze(1)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        # Number of samples equals the size of the leading axis.
        return self.X.size(0)

    def __getitem__(self, idx):
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# Hold out 20% of the standardized spectra for the CNN (fixed seed).
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

# Mini-batches of 16; only the training loader reshuffles.
train_loader = DataLoader(
    dataset=MilkDataset(X_train_raw, y_train_raw),
    batch_size=16,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=MilkDataset(X_test_raw, y_test_raw),
    batch_size=16,
)

class CNN1D(nn.Module):
    """Small 1-D convolutional classifier.

    Two width-3 convolutions (1 -> 32 -> 64 channels, each followed by ReLU),
    an adaptive average pool down to length 1, and a linear layer producing
    one logit per class. Input must have exactly one channel, i.e. shape
    ``(batch, 1, length)``; the pooling makes ``length`` arbitrary.

    Args:
        num_classes: Number of output logits. When ``None`` (the default,
            matching the original zero-argument constructor) it is taken as
            the number of distinct values in the module-level ``y``.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: size the head from the global labels.
            num_classes = len(np.unique(y))
        self.net = nn.Sequential(
            nn.Conv1d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv1d(32, 64, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        """Return the class logits, shape ``(batch, num_classes)``."""
        return self.net(x)

# Network, loss and optimizer for the raw-spectrum classifier.
model = CNN1D()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Ten passes over the training loader, one optimizer step per mini-batch.
for epoch in range(10):
    model.train()
    for Xb, yb in train_loader:
        optimizer.zero_grad()
        loss = criterion(input=model(Xb), target=yb)
        loss.backward()
        optimizer.step()

# Accuracy on both splits, computed in one forward pass each without gradients.
model.eval()
with torch.no_grad():
    X_test_tensor = torch.unsqueeze(torch.tensor(X_test_raw, dtype=torch.float32), dim=1)
    test_preds = torch.argmax(model(X_test_tensor), dim=1)
    test_acc = torch.eq(test_preds, torch.tensor(y_test_raw)).float().mean().item()

    X_train_tensor = torch.unsqueeze(torch.tensor(X_train_raw, dtype=torch.float32), dim=1)
    train_preds = torch.argmax(model(X_train_tensor), dim=1)
    train_acc = torch.eq(train_preds, torch.tensor(y_train_raw)).float().mean().item()

# Declarative Gradio layout: components are created in the order they should
# appear, nested inside the tab they belong to.
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 SPECTROSCOPY - YOUR HEALTH OUR CONCERN!!!")

    with gr.Tabs():
        # First 50 rows of the loaded CSV.
        with gr.Tab("Preview Raw Data"):
            gr.DataFrame(df.head(50), label="Preview of Raw Data")

        with gr.Tab("Visualizations"):
            plot_button = gr.Button("Generate Spectroscopy Visualizations")
            # One gr.Plot per figure returned by plot_all (eight in total).
            out_gallery = [gr.Plot() for _ in range(8)]
            plot_button.click(fn=plot_all, inputs=[], outputs=out_gallery)

        # NOTE(review): these tabs display image files referenced by relative
        # path, not output of the models fitted earlier in this script;
        # presumably the files sit next to the script - confirm.
        with gr.Tab("Models"):
            with gr.Tabs():
                with gr.Tab("Random Forest"):
                    gr.Image(value="rf.png", label="Random Forest Output")

                with gr.Tab("Decision Tree"):
                    gr.Markdown("**Confusion Matrix**")
                    gr.Image(value="tree_cm.png", label="Confusion Matrix")
                    gr.Markdown("**Decision Tree Visualization**")
                    gr.Image(value="tree.png", label="Tree Structure")

                with gr.Tab("1D CNN (Raw Data)"):
                    gr.Image(value="1d.png", label="1D CNN Output")

        # Static explanatory text only; no callbacks are attached in this tab.
        with gr.Tab("Takeaways"):
            gr.Markdown("## 🌿 Why Spectroscopy Matters in the Dairy Ecosystem")

            gr.Markdown("### 👨‍🌾 Farmers")
            gr.Markdown("""
        - ✅ Enables **quick, non-destructive testing** of milk quality at the source.
        - ⚠️ Allows **early detection** of spoilage, contamination, or adulteration.
        - 💰 Supports **transparent and fair pricing** in cooperative and local markets.
            """)
        
            gr.Markdown("### 🏛️ Government & Regulators")
            gr.Markdown("""
        - 🛡️ Reinforces **food safety and public health** monitoring systems.
        - 📊 Ensures **consistency and traceability** across the dairy supply chain.
        - 🚀 Encourages **innovation in agricultural technologies** and rural development.
            """)
        
            gr.Markdown("### 🏭 Businesses & Cooperatives")
            gr.Markdown("""
        - ⏱️ Facilitates **real-time quality control** during production and logistics.
        - 💡 Reduces dependency on slow, expensive lab tests.
        - 🤝 Builds **consumer trust** through transparency and quality assurance.
            """)
        
            gr.Markdown("---")
            gr.Markdown("## 🧬 Parting Thought: Healthy Living Starts with Smart Choices")
            gr.Markdown("""
        > “Milk is nature’s first food – and spectroscopy helps us keep it honest, pure, and nutritious.”  
        >  
        > Embrace technology. Protect health.  
        > Let's make every drop of milk safe and reliable – for everyone.
            """)

# Runs at import time: serve on all network interfaces, port 7860, with
# server-side rendering switched off.
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)