Spaces:

EzekielMW
/

Spectroscopy

Sleeping

App Files Files Community

EzekielMW committed on Jul 22, 2025

Commit

5cb4d7a

verified ·

1 Parent(s): 40ff2e9

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -74

app.py CHANGED Viewed

@@ -231,23 +231,27 @@ def plot_all():
     return plots
-# ---------- Prepare Data for Modeling ----------
 X = df.iloc[:, 1:].values
 scaler = StandardScaler()
 X_scaled = scaler.fit_transform(X)
 pca = PCA(n_components=2)
 X_pca = pca.fit_transform(X_scaled)
 X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
-# ---------- Train Random Forest ----------
 rf = RandomForestClassifier(n_estimators=100, random_state=42)
 rf.fit(X_train, y_train)
-# ---------- Train Decision Tree ----------
 dt = DecisionTreeClassifier(random_state=42)
 dt.fit(X_train, y_train)
-# ---------- CNN on Raw Data ----------
 class MilkDataset(Dataset):
     def __init__(self, X, y):
         self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
@@ -255,12 +259,9 @@ class MilkDataset(Dataset):
     def __len__(self): return len(self.X)
     def __getitem__(self, idx): return self.X[idx], self.y[idx]
-X_raw_scaled = scaler.fit_transform(X)
-X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(X_raw_scaled, y, test_size=0.2, random_state=42)
-train_dataset = MilkDataset(X_train_raw, y_train_raw)
-test_dataset = MilkDataset(X_test_raw, y_test_raw)
-train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
-test_loader = DataLoader(test_dataset, batch_size=16)
 class CNN1D(nn.Module):
     def __init__(self):
@@ -278,119 +279,118 @@ model = CNN1D()
 criterion = nn.CrossEntropyLoss()
 optimizer = optim.Adam(model.parameters(), lr=0.001)
-train_acc_list, test_acc_list = [], []
-for epoch in range(1, 11):
     model.train()
     for Xb, yb in train_loader:
         optimizer.zero_grad()
         loss = criterion(model(Xb), yb)
         loss.backward()
         optimizer.step()
-    model.eval()
-    with torch.no_grad():
-        train_preds = torch.argmax(model(torch.cat([X for X, _ in train_loader], 0)), dim=1)
-        test_preds = torch.argmax(model(torch.cat([X for X, _ in test_loader], 0)), dim=1)
-        y_train_all = torch.cat([y for _, y in train_loader])
-        y_test_all = torch.cat([y for _, y in test_loader])
-        train_acc = (train_preds == y_train_all).float().mean().item()
-        test_acc = (test_preds == y_test_all).float().mean().item()
-        train_acc_list.append(train_acc)
-        test_acc_list.append(test_acc)
-# ---------- Gradio Interface ----------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🧪 Dataset Description")
     with gr.Tabs():
         with gr.Tab("Preview Raw Data"):
-            gr.DataFrame(df.head(50), label="Preview of Raw Data")
         with gr.Tab("Visualizations"):
             plot_button = gr.Button("Generate Spectroscopy Visualizations")
-            out_gallery = [gr.Plot() for _ in range(8)]
-            plot_button.click(fn=plot_all, inputs=[], outputs=out_gallery)
         with gr.Tab("Models"):
             with gr.Tabs():
                 with gr.Tab("Random Forest"):
-                    gr.Markdown(f"""Train Accuracy: {accuracy_score(y_train, rf.predict(X_train)):.2f}  \
-                                 Test Accuracy: {accuracy_score(y_test, rf.predict(X_test)):.2f}""")
                     fig_rf = plt.figure()
                     sns.heatmap(confusion_matrix(y_test, rf.predict(X_test)), annot=True, fmt='d')
                     plt.title("Random Forest Confusion Matrix")
                     gr.Plot(fig_rf)
                 with gr.Tab("Decision Tree"):
-                    gr.Markdown(f"""Train Accuracy: {accuracy_score(y_train, dt.predict(X_train)):.2f}  \
-                                 Test Accuracy: {accuracy_score(y_test, dt.predict(X_test)):.2f}""")
                     fig_dt = plt.figure()
                     sns.heatmap(confusion_matrix(y_test, dt.predict(X_test)), annot=True, fmt='d')
                     plt.title("Decision Tree Confusion Matrix")
                     gr.Plot(fig_dt)
-                with gr.Tab("1D CNN (Raw Data)"):
-                    gr.Markdown(f"""Train Accuracy: {train_acc:.2f}  \
-                                 Test Accuracy: {test_acc:.2f}""")
                     fig_cnn = plt.figure()
-                    sns.heatmap(confusion_matrix(y_test_all, test_preds), annot=True, fmt='d')
                     plt.title("1D CNN Confusion Matrix")
                     gr.Plot(fig_cnn)
         with gr.Tab("Prediction"):
-            model_dropdown = gr.Dropdown(choices=['Random Forest', 'Decision Tree', '1D CNN'], label="Choose Model")
-            input_file = gr.File(label="Upload CSV File (Same Format as Original Data)")
-            output_df = gr.DataFrame(label="Predicted Labels")
-            def predict(file, model_name):
-                test_df = pd.read_csv(file.name)
-                if 'Label' in test_df.columns:
-                    test_df = test_df.drop(columns=['Label'])
-                X_input = test_df.values
-                if model_name == '1D CNN':
-                    X_scaled = scaler.transform(X_input)
-                    X_tensor = torch.tensor(X_scaled, dtype=torch.float32).unsqueeze(1)
                     with torch.no_grad():
-                        preds = torch.argmax(model(X_tensor), dim=1).numpy()
-                        preds = le.inverse_transform(preds)
                 else:
-                    X_pca_input = pca.transform(scaler.transform(X_input))
-                    preds = rf.predict(X_pca_input) if model_name == 'Random Forest' else dt.predict(X_pca_input)
-                    preds = le.inverse_transform(preds)
-                test_df['Predicted Label'] = preds
-                return test_df
             predict_btn = gr.Button("Predict")
-            predict_btn.click(fn=predict, inputs=[input_file, model_dropdown], outputs=[output_df])
         with gr.Tab("Takeaways"):
             gr.Markdown("## 🌾 Spectroscopy: Transforming the Dairy Sector")
             gr.Markdown("""
 ### 👨‍🌾 Farmers
-- Enables quick and non-destructive testing of milk quality.
-- Helps identify adulteration or spoilage early.
-- Boosts credibility and fair pricing in local and export markets.
 ### 🏧 Government
-- Supports enforcement of food safety and regulatory standards.
-- Aids in surveillance of quality at collection centers and cooperatives.
-- Encourages investment in agri-tech and rural innovation.
 ### 🏢 Businesses & Cooperatives
-- Enhances supply chain quality control.
-- Reduces reliance on expensive lab-based testing.
-- Increases transparency and trust with consumers.
-### 🧠 Why Spectroscopy?
-- Non-invasive, fast, and cost-effective.
-- Adaptable for large-scale or smallholder use.
-- Unlocks new value in digitizing dairy analytics.
 ---
-### 💡 Parting Shot: Health Starts With What You Consume
 > “Milk is nature’s first food – and it should remain pure. Spectroscopy empowers us to ensure it stays that way.”
 Stay curious. Stay healthy.
 """)
-# Run app
-demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)

     return plots
+# Encode labels
+le = LabelEncoder()
+y = le.fit_transform(df['Label'].values)
 X = df.iloc[:, 1:].values
 scaler = StandardScaler()
 X_scaled = scaler.fit_transform(X)
+# === PCA reduction ===
 pca = PCA(n_components=2)
 X_pca = pca.fit_transform(X_scaled)
 X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
+# === Models ===
 rf = RandomForestClassifier(n_estimators=100, random_state=42)
 rf.fit(X_train, y_train)
 dt = DecisionTreeClassifier(random_state=42)
 dt.fit(X_train, y_train)
+# === CNN ===
 class MilkDataset(Dataset):
     def __init__(self, X, y):
         self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
     def __len__(self): return len(self.X)
     def __getitem__(self, idx): return self.X[idx], self.y[idx]
+X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
+train_loader = DataLoader(MilkDataset(X_train_raw, y_train_raw), batch_size=16, shuffle=True)
+test_loader = DataLoader(MilkDataset(X_test_raw, y_test_raw), batch_size=16)
 class CNN1D(nn.Module):
     def __init__(self):
 criterion = nn.CrossEntropyLoss()
 optimizer = optim.Adam(model.parameters(), lr=0.001)
+for epoch in range(10):
     model.train()
     for Xb, yb in train_loader:
         optimizer.zero_grad()
         loss = criterion(model(Xb), yb)
         loss.backward()
         optimizer.step()
+model.eval()
+with torch.no_grad():
+    X_test_tensor = torch.tensor(X_test_raw, dtype=torch.float32).unsqueeze(1)
+    test_preds = model(X_test_tensor).argmax(dim=1)
+    test_acc = (test_preds == torch.tensor(y_test_raw)).float().mean().item()
+    X_train_tensor = torch.tensor(X_train_raw, dtype=torch.float32).unsqueeze(1)
+    train_preds = model(X_train_tensor).argmax(dim=1)
+    train_acc = (train_preds == torch.tensor(y_train_raw)).float().mean().item()
+# === Gradio App ===
 with gr.Blocks() as demo:
+    gr.Markdown("# 🥛 NIR Milk Spectroscopy Analysis App")
     with gr.Tabs():
         with gr.Tab("Preview Raw Data"):
+            gr.DataFrame(df.head(50), label="Milk Spectra")
         with gr.Tab("Visualizations"):
+            def plot_all():
+                plots = []
+                for i in range(8):
+                    fig, ax = plt.subplots()
+                    ax.plot(X[i])
+                    ax.set_title(f"Spectrum {i+1}")
+                    plots.append(fig)
+                return plots
             plot_button = gr.Button("Generate Spectroscopy Visualizations")
+            output_plots = [gr.Plot() for _ in range(8)]
+            plot_button.click(fn=plot_all, inputs=[], outputs=output_plots)
         with gr.Tab("Models"):
             with gr.Tabs():
                 with gr.Tab("Random Forest"):
+                    gr.Markdown(f"✅ Train Accuracy: **{accuracy_score(y_train, rf.predict(X_train)):.2f}**<br>🎯 Test Accuracy: **{accuracy_score(y_test, rf.predict(X_test)):.2f}**")
                     fig_rf = plt.figure()
                     sns.heatmap(confusion_matrix(y_test, rf.predict(X_test)), annot=True, fmt='d')
                     plt.title("Random Forest Confusion Matrix")
                     gr.Plot(fig_rf)
                 with gr.Tab("Decision Tree"):
+                    gr.Markdown(f"✅ Train Accuracy: **{accuracy_score(y_train, dt.predict(X_train)):.2f}**<br>🎯 Test Accuracy: **{accuracy_score(y_test, dt.predict(X_test)):.2f}**")
                     fig_dt = plt.figure()
                     sns.heatmap(confusion_matrix(y_test, dt.predict(X_test)), annot=True, fmt='d')
                     plt.title("Decision Tree Confusion Matrix")
                     gr.Plot(fig_dt)
+                with gr.Tab("1D CNN"):
+                    gr.Markdown(f"✅ Train Accuracy: **{train_acc:.2f}**<br>🎯 Test Accuracy: **{test_acc:.2f}**")
                     fig_cnn = plt.figure()
+                    sns.heatmap(confusion_matrix(y_test_raw, test_preds), annot=True, fmt='d')
                     plt.title("1D CNN Confusion Matrix")
                     gr.Plot(fig_cnn)
         with gr.Tab("Prediction"):
+            model_choice = gr.Dropdown(['Random Forest', 'Decision Tree', '1D CNN'], label="Choose Model")
+            input_file = gr.File(label="Upload CSV (same format)")
+            output_table = gr.DataFrame(label="Predictions")
+            def predict(file, model_choice):
+                df_new = pd.read_csv(file.name)
+                if 'Label' in df_new.columns:
+                    df_new = df_new.drop(columns=['Label'])
+                X_input = df_new.values
+                if model_choice == "1D CNN":
+                    X_input_scaled = scaler.transform(X_input)
+                    tensor_input = torch.tensor(X_input_scaled, dtype=torch.float32).unsqueeze(1)
                     with torch.no_grad():
+                        preds = model(tensor_input).argmax(dim=1).numpy()
                 else:
+                    X_input_pca = pca.transform(scaler.transform(X_input))
+                    preds = rf.predict(X_input_pca) if model_choice == "Random Forest" else dt.predict(X_input_pca)
+                df_new['Predicted Label'] = le.inverse_transform(preds)
+                return df_new
             predict_btn = gr.Button("Predict")
+            predict_btn.click(predict, inputs=[input_file, model_choice], outputs=[output_table])
         with gr.Tab("Takeaways"):
             gr.Markdown("## 🌾 Spectroscopy: Transforming the Dairy Sector")
             gr.Markdown("""
 ### 👨‍🌾 Farmers
+- Quick, non-destructive testing of milk quality.
+- Early detection of spoilage or adulteration.
+- Enables fairer pricing in cooperative and market setups.
 ### 🏧 Government
+- Strengthens food safety monitoring.
+- Ensures consistent quality across the supply chain.
+- Fosters innovation in rural/agricultural tech.
 ### 🏢 Businesses & Cooperatives
+- Real-time quality control in logistics.
+- Cost-effective compared to traditional labs.
+- Enhances trust through transparency.
 ---
+### 💡 Final Note on Healthy Living
 > “Milk is nature’s first food – and it should remain pure. Spectroscopy empowers us to ensure it stays that way.”
 Stay curious. Stay healthy.
 """)
+# === Run the app ===
+demo.launch(server_name="0.0.0.0", server_port=7860)