EzekielMW committed on
Commit
a6c3fc8
·
verified ·
1 Parent(s): 72e728b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +318 -143
app.py CHANGED
@@ -1,25 +1,23 @@
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
- from sklearn.preprocessing import StandardScaler, MinMaxScaler
6
- from sklearn.decomposition import PCA
7
- from sklearn.model_selection import train_test_split
8
  from sklearn.ensemble import RandomForestClassifier
9
  from sklearn.tree import DecisionTreeClassifier
 
 
10
  from sklearn.metrics import accuracy_score, confusion_matrix
11
  from scipy.signal import savgol_filter
12
- from tensorflow.keras.models import Sequential
13
- from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
14
- from tensorflow.keras.utils import to_categorical
15
- from tensorflow.keras.callbacks import History
16
- import seaborn as sns
17
- import io
18
- import os
19
  from math import pi
20
  from matplotlib.cm import get_cmap
21
- import warnings
22
- warnings.filterwarnings("ignore")
 
 
 
23
 
24
  plt.switch_backend('agg')
25
 
@@ -27,102 +25,280 @@ plt.switch_backend('agg')
27
  df = pd.read_csv("milk_absorbance.csv")
28
  df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
29
 
30
- # ===================== Helper Functions =========================
31
- def compute_pca_data(df):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  scaler = StandardScaler()
33
- features = df.iloc[:, 1:].values.astype(float)
34
- features_scaled = scaler.fit_transform(features)
 
 
 
 
 
35
  pca = PCA(n_components=2)
36
- pca_data = pca.fit_transform(features_scaled)
37
- return pca_data, df['Label'].values
38
-
39
- def train_model_on_pca(model_name):
40
- X, y = compute_pca_data(df)
41
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
42
- if model_name == "Random Forest":
43
- model = RandomForestClassifier(n_estimators=100)
44
- elif model_name == "Decision Tree":
45
- model = DecisionTreeClassifier()
46
- train_accuracies, test_accuracies = [], []
47
- for epoch in range(1, 11):
48
- model.fit(X_train, y_train)
49
- train_acc = accuracy_score(y_train, model.predict(X_train))
50
- test_acc = accuracy_score(y_test, model.predict(X_test))
51
- train_accuracies.append(train_acc)
52
- test_accuracies.append(test_acc)
53
- cm = confusion_matrix(y_test, model.predict(X_test))
54
- return train_accuracies, test_accuracies, cm
55
-
56
- def train_1d_cnn():
57
- X = df.iloc[:, 1:].values.astype(float)
58
- y = df['Label'].astype(int).values
59
- X = X[:, :, np.newaxis] # Shape for Conv1D
60
- y_cat = to_categorical(y)
61
- X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.2, stratify=y)
62
- model = Sequential([
63
- Conv1D(32, kernel_size=5, activation='relu', input_shape=(X.shape[1], 1)),
64
- MaxPooling1D(pool_size=2),
65
- Flatten(),
66
- Dense(64, activation='relu'),
67
- Dropout(0.3),
68
- Dense(y_cat.shape[1], activation='softmax')
69
- ])
70
- model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
71
- history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, verbose=0)
72
- cm = confusion_matrix(np.argmax(y_test, axis=1), np.argmax(model.predict(X_test), axis=1))
73
- return history.history['accuracy'], history.history['val_accuracy'], cm
74
-
75
- def create_plot(train_acc, test_acc):
76
- fig, ax = plt.subplots()
77
- ax.plot(range(1, 11), train_acc, label="Train Accuracy")
78
- ax.plot(range(1, 11), test_acc, label="Test Accuracy")
79
- ax.set_xlabel("Epoch")
80
- ax.set_ylabel("Accuracy")
81
- ax.set_title("Train vs Test Accuracy")
82
- ax.legend()
83
- return fig
84
-
85
- def plot_confusion_matrix(cm):
86
- fig, ax = plt.subplots()
87
- sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
88
- ax.set_xlabel("Predicted")
89
- ax.set_ylabel("True")
90
- ax.set_title("Confusion Matrix")
91
- return fig
92
-
93
- def predict_model(input_df, model_name):
94
- if model_name in ["Random Forest", "Decision Tree"]:
95
- X, y = compute_pca_data(df)
96
- if model_name == "Random Forest":
97
- model = RandomForestClassifier(n_estimators=100)
98
- else:
99
- model = DecisionTreeClassifier()
100
- model.fit(X, y)
101
- input_pca, _ = compute_pca_data(input_df)
102
- return model.predict(input_pca)
103
- elif model_name == "1D CNN":
104
- X = df.iloc[:, 1:].values.astype(float)
105
- y = df['Label'].astype(int).values
106
- X = X[:, :, np.newaxis]
107
- y_cat = to_categorical(y)
108
- model = Sequential([
109
- Conv1D(32, kernel_size=5, activation='relu', input_shape=(X.shape[1], 1)),
110
- MaxPooling1D(pool_size=2),
111
- Flatten(),
112
- Dense(64, activation='relu'),
113
- Dropout(0.3),
114
- Dense(y_cat.shape[1], activation='softmax')
115
- ])
116
- model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
117
- model.fit(X, y_cat, epochs=10, verbose=0)
118
- input_data = input_df.iloc[:, 1:].values.astype(float)[:, :, np.newaxis]
119
- return np.argmax(model.predict(input_data), axis=1)
120
-
121
- # ===================== Gradio UI =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  with gr.Blocks() as demo:
123
- gr.Markdown("# 🧪 Milk Spectroscopy Analysis App")
124
  with gr.Tabs():
125
- with gr.Tab("Dataset Description"):
126
  gr.DataFrame(df.head(50), label="Preview of Raw Data")
127
 
128
  with gr.Tab("Visualizations"):
@@ -133,53 +309,52 @@ with gr.Blocks() as demo:
133
  with gr.Tab("Models"):
134
  with gr.Tabs():
135
  with gr.Tab("Random Forest"):
136
- rf_btn = gr.Button("Train Random Forest")
137
- rf_table = gr.Dataframe(headers=["Epoch", "Train Acc", "Test Acc"])
138
- rf_plot = gr.Plot()
139
- rf_cm = gr.Plot()
140
- def run_rf():
141
- train_acc, test_acc, cm = train_model_on_pca("Random Forest")
142
- table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
143
- return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
144
- rf_btn.click(fn=run_rf, inputs=[], outputs=[rf_table, rf_plot, rf_cm])
145
 
146
  with gr.Tab("Decision Tree"):
147
- dt_btn = gr.Button("Train Decision Tree")
148
- dt_table = gr.Dataframe(headers=["Epoch", "Train Acc", "Test Acc"])
149
- dt_plot = gr.Plot()
150
- dt_cm = gr.Plot()
151
- def run_dt():
152
- train_acc, test_acc, cm = train_model_on_pca("Decision Tree")
153
- table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
154
- return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
155
- dt_btn.click(fn=run_dt, inputs=[], outputs=[dt_table, dt_plot, dt_cm])
156
 
157
  with gr.Tab("1D CNN (Raw Data)"):
158
- cnn_btn = gr.Button("Train 1D CNN")
159
- cnn_table = gr.Dataframe(headers=["Epoch", "Train Acc", "Test Acc"])
160
- cnn_plot = gr.Plot()
161
- cnn_cm = gr.Plot()
162
- def run_cnn():
163
- train_acc, test_acc, cm = train_1d_cnn()
164
- table = pd.DataFrame({"Epoch": list(range(1, 11)), "Train Acc": train_acc, "Test Acc": test_acc})
165
- return table, create_plot(train_acc, test_acc), plot_confusion_matrix(cm)
166
- cnn_btn.click(fn=run_cnn, inputs=[], outputs=[cnn_table, cnn_plot, cnn_cm])
167
 
168
  with gr.Tab("Prediction"):
169
- model_dropdown = gr.Dropdown(choices=["Random Forest", "Decision Tree", "1D CNN"], label="Select Model")
170
- input_type = gr.Radio(choices=["Single", "Multiple (CSV)"])
171
- csv_input = gr.File(file_types=[".csv"], label="Upload CSV")
172
- predict_btn = gr.Button("Predict")
173
- output_df = gr.DataFrame()
174
 
175
- def predict_fn(model_name, type_sel, file):
176
- if type_sel == "Multiple (CSV)":
177
- data = pd.read_csv(file.name)
 
 
 
 
 
 
 
178
  else:
179
- data = df.sample(1) # fallback dummy
180
- preds = predict_model(data, model_name)
181
- return pd.DataFrame({"Prediction": preds})
 
182
 
183
- predict_btn.click(fn=predict_fn, inputs=[model_dropdown, input_type, csv_input], outputs=output_df)
 
184
 
 
185
  demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
 
1
+ # ✅ FULL INTEGRATED SCRIPT
2
+ # Includes your existing visualizations + new Models and Prediction tabs
3
+
4
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from math import pi
from matplotlib.cm import get_cmap
from scipy.signal import savgol_filter
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split  # was missing: used by the train/test splits below
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from torch.utils.data import DataLoader, Dataset
21
 
22
  plt.switch_backend('agg')
23
 
 
25
  df = pd.read_csv("milk_absorbance.csv")
26
  df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
27
 
28
+ # ---------- Plotting Function (Unchanged) ----------
29
+ def plot_all():
30
+ plots = []
31
+
32
+ # Plot 1: Mean Spectra per Class
33
+ fig1 = plt.figure(figsize=(12, 6))
34
+ for label in df['Label'].unique():
35
+ class_df = df[df['Label'] == label]
36
+ mean_spectrum = class_df.iloc[:, 1:].mean()
37
+ plt.plot(mean_spectrum.index.astype(int), mean_spectrum, label=f'Label {label}')
38
+ plt.title('Mean NIR Spectrum per Milk Ratio Class')
39
+ plt.xlabel('Wavelength (nm)')
40
+ plt.ylabel('Absorbance')
41
+ plt.legend(title='Class (Milk Ratio)')
42
+ plt.grid(True)
43
+ plt.tight_layout()
44
+ plots.append(fig1)
45
+ plt.close(fig1)
46
+
47
+ # Plot 2: Offset Mean Spectra
48
+ fig2 = plt.figure(figsize=(12, 6))
49
+ offset_step = 0.1
50
+ for i, label in enumerate(df['Label'].unique()):
51
+ class_df = df[df['Label'] == label]
52
+ mean_spectrum = class_df.iloc[:, 1:].mean()
53
+ offset = i * offset_step
54
+ plt.plot(mean_spectrum.index.astype(int), mean_spectrum + offset, label=f'Label {label}')
55
+ plt.title('Mean NIR Spectrum per Milk Ratio Class (with Offset)')
56
+ plt.xlabel('Wavelength (nm)')
57
+ plt.ylabel('Absorbance (Offset Applied)')
58
+ plt.legend(title='Class (Milk Ratio)')
59
+ plt.grid(True)
60
+ plt.tight_layout()
61
+ plots.append(fig2)
62
+ plt.close(fig2)
63
+
64
+ # Plot 3: Radar Plot
65
+ fig3 = plt.figure(figsize=(8, 8))
66
+ ax = plt.subplot(111, polar=True)
67
+ subset_cols = df.columns[1:][::20]
68
+ labels = df['Label'].unique()
69
+ N = len(subset_cols)
70
+ angles = [n / float(N) * 2 * pi for n in range(N)] + [0]
71
+ for label in labels:
72
+ class_df = df[df['Label'] == label]
73
+ mean_spectrum = class_df[subset_cols].mean().values
74
+ values = mean_spectrum.tolist() + [mean_spectrum[0]]
75
+ ax.plot(angles, values, label=f'Label {label}')
76
+ ax.fill(angles, values, alpha=0.1)
77
+ ax.set_xticks(angles[:-1])
78
+ ax.set_xticklabels(subset_cols.astype(int))
79
+ plt.title('Radar Plot of Mean Spectra (Subset Wavelengths)')
80
+ plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
81
+ plt.tight_layout()
82
+ plots.append(fig3)
83
+ plt.close(fig3)
84
+
85
+ # Plot 4: Cumulative PCA Explained Variance
86
+ fig4 = plt.figure(figsize=(8, 5))
87
+ X = df.iloc[:, 1:].values
88
+ X_scaled = StandardScaler().fit_transform(X)
89
+ pca = PCA(n_components=20)
90
+ pca.fit(X_scaled)
91
+ explained = np.cumsum(pca.explained_variance_ratio_)
92
+ plt.plot(range(1, 21), explained, marker='o')
93
+ plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
94
+ plt.title('Cumulative Explained Variance by PCA')
95
+ plt.xlabel('Number of Principal Components')
96
+ plt.ylabel('Cumulative Variance')
97
+ plt.legend()
98
+ plt.grid(True)
99
+ plt.tight_layout()
100
+ plots.append(fig4)
101
+ plt.close(fig4)
102
+
103
+ # Plot 5: Derivative + Normalized Spectra
104
+ fig5 = plt.figure(figsize=(16, 8))
105
+ y_vals = df['Label'].values
106
+ wavelengths = df.columns[1:].astype(float)
107
+ X = df.iloc[:, 1:].values
108
+ X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
109
+ scaler = MinMaxScaler()
110
+ X_deriv_norm = np.array([scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv])
111
+ unique_labels = np.unique(y_vals)
112
+ colors = get_cmap('tab10')(np.linspace(0, 1, len(unique_labels)))
113
+ for label, color in zip(unique_labels, colors):
114
+ indices = np.where(y_vals == label)[0]
115
+ for i in indices:
116
+ plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
117
+ plt.title("All Spectra After First Derivative + Normalization")
118
+ plt.xlabel("Wavelength (nm)")
119
+ plt.ylabel("Normalized First Derivative")
120
+ plt.legend(title="Group")
121
+ plt.grid(True)
122
+ plt.tight_layout()
123
+ plots.append(fig5)
124
+ plt.close(fig5)
125
+
126
+ # Plot 6: Derivative Only (No Norm)
127
+ fig6 = plt.figure(figsize=(16, 8))
128
+ for label, color in zip(unique_labels, colors):
129
+ indices = np.where(y_vals == label)[0]
130
+ for i in indices:
131
+ plt.plot(wavelengths, X_deriv[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
132
+ plt.title("All Spectra After First Derivative (No Normalization)")
133
+ plt.xlabel("Wavelength (nm)")
134
+ plt.ylabel("First Derivative Absorbance")
135
+ plt.legend(title="Group")
136
+ plt.grid(True)
137
+ plt.tight_layout()
138
+ plots.append(fig6)
139
+ plt.close(fig6)
140
+
141
+ # Plot 7: Score + Loadings
142
+ fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
143
+ wavelength_columns = df.columns[1:]
144
+ labels = df.iloc[:, 0]
145
+ data = df.iloc[:, 1:].values.astype(float)
146
+ derivative_data = np.diff(data, axis=1)
147
  scaler = StandardScaler()
148
+ normalized_derivative_data = scaler.fit_transform(derivative_data)
149
+ derivative_wavelength_columns = [f'Der_{w1}-{w2}' for w1, w2 in zip(wavelength_columns[:-1], wavelength_columns[1:])]
150
+ processed_df = pd.DataFrame(normalized_derivative_data, columns=derivative_wavelength_columns)
151
+ processed_df.insert(0, 'Label', labels)
152
+ processed_df['Label'] = processed_df['Label'].astype(int)
153
+ X_processed = processed_df.drop('Label', axis=1)
154
+ y_processed = processed_df['Label']
155
  pca = PCA(n_components=2)
156
+ principal_components = pca.fit_transform(X_processed)
157
+ pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
158
+ pca_df['Label'] = y_processed.reset_index(drop=True)
159
+ targets = y_processed.unique()
160
+ cmap = get_cmap('tab10')
161
+ for i, target in enumerate(targets):
162
+ idx = pca_df['Label'] == target
163
+ axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'], color=cmap(i % cmap.N), label=f'Label {target}')
164
+ axs[0].set_title('Score Plot: PC1 vs. PC2')
165
+ axs[0].legend()
166
+ axs[0].grid()
167
+ loadings = pca.components_.T
168
+ axs[1].plot(loadings[:, 0], label='PC1 Loadings')
169
+ axs[1].plot(loadings[:, 1], label='PC2 Loadings', color='black')
170
+ axs[1].set_title('Loadings Plot')
171
+ axs[1].legend()
172
+ axs[1].grid()
173
+ plt.tight_layout()
174
+ plots.append(fig7)
175
+ plt.close(fig7)
176
+
177
+ # Plot 8: 3x2 PCA Summary
178
+ fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
179
+ raw_data = df.iloc[:, 1:].values.astype(float)
180
+ derivative_data = np.diff(raw_data, axis=1)
181
+ scaler = StandardScaler()
182
+ raw_scaled = scaler.fit_transform(raw_data)
183
+ derivative_scaled = scaler.fit_transform(derivative_data)
184
+ pca_raw = PCA(n_components=10)
185
+ pca_raw_scores = pca_raw.fit_transform(raw_scaled)
186
+ explained_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
187
+ pca_der = PCA(n_components=10)
188
+ pca_der_scores = pca_der.fit_transform(derivative_scaled)
189
+ explained_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
190
+ targets = np.unique(labels)
191
+ cmap = get_cmap('tab10')
192
+ for i, target in enumerate(targets):
193
+ idx = labels == target
194
+ axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
195
+ axs[0, 0].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
196
+ axs[0, 0].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
197
+ axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1], s=40, label=f'Milk {target}', color=cmap(i % cmap.N))
198
+ axs[0, 1].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
199
+ axs[0, 1].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
200
+ axs[0, 0].set_title('Raw Data: PCA Score Plot')
201
+ axs[0, 1].set_title('1st Derivative: PCA Score Plot')
202
+
203
+
204
+ # Row 2: PCA Loadings for Raw and Derivative (with horizontal and vertical lines at 0)
205
+ axs[1, 0].plot(pca_raw.components_[0], label='PC1')
206
+ axs[1, 0].plot(pca_raw.components_[1], label='PC2')
207
+ axs[1, 0].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
208
+ axs[1, 0].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
209
+
210
+ axs[1, 1].plot(pca_der.components_[0], label='PC1')
211
+ axs[1, 1].plot(pca_der.components_[1], label='PC2')
212
+ axs[1, 1].axhline(0, color='gray', linestyle='--', linewidth=2) # Horizontal
213
+ axs[1, 1].axvline(0, color='gray', linestyle='--', linewidth=2) # Vertical
214
+
215
+ axs[2, 0].plot(range(1, 11), explained_var_raw, marker='o')
216
+ axs[2, 1].plot(range(1, 11), explained_var_der, marker='o')
217
+ axs[0, 0].legend(); axs[0, 1].legend()
218
+ axs[1, 0].legend(); axs[1, 1].legend()
219
+ axs[2, 0].set_ylim(0, 105)
220
+ axs[2, 1].set_ylim(0, 105)
221
+ axs[2, 0].set_title('Raw Data: Scree Plot')
222
+ axs[2, 1].set_title('1st Derivative: Scree Plot')
223
+ plt.tight_layout()
224
+ plots.append(fig8)
225
+ plt.close(fig8)
226
+
227
+ return plots
228
+
229
+
230
+ # ---------- Prepare Data for Modeling ----------
231
+ X = df.iloc[:, 1:].values
232
+ y = df['Label'].values
233
+ scaler = StandardScaler()
234
+ X_scaled = scaler.fit_transform(X)
235
+ pca = PCA(n_components=2)
236
+ X_pca = pca.fit_transform(X_scaled)
237
+ X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
238
+
239
+ # ---------- Train Random Forest ----------
240
+ rf = RandomForestClassifier(n_estimators=100, random_state=42)
241
+ rf.fit(X_train, y_train)
242
+
243
+ # ---------- Train Decision Tree ----------
244
+ dt = DecisionTreeClassifier(random_state=42)
245
+ dt.fit(X_train, y_train)
246
+
247
+ # ---------- CNN on Raw Data ----------
248
+ class MilkDataset(Dataset):
249
+ def __init__(self, X, y):
250
+ self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
251
+ self.y = torch.tensor(y, dtype=torch.long)
252
+ def __len__(self): return len(self.X)
253
+ def __getitem__(self, idx): return self.X[idx], self.y[idx]
254
+
255
+ X_raw_scaled = scaler.fit_transform(X)
256
+ X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(X_raw_scaled, y, test_size=0.2, random_state=42)
257
+ train_dataset = MilkDataset(X_train_raw, y_train_raw)
258
+ test_dataset = MilkDataset(X_test_raw, y_test_raw)
259
+ train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
260
+ test_loader = DataLoader(test_dataset, batch_size=16)
261
+
262
+ class CNN1D(nn.Module):
263
+ def __init__(self):
264
+ super().__init__()
265
+ self.net = nn.Sequential(
266
+ nn.Conv1d(1, 32, 3, padding=1), nn.ReLU(),
267
+ nn.Conv1d(32, 64, 3, padding=1), nn.ReLU(),
268
+ nn.AdaptiveAvgPool1d(1),
269
+ nn.Flatten(),
270
+ nn.Linear(64, len(np.unique(y)))
271
+ )
272
+ def forward(self, x): return self.net(x)
273
+
274
+ model = CNN1D()
275
+ criterion = nn.CrossEntropyLoss()
276
+ optimizer = optim.Adam(model.parameters(), lr=0.001)
277
+
278
+ train_acc_list, test_acc_list = [], []
279
+ for epoch in range(1, 11):
280
+ model.train()
281
+ for Xb, yb in train_loader:
282
+ optimizer.zero_grad()
283
+ loss = criterion(model(Xb), yb)
284
+ loss.backward()
285
+ optimizer.step()
286
+ model.eval()
287
+ with torch.no_grad():
288
+ train_preds = torch.argmax(model(torch.cat([X for X, _ in train_loader], 0)), dim=1)
289
+ test_preds = torch.argmax(model(torch.cat([X for X, _ in test_loader], 0)), dim=1)
290
+ y_train_all = torch.cat([y for _, y in train_loader])
291
+ y_test_all = torch.cat([y for _, y in test_loader])
292
+ train_acc = (train_preds == y_train_all).float().mean().item()
293
+ test_acc = (test_preds == y_test_all).float().mean().item()
294
+ train_acc_list.append(train_acc)
295
+ test_acc_list.append(test_acc)
296
+
297
+ # ---------- Gradio Interface ----------
298
  with gr.Blocks() as demo:
299
+ gr.Markdown("# 🧪 Dataset Description")
300
  with gr.Tabs():
301
+ with gr.Tab("Preview Raw Data"):
302
  gr.DataFrame(df.head(50), label="Preview of Raw Data")
303
 
304
  with gr.Tab("Visualizations"):
 
309
  with gr.Tab("Models"):
310
  with gr.Tabs():
311
  with gr.Tab("Random Forest"):
312
+ gr.Markdown(f"""Train Accuracy: {accuracy_score(y_train, rf.predict(X_train)):.2f} \
313
+ Test Accuracy: {accuracy_score(y_test, rf.predict(X_test)):.2f}""")
314
+ fig_rf = plt.figure()
315
+ sns.heatmap(confusion_matrix(y_test, rf.predict(X_test)), annot=True, fmt='d')
316
+ plt.title("Random Forest Confusion Matrix")
317
+ gr.Plot(fig_rf)
 
 
 
318
 
319
  with gr.Tab("Decision Tree"):
320
+ gr.Markdown(f"""Train Accuracy: {accuracy_score(y_train, dt.predict(X_train)):.2f} \
321
+ Test Accuracy: {accuracy_score(y_test, dt.predict(X_test)):.2f}""")
322
+ fig_dt = plt.figure()
323
+ sns.heatmap(confusion_matrix(y_test, dt.predict(X_test)), annot=True, fmt='d')
324
+ plt.title("Decision Tree Confusion Matrix")
325
+ gr.Plot(fig_dt)
 
 
 
326
 
327
  with gr.Tab("1D CNN (Raw Data)"):
328
+ gr.Markdown(f"""Train Accuracy: {train_acc:.2f} \
329
+ Test Accuracy: {test_acc:.2f}""")
330
+ fig_cnn = plt.figure()
331
+ sns.heatmap(confusion_matrix(y_test_all, test_preds), annot=True, fmt='d')
332
+ plt.title("1D CNN Confusion Matrix")
333
+ gr.Plot(fig_cnn)
 
 
 
334
 
335
  with gr.Tab("Prediction"):
336
+ model_dropdown = gr.Dropdown(choices=['Random Forest', 'Decision Tree', '1D CNN'], label="Choose Model")
337
+ input_file = gr.File(label="Upload CSV File (Same Format as Original Data)")
338
+ output_df = gr.DataFrame(label="Predicted Labels")
 
 
339
 
340
+ def predict(file, model_name):
341
+ test_df = pd.read_csv(file.name)
342
+ if 'Label' in test_df.columns:
343
+ test_df = test_df.drop(columns=['Label'])
344
+ X_input = test_df.values
345
+ if model_name == '1D CNN':
346
+ X_scaled = scaler.transform(X_input)
347
+ X_tensor = torch.tensor(X_scaled, dtype=torch.float32).unsqueeze(1)
348
+ with torch.no_grad():
349
+ preds = torch.argmax(model(X_tensor), dim=1).numpy()
350
  else:
351
+ X_pca_input = pca.transform(scaler.transform(X_input))
352
+ preds = rf.predict(X_pca_input) if model_name == 'Random Forest' else dt.predict(X_pca_input)
353
+ test_df['Predicted Label'] = preds
354
+ return test_df
355
 
356
+ predict_btn = gr.Button("Predict")
357
+ predict_btn.click(fn=predict, inputs=[input_file, model_dropdown], outputs=[output_df])
358
 
359
+ # Run app
360
  demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)