Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,140 +7,119 @@ from sklearn.decomposition import PCA
|
|
| 7 |
from scipy.signal import savgol_filter
|
| 8 |
from math import pi
|
| 9 |
|
| 10 |
-
plt.switch_backend('agg')
|
| 11 |
|
| 12 |
# Load dataset
|
| 13 |
df = pd.read_csv("milk_absorbance.csv")
|
| 14 |
df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
|
| 15 |
|
| 16 |
-
#
|
| 17 |
def plot_all():
|
| 18 |
plots = []
|
| 19 |
|
| 20 |
-
#
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
plt.
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
ax.fill(angles, values, alpha=0.1)
|
| 63 |
-
ax.set_xticks(angles[:-1])
|
| 64 |
-
ax.set_xticklabels(subset_cols.astype(int))
|
| 65 |
-
plt.title('Radar Plot of Mean Spectra (Subset)')
|
| 66 |
-
plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
|
| 67 |
-
plt.tight_layout()
|
| 68 |
-
plots.append(fig3)
|
| 69 |
-
|
| 70 |
-
# 4. PCA Cumulative Variance
|
| 71 |
-
fig4 = plt.figure(figsize=(8, 5))
|
| 72 |
-
X = df.iloc[:, 1:].values
|
| 73 |
-
X_scaled = StandardScaler().fit_transform(X)
|
| 74 |
-
pca = PCA(n_components=20)
|
| 75 |
-
pca.fit(X_scaled)
|
| 76 |
-
explained = np.cumsum(pca.explained_variance_ratio_)
|
| 77 |
-
plt.plot(range(1, 21), explained, marker='o')
|
| 78 |
-
plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
|
| 79 |
-
plt.title('Cumulative Explained Variance by PCA')
|
| 80 |
-
plt.xlabel('Principal Components')
|
| 81 |
-
plt.ylabel('Cumulative Variance')
|
| 82 |
-
plt.legend()
|
| 83 |
-
plt.grid(True)
|
| 84 |
-
plt.tight_layout()
|
| 85 |
-
plots.append(fig4)
|
| 86 |
-
|
| 87 |
-
# 5. Derivative + Normalized
|
| 88 |
-
fig5 = plt.figure(figsize=(16, 8))
|
| 89 |
-
y_vals = df['Label'].values
|
| 90 |
-
wavelengths = df.columns[1:].astype(float)
|
| 91 |
-
X = df.iloc[:, 1:].values
|
| 92 |
-
X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
|
| 93 |
-
scaler = MinMaxScaler()
|
| 94 |
-
X_deriv_norm = np.array([scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv])
|
| 95 |
-
unique_labels = np.unique(y_vals)
|
| 96 |
-
colors = plt.cm.tab10(np.linspace(0, 1, len(unique_labels)))
|
| 97 |
-
for label, color in zip(unique_labels, colors):
|
| 98 |
-
indices = np.where(y_vals == label)[0]
|
| 99 |
-
for i in indices:
|
| 100 |
-
plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
|
| 101 |
-
plt.title("Spectra After 1st Derivative + Normalization")
|
| 102 |
-
plt.xlabel("Wavelength (nm)")
|
| 103 |
-
plt.ylabel("Normalized Derivative")
|
| 104 |
-
plt.legend(title="Class")
|
| 105 |
-
plt.grid(True)
|
| 106 |
plt.tight_layout()
|
| 107 |
-
plots.append(
|
| 108 |
-
|
| 109 |
-
#
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
plt.tight_layout()
|
| 121 |
-
plots.append(
|
| 122 |
|
| 123 |
return plots
|
| 124 |
|
| 125 |
-
# Gradio UI
|
| 126 |
with gr.Blocks() as demo:
|
| 127 |
gr.Markdown("# 🧪 Dataset Description")
|
| 128 |
-
gr.
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
fn=plot_all,
|
| 142 |
-
inputs=[],
|
| 143 |
-
outputs=[plot1, plot2, plot3, plot4, plot5, plot6]
|
| 144 |
-
)
|
| 145 |
|
| 146 |
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
|
|
|
|
| 7 |
from scipy.signal import savgol_filter
|
| 8 |
from math import pi
|
| 9 |
|
| 10 |
+
# Select matplotlib's 'agg' backend before any figure is created.
plt.switch_backend('agg')

# Load the dataset and call its first column 'Label'; every other column
# is left untouched.
df = pd.read_csv("milk_absorbance.csv")
df = df.rename(columns={df.columns[0]: 'Label'})
|
| 15 |
|
| 16 |
+
# Main plot generator
|
| 17 |
def plot_all():
    """Build the PCA figures for the "Visualizations" tab.

    Reads the module-level ``df`` (column ``Label`` followed by the spectral
    columns) and produces two matplotlib figures:

    * a 1x2 figure with the PC1/PC2 score plot and the PC1/PC2 loadings of
      the standardized first-difference spectra;
    * a 3x2 figure comparing raw and first-difference spectra: score plots
      (row 0), loadings (row 1) and cumulative explained variance in percent
      (row 2).

    Returns:
        list: the figures, in the order they were created.
    """
    plots = []

    # --- Previous 6 plots (shortened for clarity) ---
    # [Same code from the earlier version to generate 6 plots]
    # NOTE(review): only the two figures below are appended, while the click
    # handler in this file lists eight gr.Plot outputs. Restore the six
    # earlier plots here or trim the outputs list -- confirm which is meant.

    # Plain NumPy arrays so boolean masks index the score matrices directly.
    labels = df['Label'].to_numpy()
    raw = df.iloc[:, 1:].to_numpy(dtype=float)
    deriv = np.diff(raw, axis=1)  # first difference along the spectral axis

    # Standardize each matrix once; both figures reuse the results.
    raw_scaled = StandardScaler().fit_transform(raw)
    der_scaled = StandardScaler().fit_transform(deriv)

    # One sorted label order and one colormap shared by every score plot, so
    # a given label keeps the same colour across figures.
    unique_labels = np.unique(labels)
    # plt.cm.get_cmap was removed in matplotlib 3.9; pyplot.get_cmap remains.
    cmap = plt.get_cmap('tab10', len(unique_labels))
    masks = [labels == target for target in unique_labels]

    # ---------- New Plot Group 1: Score + Loadings (2 Subplots) ----------
    fig7, axs = plt.subplots(1, 2, figsize=(14, 5))

    pca = PCA(n_components=2)
    pcs = pca.fit_transform(der_scaled)

    for i, (target, mask) in enumerate(zip(unique_labels, masks)):
        axs[0].scatter(pcs[mask, 0], pcs[mask, 1], color=cmap(i),
                       label=f"Label {target}", s=40)
    axs[0].set_title("Score Plot: PC1 vs PC2")
    axs[0].set_xlabel("PC1")
    axs[0].set_ylabel("PC2")
    axs[0].legend()
    axs[0].grid()

    feature_idx = range(der_scaled.shape[1])
    axs[1].plot(feature_idx, pca.components_[0], label='PC1 Loadings')
    axs[1].plot(feature_idx, pca.components_[1], label='PC2 Loadings', color='black')
    axs[1].set_title("Loadings Plot")
    axs[1].set_xlabel("Feature Index")
    axs[1].set_ylabel("Loading Value")
    axs[1].legend()
    axs[1].grid()

    fig7.tight_layout()
    plots.append(fig7)

    # ---------- New Plot Group 2: 3x2 PCA Analysis ----------
    fig8, axs = plt.subplots(3, 2, figsize=(16, 14))

    max_components = 10
    panels = (
        (0, raw_scaled, "Raw"),
        (1, der_scaled, "1st Derivative"),
    )
    for col, matrix, score_name in panels:
        # PCA cannot extract more components than min(n_samples, n_features).
        model = PCA(n_components=min(max_components, *matrix.shape))
        scores = model.fit_transform(matrix)
        cum_var = np.cumsum(model.explained_variance_ratio_) * 100

        for i, (target, mask) in enumerate(zip(unique_labels, masks)):
            axs[0, col].scatter(scores[mask, 0], scores[mask, 1],
                                label=f'Milk {target}', color=cmap(i))
        axs[0, col].set_title(f"{score_name} PCA Score Plot")
        axs[0, col].legend()

        x_features = range(matrix.shape[1])
        axs[1, col].plot(x_features, model.components_[0], label='PC1')
        axs[1, col].plot(x_features, model.components_[1], label='PC2')
        axs[1, col].legend()  # the line labels were set but never displayed

        axs[2, col].plot(range(1, len(cum_var) + 1), cum_var, marker='o')

    axs[1, 0].set_title("Raw Loadings")
    axs[1, 1].set_title("Derivative Loadings")
    axs[2, 0].set_title("Raw Scree")
    axs[2, 1].set_title("Derivative Scree")

    for ax in axs.flat:
        ax.grid(True)

    fig8.tight_layout()
    plots.append(fig8)

    return plots
|
| 107 |
|
| 108 |
+
# Gradio UI with tabs
|
| 109 |
# Gradio UI: one tab previews the data, the other renders the figures
# returned by plot_all() when the button is clicked.
with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Dataset Description")

    with gr.Tab("Preview Raw Data"):
        # Only the first 50 rows are shown.
        gr.DataFrame(df.head(50), label="Milk Absorbance Data")

    with gr.Tab("Visualizations"):
        plot_btn = gr.Button("Generate All Visualizations")

        # Eight plot slots, created in display order.
        (plot1, plot2, plot3, plot4,
         plot5, plot6, plot7, plot8) = (gr.Plot() for _ in range(8))
        plot_slots = [plot1, plot2, plot3, plot4, plot5, plot6, plot7, plot8]

        # NOTE(review): plot_all() as shown in this file appends two figures,
        # but eight outputs are wired here -- confirm the counts match.
        plot_btn.click(plot_all, inputs=[], outputs=plot_slots)

# Listen on all interfaces at port 7860, with server-side rendering disabled.
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
|