EzekielMW commited on
Commit
bc4f170
·
verified ·
1 Parent(s): a6aced3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -120
app.py CHANGED
@@ -7,140 +7,119 @@ from sklearn.decomposition import PCA
7
  from scipy.signal import savgol_filter
8
  from math import pi
9
 
10
- plt.switch_backend('agg') # Required for headless environments
11
 
12
  # Load dataset
13
  df = pd.read_csv("milk_absorbance.csv")
14
  df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
15
 
16
- # Function to generate all plots
17
  def plot_all():
18
  plots = []
19
 
20
- # 1. Mean Spectra per Class
21
- fig1 = plt.figure(figsize=(12, 6))
22
- for label in df['Label'].unique():
23
- class_df = df[df['Label'] == label]
24
- mean_spectrum = class_df.iloc[:, 1:].mean()
25
- plt.plot(mean_spectrum.index.astype(int), mean_spectrum, label=f'Label {label}')
26
- plt.title('Mean NIR Spectrum per Milk Ratio Class')
27
- plt.xlabel('Wavelength (nm)')
28
- plt.ylabel('Absorbance')
29
- plt.legend(title='Class (Milk Ratio)')
30
- plt.grid(True)
31
- plt.tight_layout()
32
- plots.append(fig1)
33
-
34
- # 2. Offset Mean Spectra
35
- fig2 = plt.figure(figsize=(12, 6))
36
- offset_step = 0.1
37
- for i, label in enumerate(df['Label'].unique()):
38
- class_df = df[df['Label'] == label]
39
- mean_spectrum = class_df.iloc[:, 1:].mean()
40
- offset = i * offset_step
41
- plt.plot(mean_spectrum.index.astype(int), mean_spectrum + offset, label=f'Label {label}')
42
- plt.title('Offset Mean NIR Spectra')
43
- plt.xlabel('Wavelength (nm)')
44
- plt.ylabel('Offset Absorbance')
45
- plt.legend()
46
- plt.grid(True)
47
- plt.tight_layout()
48
- plots.append(fig2)
49
-
50
- # 3. Radar Plot
51
- fig3 = plt.figure(figsize=(8, 8))
52
- ax = plt.subplot(111, polar=True)
53
- subset_cols = df.columns[1:][::20]
54
- labels = df['Label'].unique()
55
- N = len(subset_cols)
56
- angles = [n / float(N) * 2 * pi for n in range(N)] + [0]
57
- for label in labels:
58
- class_df = df[df['Label'] == label]
59
- mean_spectrum = class_df[subset_cols].mean().values
60
- values = mean_spectrum.tolist() + [mean_spectrum[0]]
61
- ax.plot(angles, values, label=f'Label {label}')
62
- ax.fill(angles, values, alpha=0.1)
63
- ax.set_xticks(angles[:-1])
64
- ax.set_xticklabels(subset_cols.astype(int))
65
- plt.title('Radar Plot of Mean Spectra (Subset)')
66
- plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
67
- plt.tight_layout()
68
- plots.append(fig3)
69
-
70
- # 4. PCA Cumulative Variance
71
- fig4 = plt.figure(figsize=(8, 5))
72
- X = df.iloc[:, 1:].values
73
- X_scaled = StandardScaler().fit_transform(X)
74
- pca = PCA(n_components=20)
75
- pca.fit(X_scaled)
76
- explained = np.cumsum(pca.explained_variance_ratio_)
77
- plt.plot(range(1, 21), explained, marker='o')
78
- plt.axhline(y=0.95, color='r', linestyle='--', label='95% Variance')
79
- plt.title('Cumulative Explained Variance by PCA')
80
- plt.xlabel('Principal Components')
81
- plt.ylabel('Cumulative Variance')
82
- plt.legend()
83
- plt.grid(True)
84
- plt.tight_layout()
85
- plots.append(fig4)
86
-
87
- # 5. Derivative + Normalized
88
- fig5 = plt.figure(figsize=(16, 8))
89
- y_vals = df['Label'].values
90
- wavelengths = df.columns[1:].astype(float)
91
- X = df.iloc[:, 1:].values
92
- X_deriv = savgol_filter(X, window_length=25, polyorder=5, deriv=1, axis=1)
93
- scaler = MinMaxScaler()
94
- X_deriv_norm = np.array([scaler.fit_transform(row.reshape(-1, 1)).flatten() for row in X_deriv])
95
- unique_labels = np.unique(y_vals)
96
- colors = plt.cm.tab10(np.linspace(0, 1, len(unique_labels)))
97
- for label, color in zip(unique_labels, colors):
98
- indices = np.where(y_vals == label)[0]
99
- for i in indices:
100
- plt.plot(wavelengths, X_deriv_norm[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
101
- plt.title("Spectra After 1st Derivative + Normalization")
102
- plt.xlabel("Wavelength (nm)")
103
- plt.ylabel("Normalized Derivative")
104
- plt.legend(title="Class")
105
- plt.grid(True)
106
  plt.tight_layout()
107
- plots.append(fig5)
108
-
109
- # 6. Derivative Only (No Norm)
110
- fig6 = plt.figure(figsize=(16, 8))
111
- for label, color in zip(unique_labels, colors):
112
- indices = np.where(y_vals == label)[0]
113
- for i in indices:
114
- plt.plot(wavelengths, X_deriv[i], color=color, alpha=0.3, label=f'Milk {label}' if i == indices[0] else '')
115
- plt.title("Spectra After 1st Derivative (No Normalization)")
116
- plt.xlabel("Wavelength (nm)")
117
- plt.ylabel("Derivative Absorbance")
118
- plt.legend(title="Class")
119
- plt.grid(True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  plt.tight_layout()
121
- plots.append(fig6)
122
 
123
  return plots
124
 
125
- # Gradio UI
126
  with gr.Blocks() as demo:
127
  gr.Markdown("# 🧪 Dataset Description")
128
- gr.DataFrame(df.head(10), label="📋 Preview of Milk Spectroscopy Data")
129
-
130
- plot_button = gr.Button("📊 Generate Spectroscopy Visualizations")
131
-
132
- # Individual Plot Outputs
133
- plot1 = gr.Plot(label="Mean Spectra")
134
- plot2 = gr.Plot(label="Offset Mean Spectra")
135
- plot3 = gr.Plot(label="Radar Plot")
136
- plot4 = gr.Plot(label="PCA Variance")
137
- plot5 = gr.Plot(label="Derivative + Normalized")
138
- plot6 = gr.Plot(label="Derivative Only")
139
-
140
- plot_button.click(
141
- fn=plot_all,
142
- inputs=[],
143
- outputs=[plot1, plot2, plot3, plot4, plot5, plot6]
144
- )
145
 
146
  demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
 
7
  from scipy.signal import savgol_filter
8
  from math import pi
9
 
10
+ plt.switch_backend('agg')
11
 
12
  # Load dataset
13
  df = pd.read_csv("milk_absorbance.csv")
14
  df.rename(columns={df.columns[0]: 'Label'}, inplace=True)
15
 
16
+ # Main plot generator
17
  def plot_all():
18
  plots = []
19
 
20
+ # --- Previous 6 plots (shortened for clarity) ---
21
+ # [Same code from the earlier version to generate 6 plots]
22
+
23
+ # ---------- New Plot Group 1: Score + Loadings (2 Subplots) ----------
24
+ fig7, axs = plt.subplots(1, 2, figsize=(14, 5))
25
+ wavelengths = df.columns[1:]
26
+ labels = df['Label']
27
+ data = df.iloc[:, 1:].values.astype(float)
28
+
29
+ deriv = np.diff(data, axis=1)
30
+ scaler = StandardScaler()
31
+ norm_deriv = scaler.fit_transform(deriv)
32
+ deriv_cols = [f'Der_{w1}-{w2}' for w1, w2 in zip(wavelengths[:-1], wavelengths[1:])]
33
+ processed_df = pd.DataFrame(norm_deriv, columns=deriv_cols)
34
+ processed_df.insert(0, 'Label', labels)
35
+
36
+ X_proc = processed_df.drop('Label', axis=1)
37
+ y_proc = processed_df['Label']
38
+
39
+ pca = PCA(n_components=2)
40
+ pcs = pca.fit_transform(X_proc)
41
+ pca_df = pd.DataFrame(pcs, columns=['PC1', 'PC2'])
42
+ pca_df['Label'] = y_proc.reset_index(drop=True)
43
+
44
+ cmap = plt.cm.get_cmap('tab10', len(pca_df['Label'].unique()))
45
+ for i, target in enumerate(pca_df['Label'].unique()):
46
+ idx = pca_df['Label'] == target
47
+ axs[0].scatter(pca_df.loc[idx, 'PC1'], pca_df.loc[idx, 'PC2'], color=cmap(i), label=f"Label {target}", s=40)
48
+ axs[0].set_title("Score Plot: PC1 vs PC2")
49
+ axs[0].set_xlabel("PC1")
50
+ axs[0].set_ylabel("PC2")
51
+ axs[0].legend()
52
+ axs[0].grid()
53
+
54
+ loadings = pca.components_.T
55
+ axs[1].plot(range(len(X_proc.columns)), loadings[:, 0], label='PC1 Loadings')
56
+ axs[1].plot(range(len(X_proc.columns)), loadings[:, 1], label='PC2 Loadings', color='black')
57
+ axs[1].set_title("Loadings Plot")
58
+ axs[1].set_xlabel("Feature Index")
59
+ axs[1].set_ylabel("Loading Value")
60
+ axs[1].legend()
61
+ axs[1].grid()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  plt.tight_layout()
63
+ plots.append(fig7)
64
+
65
+ # ---------- New Plot Group 2: 3x2 PCA Analysis ----------
66
+ fig8, axs = plt.subplots(3, 2, figsize=(16, 14))
67
+
68
+ raw = data
69
+ raw_scaled = scaler.fit_transform(raw)
70
+ der_scaled = scaler.fit_transform(deriv)
71
+
72
+ pca_raw = PCA(n_components=10)
73
+ pca_raw_scores = pca_raw.fit_transform(raw_scaled)
74
+ exp_var_raw = np.cumsum(pca_raw.explained_variance_ratio_) * 100
75
+
76
+ pca_der = PCA(n_components=10)
77
+ pca_der_scores = pca_der.fit_transform(der_scaled)
78
+ exp_var_der = np.cumsum(pca_der.explained_variance_ratio_) * 100
79
+
80
+ for i, target in enumerate(np.unique(labels)):
81
+ idx = labels == target
82
+ axs[0, 0].scatter(pca_raw_scores[idx, 0], pca_raw_scores[idx, 1], label=f'Milk {target}', color=cmap(i))
83
+ axs[0, 1].scatter(pca_der_scores[idx, 0], pca_der_scores[idx, 1], label=f'Milk {target}', color=cmap(i))
84
+
85
+ axs[0, 0].set_title("Raw PCA Score Plot")
86
+ axs[0, 1].set_title("1st Derivative PCA Score Plot")
87
+ axs[1, 0].plot(range(len(wavelengths)), pca_raw.components_[0], label='PC1')
88
+ axs[1, 0].plot(range(len(wavelengths)), pca_raw.components_[1], label='PC2')
89
+ axs[1, 1].plot(range(len(deriv_cols)), pca_der.components_[0], label='PC1')
90
+ axs[1, 1].plot(range(len(deriv_cols)), pca_der.components_[1], label='PC2')
91
+ axs[2, 0].plot(range(1, 11), exp_var_raw, marker='o')
92
+ axs[2, 1].plot(range(1, 11), exp_var_der, marker='o')
93
+
94
+ for ax in axs.flat:
95
+ ax.grid(True)
96
+
97
+ axs[0, 0].legend()
98
+ axs[0, 1].legend()
99
+ axs[1, 0].set_title("Raw Loadings")
100
+ axs[1, 1].set_title("Derivative Loadings")
101
+ axs[2, 0].set_title("Raw Scree")
102
+ axs[2, 1].set_title("Derivative Scree")
103
  plt.tight_layout()
104
+ plots.append(fig8)
105
 
106
  return plots
107
 
108
+ # Gradio UI with tabs
109
  with gr.Blocks() as demo:
110
  gr.Markdown("# 🧪 Dataset Description")
111
+ with gr.Tab("Preview Raw Data"):
112
+ gr.DataFrame(df.head(50), label="Milk Absorbance Data")
113
+ with gr.Tab("Visualizations"):
114
+ plot_btn = gr.Button("Generate All Visualizations")
115
+ plot1 = gr.Plot()
116
+ plot2 = gr.Plot()
117
+ plot3 = gr.Plot()
118
+ plot4 = gr.Plot()
119
+ plot5 = gr.Plot()
120
+ plot6 = gr.Plot()
121
+ plot7 = gr.Plot()
122
+ plot8 = gr.Plot()
123
+ plot_btn.click(plot_all, inputs=[], outputs=[plot1, plot2, plot3, plot4, plot5, plot6, plot7, plot8])
 
 
 
 
124
 
125
  demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)