Spaces:
Sleeping
Sleeping
File size: 3,047 Bytes
53e2114 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import matplotlib.pyplot as plt
import numpy as np
import umap
from sklearn.manifold import TSNE
import tempfile
def _save_scatter_png(points, labels, title) -> str:
    """Scatter-plot the first two columns of *points* and save the figure as a PNG.

    Args:
        points: 2-D array indexed as ``points[:, 0]`` / ``points[:, 1]``.
        labels: Per-point values passed to matplotlib as the colour array.
        title: Full title text for the plot.

    Returns:
        Path of a temporary ``.png`` file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        scatter = ax.scatter(points[:, 0], points[:, 1], c=labels, cmap='tab10', s=30)
        ax.set_title(title)
        fig.colorbar(scatter, ax=ax, label="Cluster ID")
        fig.tight_layout()
        # Reserve a unique path, then release the handle before matplotlib
        # opens the same file by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_img:
            png_path = tmp_img.name
        fig.savefig(png_path)
    finally:
        # Close the figure even when plotting or saving fails, so failed
        # calls do not leave figures registered in pyplot.
        plt.close(fig)
    return png_path


def plot_embedding(X, labels, method="UMAP", title="Clustering Visualization") -> str:
    """Render a 2-D scatter plot of clustered data and return the image path.

    Args:
        X: Data to plot. With ``method`` "UMAP" or "TSNE" it is handed to the
            reducer's ``fit_transform``; with "None" its first two columns
            are plotted directly.
        labels: Per-point cluster labels used to colour the points.
        method: "UMAP", "TSNE" or "None" (case-insensitive). Python ``None``
            is accepted as an alias for "None".
        title: Text appended to the method name in the plot title.

    Returns:
        Path of a temporary PNG file containing the plot.

    Raises:
        ValueError: If ``method`` is not recognised, or if ``method`` is
            "None" and ``X`` is not 2-D with at least two columns.
    """
    # Normalise once so every branch and the title use the same spelling.
    method_key = "NONE" if method is None else str(method).upper()

    if method_key == "NONE":
        # No dimensionality reduction: plot the original data's first two columns.
        points = np.asarray(X)
        if points.ndim != 2 or points.shape[1] < 2:
            raise ValueError("Data must have at least 2 features for plotting without dimensionality reduction.")
        return _save_scatter_png(points, labels, f"No Dimensionality Reduction - {title}")

    if method_key == "UMAP":
        reducer = umap.UMAP(random_state=69)
    elif method_key == "TSNE":
        reducer = TSNE(random_state=69, perplexity=30, max_iter=1000)
    else:
        raise ValueError(f"Unknown method: {method}. Use 'UMAP', 'TSNE', or 'None'.")

    X_embedded = reducer.fit_transform(X)
    return _save_scatter_png(X_embedded, labels, f"{method_key} - {title}")
def plot_som(som_model, X_scaled, labels):
    """Visualize a SOM clustering result as a U-Matrix with labelled points.

    Args:
        som_model: Trained SOM object (e.g. MiniSom) providing
            ``distance_map()`` and ``winner(x)``.
        X_scaled: Scaled data array; iterated one sample at a time.
        labels: Cluster label for each sample, indexed by sample position.
            One is subtracted before use, so labels are presumably 1-based
            (confirm against the caller).

    Returns:
        Path of a temporary PNG file containing the plot. The file is
        created with ``delete=False``, so it is not removed automatically.
    """
    # Ten markers and ten tab10 colours; cluster ids beyond that wrap around.
    markers = ['o', 's', 'D', '^', 'v', 'p', '*', 'h', 'x', '+']
    colors = plt.cm.tab10.colors

    fig, ax = plt.subplots(figsize=(8, 8))
    try:
        # Draw the U-Matrix (distance map) as the background.
        u_matrix = ax.pcolor(som_model.distance_map().T, cmap='bone_r')
        fig.colorbar(u_matrix, ax=ax, label='Distance')

        # Draw each data point on the SOM grid.
        for cnt, x in enumerate(X_scaled):
            w = som_model.winner(x)  # grid position of the winning neuron
            # int() lets float-typed labels (e.g. 1.0) be used as list indices.
            cluster_id = int(labels[cnt]) - 1  # adjust label to zero-based index
            # Offset by 0.5 so the marker sits in the middle of its grid cell.
            ax.plot(w[0] + 0.5, w[1] + 0.5, markers[cluster_id % len(markers)],
                    markerfacecolor=colors[cluster_id % len(colors)],
                    markeredgecolor='k',
                    markersize=12,
                    markeredgewidth=1.5)

        ax.set_title("SOM Clustering Visualization (U-Matrix + Clustered Data Points)")
        fig.tight_layout()
        # Reserve a unique path, then release the handle before matplotlib
        # opens the same file by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_img:
            png_path = tmp_img.name
        fig.savefig(png_path)
    finally:
        # Close the figure even when plotting or saving fails, so failed
        # calls do not leave figures registered in pyplot.
        plt.close(fig)
    return png_path
|