# File viewer metadata: revision d4798c4, file size 7,545 bytes.
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.decomposition import PCA
import os
# --- 1. Font Setting (NotoSansJP) ---
# Font file (Noto Sans JP, going by the file name). The relative path is
# resolved against the current working directory of the process.
FONT_PATH = "NotoSansJP-Regular.ttf"
if os.path.exists(FONT_PATH):
    # Font file is present: build font properties that point at that file.
    jp_font = FontProperties(fname=FONT_PATH)
else:
    # Font file is absent: fall back to default-constructed font properties.
    # NOTE(review): Japanese glyphs may not render with the default font.
    jp_font = FontProperties()
plt.rcParams['font.family'] = 'sans-serif'
# Use a plain hyphen for negative numbers instead of the Unicode minus sign.
plt.rcParams['axes.unicode_minus'] = False
# --- 2. Data Generation (Foot Length & Width) ---
# Fixed seed: every rerun of the script draws the same synthetic sample.
np.random.seed(42)
n_samples = 50
# Draw order is part of the result: lengths first, then the width noise.
length = np.random.normal(loc=250, scale=10, size=n_samples)
_width_noise = np.random.normal(loc=0, scale=3, size=n_samples)
# Width is a linear function of length plus independent noise.
width = 0.4 * length + _width_noise
# One row per sample; column 0 is length, column 1 is width.
data = np.array([length, width]).T
# Subtract the per-column mean so the point cloud is centred on the origin.
data_centered = data - data.mean(axis=0)
# Actual PCA Result for validation
pca_model = PCA(n_components=2)
pca_model.fit(data_centered)
# First row of components_ is the PC1 direction as (x, y).
_pc1_x, _pc1_y = pca_model.components_[0]
_raw_angle = np.degrees(np.arctan2(_pc1_y, _pc1_x))
# A principal axis is only defined up to sign: v and -v describe the same
# line, yet arctan2 reports them 180 degrees apart. Fold the result into
# [-90, 90) so the displayed answer always lies within the slider's range.
true_angle = (_raw_angle + 90.0) % 180.0 - 90.0
# --- Helper: Bilingual Text Helper ---
def get_bilingual_html(
    en_text: str,
    ja_text: str,
    font_size_en: str = "1.8rem",
    font_size_ja: str = "0.9rem",
) -> str:
    """Return an HTML snippet with an English line above a Japanese line.

    The English text is rendered bold; the Japanese text is rendered below it
    in grey. Both texts are inserted into the markup as-is (no escaping).

    Args:
        en_text: Text for the upper, bold line.
        ja_text: Text for the lower, grey line.
        font_size_en: CSS ``font-size`` value for the upper line.
        font_size_ja: CSS ``font-size`` value for the lower line.

    Returns:
        The HTML string: one wrapper ``<div>`` containing the two text
        ``<div>`` elements, with a leading and a trailing newline.
    """
    # The markup lines are kept flush-left on purpose: any indentation placed
    # inside the literal would become part of the returned string.
    return f"""
<div style="margin-bottom: 10px;">
<div style="font-size: {font_size_en}; font-weight: bold; line-height: 1.1;">{en_text}</div>
<div style="font-size: {font_size_ja}; color: #7f8c8d; margin-top: 1px;">{ja_text}</div>
</div>
"""
# --- 3. Streamlit UI ---
# Page configuration is issued before any other Streamlit call in this section.
st.set_page_config(page_title="PCA Puzzle App", layout="wide")
# (English text, Japanese text, English font size, Japanese font size),
# listed in display order: page title first, then the one-line instruction.
_header_rows = (
    ("🧩 PCA Puzzle: Find PC1 and PC2", "主成分分析パズル:第1・第2主成分を探そう", "2.2rem", "1.1rem"),
    ("Find the orthogonal axes that maximize the captured information.", "データのばらつき(情報量)を最も多く回収できる直交軸を探しましょう。", "1.2rem", "0.9rem"),
)
for _en, _ja, _size_en, _size_ja in _header_rows:
    st.markdown(get_bilingual_html(_en, _ja, _size_en, _size_ja), unsafe_allow_html=True)
# Vertical spacer between the header and the two-column body.
st.markdown("<br>", unsafe_allow_html=True)
# Two columns with a 2:1 width ratio; col1 hosts the plot, col2 the controls.
col1, col2 = st.columns([2, 1])
with col2:
    st.markdown(get_bilingual_html("Rotate the PCA Axes", "主成分軸の角度を調整する", "1.4rem", "0.85rem"), unsafe_allow_html=True)
    # The caption above serves as the visible label, so the slider's own
    # (still mandatory) label is collapsed via label_visibility.
    angle = st.slider(
        "Hidden Label for Slider",
        min_value=-90.0, max_value=90.0, value=0.0, step=1.0,
        label_visibility="collapsed"
    )
    # Vector calculations: unit vectors for the candidate PC1 axis and for the
    # axis rotated a further 90 degrees from it.
    theta1 = np.radians(angle)
    vector1 = np.array([np.cos(theta1), np.sin(theta1)])
    theta2 = theta1 + np.pi / 2  # Orthogonal axis
    vector2 = np.array([np.cos(theta2), np.sin(theta2)])
    # Variance calculations: share of the total variance (sum of the
    # per-column variances) captured by the projection onto each axis.
    total_var = np.sum(np.var(data_centered, axis=0))
    proj1 = data_centered @ vector1
    ratio1 = (np.var(proj1) / total_var) * 100
    proj2 = data_centered @ vector2
    ratio2 = (np.var(proj2) / total_var) * 100
    # Metrics display. The markup is kept flush-left so that no indentation
    # becomes part of the string handed to st.markdown.
    st.markdown(f"""
<div style="background-color: #f8fafc; padding: 20px; border-radius: 12px; border: 1px solid #e2e8f0; margin-top: 10px;">
<div style="margin-bottom: 15px;">
<div style="font-size: 1rem; font-weight: bold;">PC1 Variance Ratio (Contribution)</div>
<div style="font-size: 0.75rem; color: #7f8c8d;">第1主成分の寄与率</div>
<div style="font-size: 2rem; font-weight: 700; color: #C5A059;">{ratio1:.2f} %</div>
</div>
<div style="margin-bottom: 15px; border-top: 1px solid #e2e8f0; padding-top: 15px;">
<div style="font-size: 1rem; font-weight: bold;">PC2 Variance Ratio (Contribution)</div>
<div style="font-size: 0.75rem; color: #7f8c8d;">第2主成分の寄与率</div>
<div style="font-size: 2rem; font-weight: 700; color: #008080;">{ratio2:.2f} %</div>
</div>
<div style="background-color: #f1f5f9; margin: 10px -20px -20px -20px; padding: 15px; border-bottom-left-radius: 12px; border-bottom-right-radius: 12px;">
<div style="font-size: 0.9rem; font-weight: bold; color: #002B49;">Total Cumulative Ratio: {(ratio1+ratio2):.2f}%</div>
</div>
</div>
""", unsafe_allow_html=True)
    st.markdown("<br>", unsafe_allow_html=True)
    # Reveal the PCA-derived angle only on the run in which the button is pressed.
    if st.button("Show Answer / 答えを表示"):
        st.markdown(f"""
<div style="background-color: #fef9c3; padding: 15px; border-radius: 8px; border: 1px solid #fde047;">
<div style="font-weight: bold;">Optimal PC1 Angle: {true_angle:.1f}°</div>
<div style="font-size: 0.8rem; color: #7f8c8d;">最適な第1主成分の角度</div>
</div>
""", unsafe_allow_html=True)
with col1:
    fig, ax = plt.subplots(figsize=(8, 8))
    # Legend labels
    lbl_data = "Foot Metrics\n足計測データ"
    lbl_pc1 = "PC1 Axis (Main Search)\n第1主成分軸"
    lbl_pc2 = "PC2 Axis (Orthogonal)\n第2主成分軸"
    ax.scatter(data_centered[:, 0], data_centered[:, 1], alpha=0.5, c='#002B49', label=lbl_data)
    lims = np.array([-35, 35])
    # Draw each axis as a segment through the origin along its unit vector.
    # A slope form (y = x * tan(theta)) is singular for a vertical axis, and
    # PC2 is vertical at the slider's default angle of 0. The half-length is
    # the viewport's half-diagonal, so the segment spans the visible square at
    # every angle; the axis limits set below clip the excess.
    half_span = np.hypot(lims[0], lims[1])
    ends = np.array([-half_span, half_span])
    ax.plot(ends * vector1[0], ends * vector1[1], color='#C5A059', linewidth=3, label=lbl_pc1)
    ax.plot(ends * vector2[0], ends * vector2[1], color='#008080', linewidth=2, linestyle='--', label=lbl_pc2)
    ax.set_xlabel("Length (Diff)\n長さ (平均との差)", fontproperties=jp_font)
    ax.set_ylabel("Width (Diff)\n幅 (平均との差)", fontproperties=jp_font)
    ax.set_title("PCA Axis Exploration\n主成分軸の探索", fontproperties=jp_font, fontsize=16, pad=15)
    ax.set_xlim(-35, 35)
    ax.set_ylim(-35, 35)
    # Equal data scaling on both axes so the two perpendicular lines are
    # also drawn at a right angle on screen.
    ax.set_aspect('equal', adjustable='box')
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.legend(prop=jp_font, loc='upper left', bbox_to_anchor=(1, 1))
    st.pyplot(fig)
    # The script builds a new figure on every execution; close this one so
    # figures do not accumulate in pyplot's global registry.
    plt.close(fig)
# --- 4. Key Learning Points (Bilingual Layout) ---
st.markdown("---")
st.markdown(get_bilingual_html("💡 Key Learning Points", "学習のポイント", "1.6rem", "0.95rem"), unsafe_allow_html=True)
# Each entry: (English title, Japanese title, English body, Japanese body).
learning_data = [
    ("1. Principle of Orthogonality", "主成分の直交性", "Principal components are always perpendicular to each other. PC2 captures the remaining variance that PC1 cannot explain.", "各主成分は常に互いに直交します。第1主成分で説明しきれなかった「残りのばらつき」を第2主成分が拾い上げます。"),
    ("2. Dimensionality Reduction", "次元圧縮", "By treating the diagonal line as a single number line, we successfully compressed 2D data into 1D.", "斜めの線を1つの数直線(1次元)と考えれば、2次元のデータを1次元に「圧縮」できたことになります。"),
    ("3. Conservation of Variance", "分散の保存則", "Notice how PC1 + PC2 always equals 100%. PCA doesn't lose total information; it merely shifts the perspective.", "軸をどう回してもPC1とPC2の合計は常に100%です。PCAは情報を捨てるのではなく、見方(座標軸)を変えているだけです。")
]
# Render one card per learning point. The markup is kept flush-left so that
# no indentation becomes part of the string handed to st.markdown.
for title_en, title_ja, body_en, body_ja in learning_data:
    st.markdown(f"""
<div style="margin-bottom: 20px;">
<div style="font-weight: bold; font-size: 1.15rem;">{title_en}</div>
<div style="font-size: 0.85rem; color: #7f8c8d; margin-bottom: 4px;">{title_ja}</div>
<div style="font-size: 1rem; line-height: 1.4;">{body_en}</div>
<div style="font-size: 0.85rem; color: #7f8c8d; line-height: 1.4;">{body_ja}</div>
</div>
""", unsafe_allow_html=True)