"""Streamlit app: an interactive puzzle for locating PC1 and PC2 by hand.

The script generates a small synthetic two-column data set (foot length and
width), lets the user rotate a pair of orthogonal axes with a slider, and
reports how much of the total variance each axis captures. The angle found by
scikit-learn's PCA can be revealed with a button.
"""
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from matplotlib.font_manager import FontProperties
from sklearn.decomposition import PCA

# --- 1. Font Setting (NotoSansJP) ---
FONT_PATH = "NotoSansJP-Regular.ttf"
if os.path.exists(FONT_PATH):
    jp_font = FontProperties(fname=FONT_PATH)
else:
    # Fall back to matplotlib's default font properties when the font file is
    # not next to the script; Japanese glyphs may not render in that case.
    jp_font = FontProperties()
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['axes.unicode_minus'] = False

# --- 2. Data Generation (Foot Length & Width) ---
np.random.seed(42)  # fixed seed so every rerun shows the same puzzle
n_samples = 50
length = np.random.normal(250, 10, n_samples)
width = 0.4 * length + np.random.normal(0, 3, n_samples)
data = np.vstack([length, width]).T
data_centered = data - np.mean(data, axis=0)

# Actual PCA Result for validation
pca_model = PCA(n_components=2)
pca_model.fit(data_centered)
_pc1_dx, _pc1_dy = pca_model.components_[0]
_raw_angle = np.degrees(np.arctan2(_pc1_dy, _pc1_dx))
# A principal axis has no direction: v and -v describe the same line, so
# arctan2 may report the answer anywhere in (-180, 180]. Fold it into
# [-90, 90) so the revealed answer is always reachable with the slider.
true_angle = (_raw_angle + 90.0) % 180.0 - 90.0


# --- Helper: Bilingual Text Helper ---
def get_bilingual_html(en_text, ja_text, font_size_en="1.8rem", font_size_ja="0.9rem"):
    """Return an HTML snippet with the English text above the Japanese text.

    Args:
        en_text: English text, rendered on the first line.
        ja_text: Japanese text, rendered on the second line.
        font_size_en: CSS font-size applied to the English line.
        font_size_ja: CSS font-size applied to the Japanese line.

    Returns:
        A string holding two ``<div>`` elements. The texts are inserted
        verbatim (not HTML-escaped), so pass trusted strings only. Render it
        with ``st.markdown(..., unsafe_allow_html=True)``.
    """
    return (
        f'<div style="font-size: {font_size_en};">{en_text}</div>\n'
        f'<div style="font-size: {font_size_ja};">{ja_text}</div>\n'
    )


# --- 3. Streamlit UI ---
# set_page_config is the first Streamlit call in the script.
st.set_page_config(page_title="PCA Puzzle App", layout="wide")

st.markdown(
    get_bilingual_html(
        "🧩 PCA Puzzle: Find PC1 and PC2",
        "主成分分析パズル:第1・第2主成分を探そう",
        "2.2rem",
        "1.1rem",
    ),
    unsafe_allow_html=True,
)
st.markdown(
    get_bilingual_html(
        "Find the orthogonal axes that maximize the captured information.",
        "データのばらつき(情報量)を最も多く回収できる直交軸を探しましょう。",
        "1.2rem",
        "0.9rem",
    ),
    unsafe_allow_html=True,
)
st.markdown("\n", unsafe_allow_html=True)

col1, col2 = st.columns([2, 1])

with col2:
    st.markdown(
        get_bilingual_html(
            "Rotate the PCA Axes",
            "主成分軸の角度を調整する",
            "1.4rem",
            "0.85rem",
        ),
        unsafe_allow_html=True,
    )

    # The label is hidden with label_visibility="collapsed" rather than left
    # empty, to avoid Streamlit warnings.
    angle = st.slider(
        "Hidden Label for Slider",
        min_value=-90.0,
        max_value=90.0,
        value=0.0,
        step=1.0,
        label_visibility="collapsed",
    )

    # Vector calculations
    theta1 = np.radians(angle)
    vector1 = np.array([np.cos(theta1), np.sin(theta1)])
    theta2 = theta1 + np.pi / 2  # Orthogonal axis
    vector2 = np.array([np.cos(theta2), np.sin(theta2)])

    # Variance calculations: share of the total variance captured along each
    # candidate axis, expressed as a percentage.
    total_var = np.sum(np.var(data_centered, axis=0))
    proj1 = data_centered @ vector1
    ratio1 = (np.var(proj1) / total_var) * 100
    proj2 = data_centered @ vector2
    ratio2 = (np.var(proj2) / total_var) * 100

    # Metrics display
    # NOTE(review): this text carries no markup even though it is rendered
    # with unsafe_allow_html=True -- confirm whether styling tags are missing.
    metrics_text = (
        "\n"
        "PC1 Variance Ratio (Contribution)\n"
        "第1主成分の寄与率\n"
        f"{ratio1:.2f} %\n"
        "PC2 Variance Ratio (Contribution)\n"
        "第2主成分の寄与率\n"
        f"{ratio2:.2f} %\n"
        f"Total Cumulative Ratio: {(ratio1+ratio2):.2f}%\n"
    )
    st.markdown(metrics_text, unsafe_allow_html=True)
    st.markdown("\n", unsafe_allow_html=True)

    if st.button("Show Answer / 答えを表示"):
        answer_text = (
            "\n"
            f"Optimal PC1 Angle: {true_angle:.1f}°\n"
            "最適な第1主成分の角度\n"
        )
        st.markdown(answer_text, unsafe_allow_html=True)

with col1:
    fig, ax = plt.subplots(figsize=(8, 8))

    # Legend labels
    lbl_data = "Foot Metrics\n足計測データ"
    lbl_pc1 = "PC1 Axis (Main Search)\n第1主成分軸"
    lbl_pc2 = "PC2 Axis (Orthogonal)\n第2主成分軸"

    ax.scatter(
        data_centered[:, 0],
        data_centered[:, 1],
        alpha=0.5,
        c='#002B49',
        label=lbl_data,
    )

    # Draw each axis from its unit direction vector instead of a slope
    # (y = x * tan(theta)): a slope is undefined for a vertical axis, which is
    # exactly where PC2 sits at the default angle of 0. The half-length equals
    # the plot's half-diagonal, so after clipping the line reaches the frame
    # at every angle.
    axis_limit = 35
    reach = axis_limit * np.sqrt(2)
    span = np.array([-reach, reach])
    ax.plot(
        span * vector1[0],
        span * vector1[1],
        color='#C5A059',
        linewidth=3,
        label=lbl_pc1,
    )
    ax.plot(
        span * vector2[0],
        span * vector2[1],
        color='#008080',
        linewidth=2,
        linestyle='--',
        label=lbl_pc2,
    )

    ax.set_xlabel("Length (Diff)\n長さ (平均との差)", fontproperties=jp_font)
    ax.set_ylabel("Width (Diff)\n幅 (平均との差)", fontproperties=jp_font)
    ax.set_title(
        "PCA Axis Exploration\n主成分軸の探索",
        fontproperties=jp_font,
        fontsize=16,
        pad=15,
    )
    ax.set_xlim(-axis_limit, axis_limit)
    ax.set_ylim(-axis_limit, axis_limit)
    # Equal scaling on both axes so the two orthogonal lines look
    # perpendicular on screen.
    ax.set_aspect("equal", adjustable="box")
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.legend(prop=jp_font, loc='upper left', bbox_to_anchor=(1, 1))

    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; the script
    # reruns on every widget interaction, so release the figure explicitly.
    plt.close(fig)

# --- 4. Key Learning Points (Bilingual Layout) ---
st.markdown("---")
st.markdown(
    get_bilingual_html("💡 Key Learning Points", "学習のポイント", "1.6rem", "0.95rem"),
    unsafe_allow_html=True,
)

# Each entry: (English title, Japanese title, English body, Japanese body).
learning_data = [
    (
        "1. Principle of Orthogonality",
        "主成分の直交性",
        "Principal components are always perpendicular to each other. PC2 captures the remaining variance that PC1 cannot explain.",
        "各主成分は常に互いに直交します。第1主成分で説明しきれなかった「残りのばらつき」を第2主成分が拾い上げます。",
    ),
    (
        "2. Dimensionality Reduction",
        "次元圧縮",
        "By treating the diagonal line as a single number line, we successfully compressed 2D data into 1D.",
        "斜めの線を1つの数直線(1次元)と考えれば、2次元のデータを1次元に「圧縮」できたことになります。",
    ),
    (
        "3. Conservation of Variance",
        "分散の保存則",
        "Notice how PC1 + PC2 always equals 100%. PCA doesn't lose total information; it merely shifts the perspective.",
        "軸をどう回してもPC1とPC2の合計は常に100%です。PCAは情報を捨てるのではなく、見方(座標軸)を変えているだけです。",
    ),
]

for title_en, title_ja, body_en, body_ja in learning_data:
    # NOTE(review): rendered as plain text lines with unsafe_allow_html=True;
    # confirm whether card markup is expected here.
    st.markdown(
        f"\n{title_en}\n{title_ja}\n{body_en}\n{body_ja}\n",
        unsafe_allow_html=True,
    )