import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from matplotlib.font_manager import FontProperties
from sklearn.decomposition import PCA
|
|
|
|
|
# Optional Japanese font file used for the bilingual plot labels. The path is
# relative, so it is looked up from the current working directory.
FONT_PATH = "NotoSansJP-Regular.ttf"

# isfile() rather than exists(): a directory that happens to carry this name
# must not be handed to FontProperties as if it were a font file.
if os.path.isfile(FONT_PATH):
    jp_font = FontProperties(fname=FONT_PATH)
else:
    # Fall back to Matplotlib's default font properties when the file is absent.
    jp_font = FontProperties()

plt.rcParams['font.family'] = 'sans-serif'
# Render minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
|
|
|
|
|
# Synthetic two-column data set: a "length" column and a "width" column that
# is linearly related to it plus noise. The global seed is fixed so every
# rerun of the script produces the same points.
np.random.seed(42)
n_samples = 50

# NOTE: the draw order (length first, then the width noise) determines the
# generated values; keep it as is to reproduce the same data.
length = np.random.normal(250, 10, n_samples)
width = 0.4 * length + np.random.normal(0, 3, n_samples)

# One row per sample, columns = (length, width).
data = np.vstack([length, width]).T

# Subtract the per-column mean so the cloud is centred on the origin and the
# candidate axes can be drawn through (0, 0).
data_centered = data - np.mean(data, axis=0)
|
|
|
|
|
# Fit a reference PCA on the centred data; its first component provides the
# "answer" angle revealed by the Show Answer button.
pca_model = PCA(n_components=2)
pca_model.fit(data_centered)

# Direction of the first principal component as an angle from the x-axis.
pc1_dx, pc1_dy = pca_model.components_[0]
raw_angle = np.degrees(np.arctan2(pc1_dy, pc1_dx))

# A principal axis has no inherent sign: v and -v describe the same line, so
# arctan2 may report e.g. -158 deg for an axis that is also +22 deg. Fold the
# result into [-90, 90) so the answer always lies within the slider's range.
true_angle = (raw_angle + 90.0) % 180.0 - 90.0
|
|
|
|
|
def get_bilingual_html(
    en_text: str,
    ja_text: str,
    font_size_en: str = "1.8rem",
    font_size_ja: str = "0.9rem",
) -> str:
    """Return an HTML snippet with an English line above a Japanese line.

    The English text is rendered bold; the Japanese text is rendered below it
    in grey (#7f8c8d). Both texts are inserted verbatim (no HTML escaping).

    Args:
        en_text: Text for the bold first line.
        ja_text: Text for the grey second line.
        font_size_en: CSS ``font-size`` value for the first line.
        font_size_ja: CSS ``font-size`` value for the second line.

    Returns:
        The HTML fragment as a string, one tag per line, ending in a newline.
    """
    # Every line starts at column 0 and there are no blank lines: a Markdown
    # renderer ends an HTML block at a blank line and can treat indented
    # lines as a code block, so the fragment is kept compact and flush-left.
    return (
        '<div style="margin-bottom: 10px;">\n'
        f'<div style="font-size: {font_size_en}; font-weight: bold; line-height: 1.1;">{en_text}</div>\n'
        f'<div style="font-size: {font_size_ja}; color: #7f8c8d; margin-top: 1px;">{ja_text}</div>\n'
        '</div>\n'
    )
|
|
|
|
|
# Page-level configuration: browser tab title and wide layout.
st.set_page_config(page_title="PCA Puzzle App", layout="wide")

# Bilingual page title and one-line task description, rendered as raw HTML.
st.markdown(
    get_bilingual_html(
        "🧩 PCA Puzzle: Find PC1 and PC2",
        "主成分分析パズル:第1・第2主成分を探そう",
        "2.2rem",
        "1.1rem",
    ),
    unsafe_allow_html=True,
)
st.markdown(
    get_bilingual_html(
        "Find the orthogonal axes that maximize the captured information.",
        "データのばらつき(情報量)を最も多く回収できる直交軸を探しましょう。",
        "1.2rem",
        "0.9rem",
    ),
    unsafe_allow_html=True,
)
# Vertical spacer between the header and the two-column body.
st.markdown("<br>", unsafe_allow_html=True)

# Two columns with a 2:1 width ratio: col1 is the wide one, col2 the narrow one.
col1, col2 = st.columns([2, 1])
|
|
|
# Narrow column: angle slider, variance-ratio read-out and the answer button.
with col2:
    st.markdown(
        get_bilingual_html("Rotate the PCA Axes", "主成分軸の角度を調整する", "1.4rem", "0.85rem"),
        unsafe_allow_html=True,
    )

    # The heading above serves as the visible caption, so the widget's own
    # label is collapsed.
    angle = st.slider(
        "Hidden Label for Slider",
        min_value=-90.0,
        max_value=90.0,
        value=0.0,
        step=1.0,
        label_visibility="collapsed",
    )

    # Unit vector of the candidate PC1 axis at the chosen angle, and the
    # candidate PC2 axis obtained by rotating it a further 90 degrees.
    theta1 = np.radians(angle)
    vector1 = np.array([np.cos(theta1), np.sin(theta1)])
    theta2 = theta1 + np.pi / 2
    vector2 = np.array([np.cos(theta2), np.sin(theta2)])

    # Share of the total variance (sum of per-column variances) captured by
    # projecting the centred data onto each candidate axis, in percent.
    total_var = np.sum(np.var(data_centered, axis=0))
    proj1 = data_centered @ vector1
    ratio1 = (np.var(proj1) / total_var) * 100
    proj2 = data_centered @ vector2
    ratio2 = (np.var(proj2) / total_var) * 100

    # The HTML is assembled flush-left and without blank lines so the Markdown
    # renderer keeps it as one HTML block instead of splitting it or treating
    # indented lines as a code block.
    metrics_html = "\n".join([
        '<div style="background-color: #f8fafc; padding: 20px; border-radius: 12px; border: 1px solid #e2e8f0; margin-top: 10px;">',
        '<div style="margin-bottom: 15px;">',
        '<div style="font-size: 1rem; font-weight: bold;">PC1 Variance Ratio (Contribution)</div>',
        '<div style="font-size: 0.75rem; color: #7f8c8d;">第1主成分の寄与率</div>',
        f'<div style="font-size: 2rem; font-weight: 700; color: #C5A059;">{ratio1:.2f} %</div>',
        '</div>',
        '<div style="margin-bottom: 15px; border-top: 1px solid #e2e8f0; padding-top: 15px;">',
        '<div style="font-size: 1rem; font-weight: bold;">PC2 Variance Ratio (Contribution)</div>',
        '<div style="font-size: 0.75rem; color: #7f8c8d;">第2主成分の寄与率</div>',
        f'<div style="font-size: 2rem; font-weight: 700; color: #008080;">{ratio2:.2f} %</div>',
        '</div>',
        '<div style="background-color: #f1f5f9; margin: 10px -20px -20px -20px; padding: 15px; border-bottom-left-radius: 12px; border-bottom-right-radius: 12px;">',
        f'<div style="font-size: 0.9rem; font-weight: bold; color: #002B49;">Total Cumulative Ratio: {(ratio1+ratio2):.2f}%</div>',
        '</div>',
        '</div>',
    ])
    st.markdown(metrics_html, unsafe_allow_html=True)

    st.markdown("<br>", unsafe_allow_html=True)

    # Reveal the reference PC1 angle computed from the fitted PCA model.
    if st.button("Show Answer / 答えを表示"):
        answer_html = "\n".join([
            '<div style="background-color: #fef9c3; padding: 15px; border-radius: 8px; border: 1px solid #fde047;">',
            f'<div style="font-weight: bold;">Optimal PC1 Angle: {true_angle:.1f}°</div>',
            '<div style="font-size: 0.8rem; color: #7f8c8d;">最適な第1主成分の角度</div>',
            '</div>',
        ])
        st.markdown(answer_html, unsafe_allow_html=True)
|
|
|
# Wide column: scatter plot of the centred data with the two candidate axes.
with col1:
    fig, ax = plt.subplots(figsize=(8, 8))

    # Bilingual legend entries (English line, Japanese line).
    lbl_data = "Foot Metrics\n足計測データ"
    lbl_pc1 = "PC1 Axis (Main Search)\n第1主成分軸"
    lbl_pc2 = "PC2 Axis (Orthogonal)\n第2主成分軸"

    ax.scatter(data_centered[:, 0], data_centered[:, 1], alpha=0.5, c='#002B49', label=lbl_data)

    lims = np.array([-35, 35])

    # Draw each axis from its unit direction vector rather than as
    # y = x * tan(theta): the tangent grows without bound as the angle nears
    # +/-90 degrees (which PC2 hits at the default slider value of 0).
    # `reach` is the half-diagonal of the square view, so the segment spans
    # the visible area at every angle; the axis limits clip the excess.
    reach = np.abs(lims).max() * np.sqrt(2.0)
    span = np.array([-reach, reach])
    ax.plot(span * np.cos(theta1), span * np.sin(theta1),
            color='#C5A059', linewidth=3, label=lbl_pc1)
    ax.plot(span * np.cos(theta2), span * np.sin(theta2),
            color='#008080', linewidth=2, linestyle='--', label=lbl_pc2)

    ax.set_xlabel("Length (Diff)\n長さ (平均との差)", fontproperties=jp_font)
    ax.set_ylabel("Width (Diff)\n幅 (平均との差)", fontproperties=jp_font)
    ax.set_title("PCA Axis Exploration\n主成分軸の探索", fontproperties=jp_font, fontsize=16, pad=15)
    ax.set_xlim(lims[0], lims[1])
    ax.set_ylim(lims[0], lims[1])
    # Equal data scaling on both axes so perpendicular lines look
    # perpendicular and the drawn slope matches the slider angle.
    ax.set_aspect("equal", adjustable="box")
    ax.grid(True, linestyle='--', alpha=0.5)
    # Legend is anchored just outside the top-right corner of the axes.
    ax.legend(prop=jp_font, loc='upper left', bbox_to_anchor=(1, 1))
    st.pyplot(fig)

    # pyplot keeps every figure it creates until it is closed; the script is
    # re-executed on each widget interaction, so release the figure here.
    plt.close(fig)
|
|
|
|
|
# Horizontal rule followed by the "Key Learning Points" section.
st.markdown("---")
st.markdown(
    get_bilingual_html("💡 Key Learning Points", "学習のポイント", "1.6rem", "0.95rem"),
    unsafe_allow_html=True,
)

# Each entry: (English title, Japanese title, English body, Japanese body).
learning_data = [
    (
        "1. Principle of Orthogonality",
        "主成分の直交性",
        "Principal components are always perpendicular to each other. PC2 captures the remaining variance that PC1 cannot explain.",
        "各主成分は常に互いに直交します。第1主成分で説明しきれなかった「残りのばらつき」を第2主成分が拾い上げます。",
    ),
    (
        "2. Dimensionality Reduction",
        "次元圧縮",
        "By treating the diagonal line as a single number line, we successfully compressed 2D data into 1D.",
        "斜めの線を1つの数直線(1次元)と考えれば、2次元のデータを1次元に「圧縮」できたことになります。",
    ),
    (
        "3. Conservation of Variance",
        "分散の保存則",
        "Notice how PC1 + PC2 always equals 100%. PCA doesn't lose total information; it merely shifts the perspective.",
        "軸をどう回してもPC1とPC2の合計は常に100%です。PCAは情報を捨てるのではなく、見方(座標軸)を変えているだけです。",
    ),
]

for title_en, title_ja, body_en, body_ja in learning_data:
    # Flush-left, blank-line-free HTML so the Markdown renderer keeps the
    # card as a single HTML block.
    card_html = "\n".join([
        '<div style="margin-bottom: 20px;">',
        f'<div style="font-weight: bold; font-size: 1.15rem;">{title_en}</div>',
        f'<div style="font-size: 0.85rem; color: #7f8c8d; margin-bottom: 4px;">{title_ja}</div>',
        f'<div style="font-size: 1rem; line-height: 1.4;">{body_en}</div>',
        f'<div style="font-size: 0.85rem; color: #7f8c8d; line-height: 1.4;">{body_ja}</div>',
        '</div>',
    ])
    st.markdown(card_html, unsafe_allow_html=True)
|
|
|
|
|