import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.decomposition import PCA
import os
# --- 1. Font Setting (NotoSansJP) ---
# Relative path, so it is resolved against the current working directory.
FONT_PATH = "NotoSansJP-Regular.ttf"

# Use the Noto Sans JP file when it is present; otherwise fall back to
# Matplotlib's default font properties.
jp_font = (
    FontProperties(fname=FONT_PATH)
    if os.path.exists(FONT_PATH)
    else FontProperties()
)

plt.rcParams.update({
    'font.family': 'sans-serif',
    'axes.unicode_minus': False,
})
# --- 2. Data Generation (Foot Length & Width) ---
# Fixed seed: the same sample is regenerated every time the script runs.
np.random.seed(42)
n_samples = 50
# Lengths: normal with mean 250 and standard deviation 10.
length = np.random.normal(250, 10, n_samples)
# Widths: 0.4 * length plus normal noise with standard deviation 3.
width = 0.4 * length + np.random.normal(0, 3, n_samples)
# One row per sample, columns (length, width), centred on the column means.
data = np.vstack([length, width]).T
data_centered = data - np.mean(data, axis=0)

# Actual PCA Result for validation
pca_model = PCA(n_components=2)
pca_model.fit(data_centered)
raw_angle = np.degrees(
    np.arctan2(pca_model.components_[0, 1], pca_model.components_[0, 0])
)
# The sign of a principal component is arbitrary: a vector pointing at -158
# degrees spans the same axis as one pointing at 22 degrees.  Fold the angle
# into [-90, 90) so the reported answer always lies on the slider's range.
true_angle = (raw_angle + 90.0) % 180.0 - 90.0
# --- Helper: Bilingual Text Helper ---
def get_bilingual_html(
    en_text: str,
    ja_text: str,
    font_size_en: str = "1.8rem",
    font_size_ja: str = "0.9rem",
) -> str:
    """Return an HTML snippet with an English line above a Japanese line.

    Args:
        en_text: English text, rendered as the larger, bold primary line.
        ja_text: Japanese text, rendered below it as a smaller, muted line.
        font_size_en: CSS font size applied to the English line.
        font_size_ja: CSS font size applied to the Japanese line.

    Returns:
        An HTML string meant to be passed to
        ``st.markdown(..., unsafe_allow_html=True)``.  Both texts are
        inserted verbatim, without HTML escaping.
    """
    # The markup is emitted without leading whitespace: Markdown treats
    # lines indented by four or more spaces as code blocks.
    return (
        '<div style="margin-bottom: 0.6rem;">'
        f'<div style="font-size: {font_size_en}; font-weight: 700; '
        f'line-height: 1.3;">{en_text}</div>'
        f'<div style="font-size: {font_size_ja}; color: #666666; '
        f'line-height: 1.4;">{ja_text}</div>'
        '</div>'
    )
# --- 3. Streamlit UI ---
# Page configuration is issued before any other Streamlit call in the script.
st.set_page_config(page_title="PCA Puzzle App", layout="wide")

# Page title: English line over its Japanese translation.
st.markdown(
    get_bilingual_html(
        "🧩 PCA Puzzle: Find PC1 and PC2",
        "主成分分析パズル:第1・第2主成分を探そう",
        "2.2rem",
        "1.1rem",
    ),
    unsafe_allow_html=True,
)
# Subtitle describing the task.
st.markdown(
    get_bilingual_html(
        "Find the orthogonal axes that maximize the captured information.",
        "データのばらつき(情報量)を最も多く回収できる直交軸を探しましょう。",
        "1.2rem",
        "0.9rem",
    ),
    unsafe_allow_html=True,
)
# Vertical spacer between the header and the two-column layout.
st.markdown("<br>", unsafe_allow_html=True)
# Two-column layout: wide plot column (col1) and narrow control column (col2).
col1, col2 = st.columns([2, 1])

with col2:
    st.markdown(
        get_bilingual_html(
            "Rotate the PCA Axes", "主成分軸の角度を調整する", "1.4rem", "0.85rem"
        ),
        unsafe_allow_html=True,
    )
    # The heading above already describes the control, so the slider's own
    # label is hidden with label_visibility="collapsed" rather than left
    # empty (the original note says this avoids Streamlit warnings).
    angle = st.slider(
        "Hidden Label for Slider",
        min_value=-90.0, max_value=90.0, value=0.0, step=1.0,
        label_visibility="collapsed",
    )

    # Vector calculations: unit direction of the candidate PC1 axis and of
    # the axis rotated a further 90 degrees (candidate PC2).
    theta1 = np.radians(angle)
    vector1 = np.array([np.cos(theta1), np.sin(theta1)])
    theta2 = theta1 + np.pi / 2  # Orthogonal axis
    vector2 = np.array([np.cos(theta2), np.sin(theta2)])

    # Variance calculations: share of the total variance captured by the
    # projection of the centred data onto each axis, in percent.
    total_var = np.sum(np.var(data_centered, axis=0))
    proj1 = data_centered @ vector1
    ratio1 = (np.var(proj1) / total_var) * 100
    proj2 = data_centered @ vector2
    ratio2 = (np.var(proj2) / total_var) * 100

    # Metrics display.  Each piece of text gets its own block element so the
    # lines do not collapse into a single Markdown paragraph; the markup has
    # no leading whitespace so Markdown does not treat it as a code block.
    metrics_html = (
        '<div style="margin-top: 1rem;">'
        '<div style="font-weight: 700;">PC1 Variance Ratio (Contribution)</div>'
        '<div style="font-size: 0.85rem; color: #666666;">第1主成分の寄与率</div>'
        '<div style="font-size: 2rem; font-weight: 700; color: #C5A059;">'
        f'{ratio1:.2f} %</div>'
        '</div>'
        '<div style="margin-top: 1rem;">'
        '<div style="font-weight: 700;">PC2 Variance Ratio (Contribution)</div>'
        '<div style="font-size: 0.85rem; color: #666666;">第2主成分の寄与率</div>'
        '<div style="font-size: 2rem; font-weight: 700; color: #008080;">'
        f'{ratio2:.2f} %</div>'
        '</div>'
        '<div style="margin-top: 1rem; font-weight: 700;">'
        f'Total Cumulative Ratio: {(ratio1+ratio2):.2f}%</div>'
    )
    st.markdown(metrics_html, unsafe_allow_html=True)

    # Vertical spacer above the answer button.
    st.markdown("<br>", unsafe_allow_html=True)

    if st.button("Show Answer / 答えを表示"):
        answer_html = (
            '<div style="font-size: 1.2rem; font-weight: 700;">'
            f'Optimal PC1 Angle: {true_angle:.1f}°</div>'
            '<div style="font-size: 0.85rem; color: #666666;">'
            '最適な第1主成分の角度</div>'
        )
        st.markdown(answer_html, unsafe_allow_html=True)
with col1:
    fig, ax = plt.subplots(figsize=(8, 8))

    # Legend labels
    lbl_data = "Foot Metrics\n足計測データ"
    lbl_pc1 = "PC1 Axis (Main Search)\n第1主成分軸"
    lbl_pc2 = "PC2 Axis (Orthogonal)\n第2主成分軸"

    ax.scatter(data_centered[:, 0], data_centered[:, 1], alpha=0.5, c='#002B49', label=lbl_data)

    lims = np.array([-35, 35])
    # Draw each axis as a segment through the origin along its unit direction
    # vector.  A slope-based form (y = x * tan(theta)) needs astronomically
    # large coordinates when an axis is vertical; the vector form stays
    # finite at every angle.  The half-length is the plot's half-diagonal, so
    # the segment reaches the frame whatever the rotation.
    reach = np.array([-1.0, 1.0]) * lims[1] * np.sqrt(2)
    ax.plot(reach * vector1[0], reach * vector1[1], color='#C5A059', linewidth=3, label=lbl_pc1)
    ax.plot(reach * vector2[0], reach * vector2[1], color='#008080', linewidth=2, linestyle='--', label=lbl_pc2)

    ax.set_xlabel("Length (Diff)\n長さ (平均との差)", fontproperties=jp_font)
    ax.set_ylabel("Width (Diff)\n幅 (平均との差)", fontproperties=jp_font)
    ax.set_title("PCA Axis Exploration\n主成分軸の探索", fontproperties=jp_font, fontsize=16, pad=15)
    ax.set_xlim(lims[0], lims[1])
    ax.set_ylim(lims[0], lims[1])
    # Same scale on both axes, so the two orthogonal lines are drawn at a
    # right angle on screen.
    ax.set_aspect('equal', adjustable='box')
    ax.grid(True, linestyle='--', alpha=0.5)
    # Legend is anchored just outside the top-right corner of the axes.
    ax.legend(prop=jp_font, loc='upper left', bbox_to_anchor=(1, 1))

    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; close this
    # one so repeated script runs do not accumulate open figures.
    plt.close(fig)
# --- 4. Key Learning Points (Bilingual Layout) ---
st.markdown("---")
st.markdown(
    get_bilingual_html("💡 Key Learning Points", "学習のポイント", "1.6rem", "0.95rem"),
    unsafe_allow_html=True,
)

# Each entry: (English title, Japanese title, English body, Japanese body).
learning_data = [
    (
        "1. Principle of Orthogonality",
        "主成分の直交性",
        "Principal components are always perpendicular to each other. PC2 captures the remaining variance that PC1 cannot explain.",
        "各主成分は常に互いに直交します。第1主成分で説明しきれなかった「残りのばらつき」を第2主成分が拾い上げます。",
    ),
    (
        "2. Dimensionality Reduction",
        "次元圧縮",
        "By treating the diagonal line as a single number line, we successfully compressed 2D data into 1D.",
        "斜めの線を1つの数直線(1次元)と考えれば、2次元のデータを1次元に「圧縮」できたことになります。",
    ),
    (
        "3. Conservation of Variance",
        "分散の保存則",
        "Notice how PC1 + PC2 always equals 100%. PCA doesn't lose total information; it merely shifts the perspective.",
        "軸をどう回してもPC1とPC2の合計は常に100%です。PCAは情報を捨てるのではなく、見方(座標軸)を変えているだけです。",
    ),
]

for title_en, title_ja, body_en, body_ja in learning_data:
    # One block element per piece of text: without markup the four lines
    # would merge into a single Markdown paragraph.  The markup carries no
    # leading whitespace so Markdown does not treat it as a code block.
    point_html = (
        '<div style="margin-bottom: 1.2rem;">'
        f'<div style="font-size: 1.1rem; font-weight: 700;">{title_en}</div>'
        f'<div style="font-size: 0.85rem; color: #666666;">{title_ja}</div>'
        f'<div style="margin-top: 0.3rem;">{body_en}</div>'
        f'<div style="font-size: 0.9rem; color: #666666;">{body_ja}</div>'
        '</div>'
    )
    st.markdown(point_html, unsafe_allow_html=True)