# Page residue captured with this file (not part of the program):
#   Msk7000's picture
#   Upload 5 files
#   d4798c4 verified
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.decomposition import PCA
import os
# --- 1. Font Setting (NotoSansJP) ---
# The plot labels further down contain Japanese text and are drawn with
# `jp_font`. The NotoSansJP file is used when it exists at FONT_PATH (resolved
# against the current working directory); otherwise default FontProperties
# are used.
FONT_PATH = "NotoSansJP-Regular.ttf"
jp_font = FontProperties(fname=FONT_PATH) if os.path.exists(FONT_PATH) else FontProperties()

# Global matplotlib settings, applied in the same order as listed.
for rc_key, rc_value in (
    ('font.family', 'sans-serif'),
    ('axes.unicode_minus', False),
):
    plt.rcParams[rc_key] = rc_value
# --- 2. Data Generation (Foot Length & Width) ---
# Reproducible synthetic sample (fixed seed): 50 lengths drawn from a normal
# distribution (mean 250, std 10) and widths equal to 0.4 * length plus
# normal noise (std 3).
np.random.seed(42)
n_samples = 50
length = np.random.normal(250, 10, n_samples)
width = 0.4 * length + np.random.normal(0, 3, n_samples)
data = np.vstack([length, width]).T  # shape (n_samples, 2): [length, width]
# Subtract the column means so every candidate axis passes through the origin.
data_centered = data - np.mean(data, axis=0)

# Actual PCA Result for validation
pca_model = PCA(n_components=2)
pca_model.fit(data_centered)
pc1_x, pc1_y = pca_model.components_[0]
# A principal axis has no preferred sign: v and -v describe the same line, so
# arctan2 may return anything in (-180, 180]. Fold the angle into [-90, 90)
# so the revealed answer is always reachable with the -90..90 degree slider.
true_angle = (np.degrees(np.arctan2(pc1_y, pc1_x)) + 90.0) % 180.0 - 90.0
# --- Helper: Bilingual Text Helper ---
def get_bilingual_html(en_text, ja_text, font_size_en="1.8rem", font_size_ja="0.9rem"):
    """Return an HTML snippet with an English line above a Japanese line.

    Args:
        en_text: Text placed in the bold upper line.
        ja_text: Text placed in the grey lower line.
        font_size_en: CSS font-size used for the upper line.
        font_size_ja: CSS font-size used for the lower line.

    Returns:
        The markup as a string. Arguments are inserted verbatim (no HTML
        escaping); the string starts and ends with a newline.
    """
    english_row = (
        f'<div style="font-size: {font_size_en}; font-weight: bold; line-height: 1.1;">'
        f'{en_text}</div>'
    )
    japanese_row = (
        f'<div style="font-size: {font_size_ja}; color: #7f8c8d; margin-top: 1px;">'
        f'{ja_text}</div>'
    )
    rows = ['<div style="margin-bottom: 10px;">', english_row, japanese_row, '</div>']
    return "\n" + "\n".join(rows) + "\n"
# --- 3. Streamlit UI ---
# Page configuration is issued before any other Streamlit output.
st.set_page_config(page_title="PCA Puzzle App", layout="wide")

# Page title, then the one-line task description; each entry is
# (English text, Japanese text, English font size, Japanese font size).
page_headers = (
    ("🧩 PCA Puzzle: Find PC1 and PC2", "主成分分析パズル:第1・第2主成分を探そう", "2.2rem", "1.1rem"),
    ("Find the orthogonal axes that maximize the captured information.", "データのばらつき(情報量)を最も多く回収できる直交軸を探しましょう。", "1.2rem", "0.9rem"),
)
for header_args in page_headers:
    st.markdown(get_bilingual_html(*header_args), unsafe_allow_html=True)

# Spacer line, then two columns with a 2:1 width ratio.
st.markdown("<br>", unsafe_allow_html=True)
col1, col2 = st.columns([2, 1])
with col2:
    # Control panel: angle slider, variance ratios for the chosen axes, and a
    # button that reveals the PCA reference angle.
    st.markdown(
        get_bilingual_html("Rotate the PCA Axes", "主成分軸の角度を調整する", "1.4rem", "0.85rem"),
        unsafe_allow_html=True,
    )
    # The bilingual heading above acts as the visible caption, so the slider's
    # own (non-empty) label is collapsed.
    angle = st.slider(
        "Hidden Label for Slider",
        min_value=-90.0,
        max_value=90.0,
        value=0.0,
        step=1.0,
        label_visibility="collapsed",
    )

    # Unit direction of the candidate PC1 axis, and of the PC2 axis a quarter
    # turn further on.
    theta1 = np.radians(angle)
    theta2 = theta1 + np.pi / 2  # Orthogonal axis
    vector1 = np.array([np.cos(theta1), np.sin(theta1)])
    vector2 = np.array([np.cos(theta2), np.sin(theta2)])

    # Percentage of the total variance found along each axis: project the
    # centered points onto the direction and compare variances.
    total_var = np.sum(np.var(data_centered, axis=0))
    proj1 = data_centered @ vector1
    proj2 = data_centered @ vector2
    ratio1 = (np.var(proj1) / total_var) * 100
    ratio2 = (np.var(proj2) / total_var) * 100

    # Metrics card: PC1 ratio, PC2 ratio, and their sum.
    metrics_html = f"""
<div style="background-color: #f8fafc; padding: 20px; border-radius: 12px; border: 1px solid #e2e8f0; margin-top: 10px;">
<div style="margin-bottom: 15px;">
<div style="font-size: 1rem; font-weight: bold;">PC1 Variance Ratio (Contribution)</div>
<div style="font-size: 0.75rem; color: #7f8c8d;">第1主成分の寄与率</div>
<div style="font-size: 2rem; font-weight: 700; color: #C5A059;">{ratio1:.2f} %</div>
</div>
<div style="margin-bottom: 15px; border-top: 1px solid #e2e8f0; padding-top: 15px;">
<div style="font-size: 1rem; font-weight: bold;">PC2 Variance Ratio (Contribution)</div>
<div style="font-size: 0.75rem; color: #7f8c8d;">第2主成分の寄与率</div>
<div style="font-size: 2rem; font-weight: 700; color: #008080;">{ratio2:.2f} %</div>
</div>
<div style="background-color: #f1f5f9; margin: 10px -20px -20px -20px; padding: 15px; border-bottom-left-radius: 12px; border-bottom-right-radius: 12px;">
<div style="font-size: 0.9rem; font-weight: bold; color: #002B49;">Total Cumulative Ratio: {(ratio1+ratio2):.2f}%</div>
</div>
</div>
"""
    st.markdown(metrics_html, unsafe_allow_html=True)
    st.markdown("<br>", unsafe_allow_html=True)

    # Reveal the angle of the first principal component found by PCA.
    answer_requested = st.button("Show Answer / 答えを表示")
    if answer_requested:
        answer_html = f"""
<div style="background-color: #fef9c3; padding: 15px; border-radius: 8px; border: 1px solid #fde047;">
<div style="font-weight: bold;">Optimal PC1 Angle: {true_angle:.1f}°</div>
<div style="font-size: 0.8rem; color: #7f8c8d;">最適な第1主成分の角度</div>
</div>
"""
        st.markdown(answer_html, unsafe_allow_html=True)
with col1:
    # Scatter plot of the centered data with both candidate axes drawn
    # through the origin.
    fig, ax = plt.subplots(figsize=(8, 8))
    # Legend labels
    lbl_data = "Foot Metrics\n足計測データ"
    lbl_pc1 = "PC1 Axis (Main Search)\n第1主成分軸"
    lbl_pc2 = "PC2 Axis (Orthogonal)\n第2主成分軸"
    ax.scatter(data_centered[:, 0], data_centered[:, 1], alpha=0.5, c='#002B49', label=lbl_data)

    lims = np.array([-35, 35])
    # Build each axis line from its direction (cos, sin) rather than from
    # y = x * tan(theta): tan() becomes astronomically large near +/-90
    # degrees, and theta2 is exactly pi/2 when the slider sits at 0. The
    # half-length is larger than the plot's half-diagonal, so the line always
    # reaches the frame and is clipped there.
    reach = np.array([-1.0, 1.0]) * lims[1] * 1.5
    axis_styles = (
        (theta1, dict(color='#C5A059', linewidth=3, label=lbl_pc1)),
        (theta2, dict(color='#008080', linewidth=2, linestyle='--', label=lbl_pc2)),
    )
    for theta, line_style in axis_styles:
        ax.plot(reach * np.cos(theta), reach * np.sin(theta), **line_style)

    ax.set_xlabel("Length (Diff)\n長さ (平均との差)", fontproperties=jp_font)
    ax.set_ylabel("Width (Diff)\n幅 (平均との差)", fontproperties=jp_font)
    ax.set_title("PCA Axis Exploration\n主成分軸の探索", fontproperties=jp_font, fontsize=16, pad=15)
    ax.set_xlim(*lims)
    ax.set_ylim(*lims)
    # Same scale on both axes, so the two perpendicular lines also look
    # perpendicular on screen.
    ax.set_aspect('equal', adjustable='box')
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.legend(prop=jp_font, loc='upper left', bbox_to_anchor=(1, 1))
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed, and each
    # run of this script creates a new one; release it once it is rendered.
    plt.close(fig)
# --- 4. Key Learning Points (Bilingual Layout) ---
# Horizontal rule, section heading, then one text card per learning point.
st.markdown("---")
st.markdown(
    get_bilingual_html("💡 Key Learning Points", "学習のポイント", "1.6rem", "0.95rem"),
    unsafe_allow_html=True,
)

# Each entry: (English title, Japanese title, English body, Japanese body).
learning_data = [
    (
        "1. Principle of Orthogonality",
        "主成分の直交性",
        "Principal components are always perpendicular to each other. PC2 captures the remaining variance that PC1 cannot explain.",
        "各主成分は常に互いに直交します。第1主成分で説明しきれなかった「残りのばらつき」を第2主成分が拾い上げます。",
    ),
    (
        "2. Dimensionality Reduction",
        "次元圧縮",
        "By treating the diagonal line as a single number line, we successfully compressed 2D data into 1D.",
        "斜めの線を1つの数直線(1次元)と考えれば、2次元のデータを1次元に「圧縮」できたことになります。",
    ),
    (
        "3. Conservation of Variance",
        "分散の保存則",
        "Notice how PC1 + PC2 always equals 100%. PCA doesn't lose total information; it merely shifts the perspective.",
        "軸をどう回してもPC1とPC2の合計は常に100%です。PCAは情報を捨てるのではなく、見方(座標軸)を変えているだけです。",
    ),
]

for point in learning_data:
    heading_en, heading_ja, text_en, text_ja = point
    card_html = f"""
<div style="margin-bottom: 20px;">
<div style="font-weight: bold; font-size: 1.15rem;">{heading_en}</div>
<div style="font-size: 0.85rem; color: #7f8c8d; margin-bottom: 4px;">{heading_ja}</div>
<div style="font-size: 1rem; line-height: 1.4;">{text_en}</div>
<div style="font-size: 0.85rem; color: #7f8c8d; line-height: 1.4;">{text_ja}</div>
</div>
"""
    st.markdown(card_html, unsafe_allow_html=True)