# Hugging Face Space: Msk7000/PCA_Variance_Puzzle_Explorer (status: Running)
# File size: 7,545 Bytes
# Commit: d4798c4

import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.decomposition import PCA
import os

# --- 1. Font Setting (NotoSansJP) ---
# Use the Noto Sans JP file when it exists at FONT_PATH (a path relative to the
# working directory); otherwise fall back to Matplotlib's default properties.
FONT_PATH = "NotoSansJP-Regular.ttf"
jp_font = (
    FontProperties(fname=FONT_PATH)
    if os.path.exists(FONT_PATH)
    else FontProperties()
)

# Global Matplotlib defaults: generic sans-serif family, and a plain ASCII
# hyphen for negative numbers instead of the Unicode minus sign.
plt.rcParams.update({
    'font.family': 'sans-serif',
    'axes.unicode_minus': False,
})

# --- 2. Data Generation (Foot Length & Width) ---
# Reproducible synthetic sample: lengths are drawn from N(250, 10) and each
# width is 0.4 * length plus N(0, 3) noise, so the two columns are correlated.
np.random.seed(42)
n_samples = 50
length = np.random.normal(250, 10, n_samples)
width = 0.4 * length + np.random.normal(0, 3, n_samples)
data = np.vstack([length, width]).T  # one row per sample: [length, width]
data_centered = data - np.mean(data, axis=0)

# Actual PCA Result for validation
pca_model = PCA(n_components=2)
pca_model.fit(data_centered)
# A principal component is only defined up to sign (v and -v span the same
# axis), so the raw arctan2 angle can fall anywhere in (-180, 180]. Fold it
# modulo 180 degrees into [-90, 90) so the reported answer always describes
# the same axis with an angle that the UI slider (-90..90) can reach.
_raw_pc1_angle = np.degrees(
    np.arctan2(pca_model.components_[0, 1], pca_model.components_[0, 0])
)
true_angle = (_raw_pc1_angle + 90.0) % 180.0 - 90.0

# --- Helper: Bilingual Text Helper ---
def get_bilingual_html(en_text: str, ja_text: str, font_size_en: str = "1.8rem", font_size_ja: str = "0.9rem") -> str:
    """Return an HTML snippet with a bold English line above a grey Japanese line.

    The markup is emitted as one compact line on purpose. When HTML is passed
    through a Markdown renderer, a blank line terminates the HTML block and a
    following line indented by four or more spaces is treated as a code block,
    so blank or indented lines inside the snippet can show up as raw markup.

    Args:
        en_text: Text for the prominent (bold) first line.
        ja_text: Text for the smaller, grey second line.
        font_size_en: CSS ``font-size`` value for the first line.
        font_size_ja: CSS ``font-size`` value for the second line.

    Returns:
        A single-line HTML string: an outer ``<div>`` wrapping the two text
        ``<div>`` elements.

    Note:
        The texts are interpolated verbatim (no HTML escaping), so callers
        may pass inline markup but must not pass untrusted input.
    """
    return (
        '<div style="margin-bottom: 10px;">'
        f'<div style="font-size: {font_size_en}; font-weight: bold; line-height: 1.1;">{en_text}</div>'
        f'<div style="font-size: {font_size_ja}; color: #7f8c8d; margin-top: 1px;">{ja_text}</div>'
        '</div>'
    )

# --- 3. Streamlit UI ---
st.set_page_config(page_title="PCA Puzzle App", layout="wide")

# Page heading and task description, each as (English, Japanese, English
# font size, Japanese font size), rendered in order and followed by a line
# break used as vertical spacing.
_header_rows = (
    ("🧩 PCA Puzzle: Find PC1 and PC2", "主成分分析パズル：第1・第2主成分を探そう", "2.2rem", "1.1rem"),
    ("Find the orthogonal axes that maximize the captured information.", "データのばらつき（情報量）を最も多く回収できる直交軸を探しましょう。", "1.2rem", "0.9rem"),
)
for _en, _ja, _size_en, _size_ja in _header_rows:
    st.markdown(get_bilingual_html(_en, _ja, _size_en, _size_ja), unsafe_allow_html=True)
st.markdown("<br>", unsafe_allow_html=True)

# Two-column layout: the wider left column (col1) and the narrower right
# column (col2), which holds the controls and metrics below.
col1, col2 = st.columns([2, 1])

with col2:
    st.markdown(get_bilingual_html("Rotate the PCA Axes", "主成分軸の角度を調整する", "1.4rem", "0.85rem"), unsafe_allow_html=True)

    # The visible caption is the bilingual HTML above, so the slider's own
    # label is kept non-empty but collapsed (avoids Streamlit label warnings).
    angle = st.slider(
        "Hidden Label for Slider",
        min_value=-90.0, max_value=90.0, value=0.0, step=1.0,
        label_visibility="collapsed"
    )

    # Vector calculations: unit direction of the candidate PC1 axis and of
    # the axis rotated a further 90 degrees (candidate PC2).
    theta1 = np.radians(angle)
    vector1 = np.array([np.cos(theta1), np.sin(theta1)])
    theta2 = theta1 + np.pi / 2  # Orthogonal axis
    vector2 = np.array([np.cos(theta2), np.sin(theta2)])

    # Variance calculations: share of the total variance captured by the
    # projection of the centered data onto each axis, in percent.
    total_var = np.sum(np.var(data_centered, axis=0))
    proj1 = data_centered @ vector1
    ratio1 = (np.var(proj1) / total_var) * 100
    proj2 = data_centered @ vector2
    ratio2 = (np.var(proj2) / total_var) * 100

    # Metrics display. The HTML is assembled as one compact line: in Markdown
    # a blank line ends an HTML block and a following line indented by four
    # or more spaces becomes a code block, which would show the inner <div>
    # elements as raw markup instead of rendering them.
    metrics_html = (
        '<div style="background-color: #f8fafc; padding: 20px; border-radius: 12px; border: 1px solid #e2e8f0; margin-top: 10px;">'
        '<div style="margin-bottom: 15px;">'
        '<div style="font-size: 1rem; font-weight: bold;">PC1 Variance Ratio (Contribution)</div>'
        '<div style="font-size: 0.75rem; color: #7f8c8d;">第1主成分の寄与率</div>'
        f'<div style="font-size: 2rem; font-weight: 700; color: #C5A059;">{ratio1:.2f} %</div>'
        '</div>'
        '<div style="margin-bottom: 15px; border-top: 1px solid #e2e8f0; padding-top: 15px;">'
        '<div style="font-size: 1rem; font-weight: bold;">PC2 Variance Ratio (Contribution)</div>'
        '<div style="font-size: 0.75rem; color: #7f8c8d;">第2主成分の寄与率</div>'
        f'<div style="font-size: 2rem; font-weight: 700; color: #008080;">{ratio2:.2f} %</div>'
        '</div>'
        '<div style="background-color: #f1f5f9; margin: 10px -20px -20px -20px; padding: 15px; border-bottom-left-radius: 12px; border-bottom-right-radius: 12px;">'
        f'<div style="font-size: 0.9rem; font-weight: bold; color: #002B49;">Total Cumulative Ratio: {(ratio1+ratio2):.2f}%</div>'
        '</div>'
        '</div>'
    )
    st.markdown(metrics_html, unsafe_allow_html=True)

    st.markdown("<br>", unsafe_allow_html=True)
    # The button is True only on the rerun triggered by the click, so the
    # answer card is shown for that run.
    if st.button("Show Answer / 答えを表示"):
        answer_html = (
            '<div style="background-color: #fef9c3; padding: 15px; border-radius: 8px; border: 1px solid #fde047;">'
            f'<div style="font-weight: bold;">Optimal PC1 Angle: {true_angle:.1f}°</div>'
            '<div style="font-size: 0.8rem; color: #7f8c8d;">最適な第1主成分の角度</div>'
            '</div>'
        )
        st.markdown(answer_html, unsafe_allow_html=True)

with col1:
    fig, ax = plt.subplots(figsize=(8, 8))

    # Legend labels
    lbl_data = "Foot Metrics\n足計測データ"
    lbl_pc1 = "PC1 Axis (Main Search)\n第1主成分軸"
    lbl_pc2 = "PC2 Axis (Orthogonal)\n第2主成分軸"

    ax.scatter(data_centered[:, 0], data_centered[:, 1], alpha=0.5, c='#002B49', label=lbl_data)

    # Draw each axis as a segment through the origin along its unit direction
    # (cos, sin). A slope-based y = x * tan(theta) produces astronomically
    # large y-values when an axis is vertical, which is the case for the
    # orthogonal axis whenever the slider angle is 0.
    axis_limit = 35
    # Half-length equal to the half-diagonal of the plot window, so the
    # segment reaches the window border at every angle.
    reach = np.array([-1.0, 1.0]) * axis_limit * np.sqrt(2)
    ax.plot(reach * np.cos(theta1), reach * np.sin(theta1), color='#C5A059', linewidth=3, label=lbl_pc1)
    ax.plot(reach * np.cos(theta2), reach * np.sin(theta2), color='#008080', linewidth=2, linestyle='--', label=lbl_pc2)

    ax.set_xlabel("Length (Diff)\n長さ (平均との差)", fontproperties=jp_font)
    ax.set_ylabel("Width (Diff)\n幅 (平均との差)", fontproperties=jp_font)
    ax.set_title("PCA Axis Exploration\n主成分軸の探索", fontproperties=jp_font, fontsize=16, pad=15)
    ax.set_xlim(-axis_limit, axis_limit)
    ax.set_ylim(-axis_limit, axis_limit)
    # Equal data scaling on both axes so the two perpendicular lines are
    # actually drawn at a right angle on screen.
    ax.set_aspect('equal', adjustable='box')
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.legend(prop=jp_font, loc='upper left', bbox_to_anchor=(1, 1))
    st.pyplot(fig)
    # The script is re-executed on every widget interaction; close the figure
    # so pyplot does not keep one open figure per rerun in memory.
    plt.close(fig)

# --- 4. Key Learning Points (Bilingual Layout) ---
st.markdown("---")
st.markdown(get_bilingual_html("💡 Key Learning Points", "学習のポイント", "1.6rem", "0.95rem"), unsafe_allow_html=True)

# Each entry: (English title, Japanese title, English body, Japanese body).
learning_data = [
    ("1. Principle of Orthogonality", "主成分の直交性", "Principal components are always perpendicular to each other. PC2 captures the remaining variance that PC1 cannot explain.", "各主成分は常に互いに直交します。第1主成分で説明しきれなかった「残りのばらつき」を第2主成分が拾い上げます。"),
    ("2. Dimensionality Reduction", "次元圧縮", "By treating the diagonal line as a single number line, we successfully compressed 2D data into 1D.", "斜めの線を1つの数直線（1次元）と考えれば、2次元のデータを1次元に「圧縮」できたことになります。"),
    ("3. Conservation of Variance", "分散の保存則", "Notice how PC1 + PC2 always equals 100%. PCA doesn't lose total information; it merely shifts the perspective.", "軸をどう回してもPC1とPC2の合計は常に100%です。PCAは情報を捨てるのではなく、見方（座標軸）を変えているだけです。")
]

# Render one card per learning point. The HTML is emitted as a single compact
# line: in Markdown a blank line ends an HTML block and a following line
# indented by four or more spaces becomes a code block, which would display
# the inner <div> elements as raw markup.
for title_en, title_ja, body_en, body_ja in learning_data:
    st.markdown(
        '<div style="margin-bottom: 20px;">'
        f'<div style="font-weight: bold; font-size: 1.15rem;">{title_en}</div>'
        f'<div style="font-size: 0.85rem; color: #7f8c8d; margin-bottom: 4px;">{title_ja}</div>'
        f'<div style="font-size: 1rem; line-height: 1.4;">{body_en}</div>'
        f'<div style="font-size: 0.85rem; color: #7f8c8d; line-height: 1.4;">{body_ja}</div>'
        '</div>',
        unsafe_allow_html=True,
    )