File size: 7,545 Bytes
d4798c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.decomposition import PCA
import os

# --- 1. Font Setting (NotoSansJP) ---
# Look for the bundled font next to this script first, so the app finds it no
# matter which directory `streamlit run` is launched from; fall back to the
# original working-directory-relative name otherwise.
_FONT_FILE = "NotoSansJP-Regular.ttf"
try:
    _script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:  # __file__ is absent when the code is exec()'d directly
    _script_dir = os.getcwd()
_script_font = os.path.join(_script_dir, _FONT_FILE)
FONT_PATH = _script_font if os.path.exists(_script_font) else _FONT_FILE

# Use the font file when it is available; otherwise fall back to matplotlib's
# default FontProperties (Japanese text may then not render correctly).
if os.path.exists(FONT_PATH):
    jp_font = FontProperties(fname=FONT_PATH)
else:
    jp_font = FontProperties()

plt.rcParams['font.family'] = 'sans-serif'
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

# --- 2. Data Generation (Foot Length & Width) ---
# Seed the global NumPy generator so every rerun of the script produces the
# same synthetic sample.
np.random.seed(42)
n_samples = 50

# Draw order is part of the reproducibility contract: lengths first, then the
# noise that is added to the length-dependent width.
length = np.random.normal(loc=250, scale=10, size=n_samples)
width_noise = np.random.normal(loc=0, scale=3, size=n_samples)
width = 0.4 * length + width_noise

# One row per sample, columns = (length, width).
data = np.stack([length, width]).T

# Subtract the per-column mean so the point cloud is centred on the origin.
column_means = np.mean(data, axis=0)
data_centered = data - column_means

# Actual PCA Result for validation
pca_model = PCA(n_components=2)
pca_model.fit(data_centered)

# Angle of the first principal component, measured from the +x (length) axis.
_pc1_x, _pc1_y = pca_model.components_[0]
_raw_angle = np.degrees(np.arctan2(_pc1_y, _pc1_x))

# A principal axis and its negation describe the same line, and arctan2 can
# return anything in (-180, 180]. Wrap the result into [-90, 90) so the
# displayed answer always lies within the range of the angle slider.
true_angle = (_raw_angle + 90.0) % 180.0 - 90.0

# --- Helper: Bilingual Text Helper ---
def get_bilingual_html(en_text: str, ja_text: str, font_size_en: str = "1.8rem", font_size_ja: str = "0.9rem") -> str:
    """Return an HTML snippet showing English text with a Japanese subtitle.

    The markup is emitted without blank lines or leading indentation. When
    HTML is passed through a Markdown renderer, a blank line terminates the
    HTML block and a following line indented by four or more spaces is treated
    as an indented code block, so the nested ``<div>`` elements must stay
    flush-left and contiguous to be rendered as HTML.

    Args:
        en_text: Primary (English) text, rendered bold. Inserted verbatim,
            without HTML escaping.
        ja_text: Secondary (Japanese) text, rendered smaller in grey.
            Inserted verbatim, without HTML escaping.
        font_size_en: CSS font-size value for the English line.
        font_size_ja: CSS font-size value for the Japanese line.

    Returns:
        The HTML fragment as a string.
    """
    return (
        '<div style="margin-bottom: 10px;">\n'
        f'<div style="font-size: {font_size_en}; font-weight: bold; line-height: 1.1;">{en_text}</div>\n'
        f'<div style="font-size: {font_size_ja}; color: #7f8c8d; margin-top: 1px;">{ja_text}</div>\n'
        '</div>'
    )

# --- 3. Streamlit UI ---
st.set_page_config(page_title="PCA Puzzle App", layout="wide")

# Page title followed by the one-line task description; each entry is
# (English text, Japanese text, English font size, Japanese font size).
_header_lines = (
    ("🧩 PCA Puzzle: Find PC1 and PC2", "主成分分析パズル:第1・第2主成分を探そう", "2.2rem", "1.1rem"),
    ("Find the orthogonal axes that maximize the captured information.", "データのばらつき(情報量)を最も多く回収できる直交軸を探しましょう。", "1.2rem", "0.9rem"),
)
for _en, _ja, _size_en, _size_ja in _header_lines:
    _header_html = get_bilingual_html(_en, _ja, _size_en, _size_ja)
    st.markdown(_header_html, unsafe_allow_html=True)

# Vertical spacer before the two-column layout.
st.markdown("<br>", unsafe_allow_html=True)

# Two columns with a 2:1 width ratio: col1 holds the plot, col2 the controls.
col1, col2 = st.columns([2, 1])

with col2:
    st.markdown(get_bilingual_html("Rotate the PCA Axes", "主成分軸の角度を調整する", "1.4rem", "0.85rem"), unsafe_allow_html=True)

    # The heading above acts as the visible label, so the slider's own
    # (non-empty) label is hidden via label_visibility instead of being blank.
    angle = st.slider(
        "Hidden Label for Slider",
        min_value=-90.0, max_value=90.0, value=0.0, step=1.0,
        label_visibility="collapsed"
    )

    # Unit direction vectors of the candidate axes
    theta1 = np.radians(angle)
    vector1 = np.array([np.cos(theta1), np.sin(theta1)])
    theta2 = theta1 + np.pi / 2  # Orthogonal axis
    vector2 = np.array([np.cos(theta2), np.sin(theta2)])

    # Share of the total variance captured along each axis, in percent
    total_var = np.sum(np.var(data_centered, axis=0))
    proj1 = data_centered @ vector1
    ratio1 = (np.var(proj1) / total_var) * 100
    proj2 = data_centered @ vector2
    ratio2 = (np.var(proj2) / total_var) * 100

    # Metrics display.
    # The markup is assembled flush-left with no blank lines: in Markdown a
    # blank line ends an HTML block and a following line indented by four or
    # more spaces becomes a code block, which would show the inner <div>
    # elements as literal text instead of rendering them.
    metrics_html = "\n".join([
        '<div style="background-color: #f8fafc; padding: 20px; border-radius: 12px; border: 1px solid #e2e8f0; margin-top: 10px;">',
        '<div style="margin-bottom: 15px;">',
        '<div style="font-size: 1rem; font-weight: bold;">PC1 Variance Ratio (Contribution)</div>',
        '<div style="font-size: 0.75rem; color: #7f8c8d;">第1主成分の寄与率</div>',
        f'<div style="font-size: 2rem; font-weight: 700; color: #C5A059;">{ratio1:.2f} %</div>',
        '</div>',
        '<div style="margin-bottom: 15px; border-top: 1px solid #e2e8f0; padding-top: 15px;">',
        '<div style="font-size: 1rem; font-weight: bold;">PC2 Variance Ratio (Contribution)</div>',
        '<div style="font-size: 0.75rem; color: #7f8c8d;">第2主成分の寄与率</div>',
        f'<div style="font-size: 2rem; font-weight: 700; color: #008080;">{ratio2:.2f} %</div>',
        '</div>',
        '<div style="background-color: #f1f5f9; margin: 10px -20px -20px -20px; padding: 15px; border-bottom-left-radius: 12px; border-bottom-right-radius: 12px;">',
        f'<div style="font-size: 0.9rem; font-weight: bold; color: #002B49;">Total Cumulative Ratio: {(ratio1+ratio2):.2f}%</div>',
        '</div>',
        '</div>',
    ])
    st.markdown(metrics_html, unsafe_allow_html=True)

    st.markdown("<br>", unsafe_allow_html=True)
    if st.button("Show Answer / 答えを表示"):
        # Same flush-left, blank-line-free markup as the metrics card above.
        answer_html = "\n".join([
            '<div style="background-color: #fef9c3; padding: 15px; border-radius: 8px; border: 1px solid #fde047;">',
            f'<div style="font-weight: bold;">Optimal PC1 Angle: {true_angle:.1f}°</div>',
            '<div style="font-size: 0.8rem; color: #7f8c8d;">最適な第1主成分の角度</div>',
            '</div>',
        ])
        st.markdown(answer_html, unsafe_allow_html=True)

with col1:
    fig, ax = plt.subplots(figsize=(8, 8))

    # Legend labels
    lbl_data = "Foot Metrics\n足計測データ"
    lbl_pc1 = "PC1 Axis (Main Search)\n第1主成分軸"
    lbl_pc2 = "PC2 Axis (Orthogonal)\n第2主成分軸"

    ax.scatter(data_centered[:, 0], data_centered[:, 1], alpha=0.5, c='#002B49', label=lbl_data)

    # Half-width of the square viewport, in data units.
    half_span = 35
    # Draw each axis as a segment through the origin along its direction
    # vector. Using (cos, sin) instead of a slope of tan(theta) stays finite
    # for vertical axes (theta = +/-90 deg), where tan() blows up to ~1e16.
    # A half-length equal to the viewport's half-diagonal guarantees the
    # segment reaches the plot border at every angle.
    reach = half_span * np.sqrt(2)
    axis_specs = (
        (theta1, dict(color='#C5A059', linewidth=3, label=lbl_pc1)),
        (theta2, dict(color='#008080', linewidth=2, linestyle='--', label=lbl_pc2)),
    )
    for theta, line_kwargs in axis_specs:
        dx = reach * np.cos(theta)
        dy = reach * np.sin(theta)
        ax.plot([-dx, dx], [-dy, dy], **line_kwargs)

    ax.set_xlabel("Length (Diff)\n長さ (平均との差)", fontproperties=jp_font)
    ax.set_ylabel("Width (Diff)\n幅 (平均との差)", fontproperties=jp_font)
    ax.set_title("PCA Axis Exploration\n主成分軸の探索", fontproperties=jp_font, fontsize=16, pad=15)
    ax.set_xlim(-half_span, half_span)
    ax.set_ylim(-half_span, half_span)
    # Equal scaling on both axes so the two orthogonal lines are actually
    # drawn at a right angle on screen.
    ax.set_aspect("equal", adjustable="box")
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.legend(prop=jp_font, loc='upper left', bbox_to_anchor=(1, 1))
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; the
    # script reruns on each interaction, so release the figure explicitly.
    plt.close(fig)

# --- 4. Key Learning Points (Bilingual Layout) ---
st.markdown("---")
st.markdown(get_bilingual_html("💡 Key Learning Points", "学習のポイント", "1.6rem", "0.95rem"), unsafe_allow_html=True)

# Each entry: (English title, Japanese title, English body, Japanese body).
learning_data = [
    ("1. Principle of Orthogonality", "主成分の直交性", "Principal components are always perpendicular to each other. PC2 captures the remaining variance that PC1 cannot explain.", "各主成分は常に互いに直交します。第1主成分で説明しきれなかった「残りのばらつき」を第2主成分が拾い上げます。"),
    ("2. Dimensionality Reduction", "次元圧縮", "By treating the diagonal line as a single number line, we successfully compressed 2D data into 1D.", "斜めの線を1つの数直線(1次元)と考えれば、2次元のデータを1次元に「圧縮」できたことになります。"),
    ("3. Conservation of Variance", "分散の保存則", "Notice how PC1 + PC2 always equals 100%. PCA doesn't lose total information; it merely shifts the perspective.", "軸をどう回してもPC1とPC2の合計は常に100%です。PCAは情報を捨てるのではなく、見方(座標軸)を変えているだけです。")
]

for title_en, title_ja, body_en, body_ja in learning_data:
    # Flush-left markup with no blank lines: in Markdown a blank line ends an
    # HTML block and a following line indented by four or more spaces becomes
    # a code block, so indented/blank-separated <div> lines would be shown as
    # literal text rather than rendered.
    item_html = "\n".join([
        '<div style="margin-bottom: 20px;">',
        f'<div style="font-weight: bold; font-size: 1.15rem;">{title_en}</div>',
        f'<div style="font-size: 0.85rem; color: #7f8c8d; margin-bottom: 4px;">{title_ja}</div>',
        f'<div style="font-size: 1rem; line-height: 1.4;">{body_en}</div>',
        f'<div style="font-size: 0.85rem; color: #7f8c8d; line-height: 1.4;">{body_ja}</div>',
        '</div>',
    ])
    st.markdown(item_html, unsafe_allow_html=True)