File size: 22,975 Bytes
ddb7b62
 
 
6cf04a5
160827a
6cf04a5
 
ddb7b62
 
 
 
6cf04a5
 
ddb7b62
 
5a29ef3
 
 
 
 
ddb7b62
5a29ef3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6cf04a5
 
 
 
 
 
5a29ef3
 
6cf04a5
 
 
 
 
 
5a29ef3
 
6cf04a5
 
 
 
 
 
 
 
 
 
 
 
 
 
160827a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58eaa88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6cf04a5
 
 
 
 
 
 
5a29ef3
 
6cf04a5
 
 
 
5a29ef3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6cf04a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58eaa88
 
6cf04a5
 
58eaa88
6cf04a5
58eaa88
6cf04a5
58eaa88
6cf04a5
58eaa88
6cf04a5
 
 
 
 
 
 
 
 
ddb7b62
 
 
 
 
6cf04a5
 
ddb7b62
6cf04a5
 
ddb7b62
6cf04a5
 
ddb7b62
58eaa88
ddb7b62
58eaa88
ddb7b62
6cf04a5
 
58eaa88
ddb7b62
6cf04a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddb7b62
6cf04a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddb7b62
6cf04a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddb7b62
6cf04a5
58eaa88
 
160827a
58eaa88
160827a
 
 
 
79cc31b
58eaa88
 
 
 
 
 
 
 
 
 
 
 
160827a
58eaa88
 
160827a
58eaa88
 
160827a
 
58eaa88
 
 
160827a
 
 
58eaa88
160827a
58eaa88
 
 
 
 
 
 
242cdb4
58eaa88
c2d6aa5
58eaa88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6cf04a5
58eaa88
6cf04a5
58eaa88
6cf04a5
58eaa88
6cf04a5
 
 
58eaa88
6cf04a5
58eaa88
6cf04a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddb7b62
6cf04a5
 
 
 
 
 
ddb7b62
6cf04a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ddb7b62
6cf04a5
 
 
 
 
ddb7b62
6cf04a5
 
 
ddb7b62
6cf04a5
 
 
 
ddb7b62
 
 
6e53cef
00c87be
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
import gradio as gr
import pandas as pd
import os
import json
import base64
import plotly.express as px
import plotly.graph_objects as go
from core_logic import (
    query_bm25_index, 
    lift_at_k, 
    lift_ci,
    compute_keyword_similarity,
    warmup_bm25
)

def resolve_existing_path(candidates):
    """Return the first candidate path that exists on disk.

    If none of the candidates exist, fall back to the first non-empty
    candidate (so callers still get a printable path), or "" when every
    entry is falsy.
    """
    for candidate in candidates:
        if candidate and os.path.exists(candidate):
            return candidate
    # Nothing exists: prefer the first truthy entry as a best-effort answer.
    fallback = [candidate for candidate in candidates if candidate]
    return fallback[0] if fallback else ""


# Directory holding the per-topic BM25 index folders. Env override wins,
# then Space-bucket paths, then cluster-local fallbacks.
INDEX_DIR = resolve_existing_path([
    os.getenv('BM25_INDEX_DIR'),
    '/data/bm25_indexes',
    '/data/bm25_indexes/gt_reranking_sets',
    '/data/ecaplan/splits-storage/bm25_indexes',
    '/homes/ecaplan/gscratch1/bm25_indexes/gt_reranking_sets'
])

# Resolve PSLP data from Space bucket first, then local fallback.
TH3_DATA_PATH = resolve_existing_path([
    os.getenv('PSLP_DATA_PATH'),
    '/data/keyword_similarities_with_lifts_4.1.jsonl',
    '/data/keyword_similarities_with_lifts_4.1_sample.jsonl',
    '/homes/ecaplan/gscratch2/demo_dataset/keyword_similarities_with_lifts_4.1.jsonl'
])

print(f"Resolved INDEX_DIR: {INDEX_DIR}")
print(f"Resolved TH3_DATA_PATH: {TH3_DATA_PATH}")

# Pre-computed PSLP rows (presumably one lexicon + its lift/triviality per
# row — confirm against the data file) used for the background scatter plot.
# Missing/broken data degrades to an empty frame instead of crashing startup.
try:
    if os.path.exists(TH3_DATA_PATH):
        PSLP_DF = pd.read_json(TH3_DATA_PATH, lines=True)
    else:
        PSLP_DF = pd.DataFrame()
except Exception as e:
    print(f"Failed to load PSLP data from {TH3_DATA_PATH}: {e}")
    PSLP_DF = pd.DataFrame()

CREATIVE_LEXICON_PATH = "/homes/ecaplan/subspace/keyword_similarities.jsonl"
try:
    # Load this larger dataset strictly for the "Auto-fill Creative Lexicon" button
    CREATIVE_LEXICON_DF = pd.read_json(CREATIVE_LEXICON_PATH, lines=True)
except Exception as e:
    print(f"Failed to load creative lexicon data from {CREATIVE_LEXICON_PATH}: {e}")
    CREATIVE_LEXICON_DF = PSLP_DF  # fall back to the (smaller) PSLP dataset


# Canned "obvious" lexicons per demographic, used by the Auto-fill Trivial
# button to demonstrate what a high-triviality (definitional) hypothesis
# looks like.
TRIVIAL_LEXICONS = {
    "black": "I am black, as a black person, being black, black history, African American, my identity as black, black community, black people, Afro-American, black heritage, growing up black, black culture, black pride, our black community, identify as black, black experiences, black lives, black voices, black families, black neighborhoods",
    "jewish": "I am Jewish, as a Jew, being Jewish, Judaism, Jewish people, Jewish history, Jewish community, my Jewish identity, growing up Jewish, Jewish heritage, synagogue, Torah, Rabbi, Yom Kippur, Hanukkah, Passover, Kosher, Jewish traditions, Shabbat, Jewish culture",
    "catholic": "I am Catholic, as a Catholic, being Catholic, Catholicism, Catholic church, Catholic faith, Catholic community, my Catholic identity, growing up Catholic, Catholic heritage, going to Mass, Eucharist, Vatican, Pope, Catholic priest, Rosary, Catholic traditions, Catholic school, catechism, Catholic teachings",
    "hindu_jain_sikh": "I am Hindu, I am Jain, I am Sikh, as a Hindu, as a Sikh, as a Jain, Hinduism, Jainism, Sikhism, my Hindu faith, my Sikh identity, my Jain beliefs, Hindu community, Sikh community, Jain community, growing up Hindu, growing up Sikh, growing up Jain, puja, Gurdwara",
    "construction": "I work in construction, as a construction worker, in the construction industry, being in construction, construction site, construction jobs, construction projects, building trades, general contractor, construction crew, working in construction, construction management, heavy equipment, hard hat, construction materials, building site, construction field, construction experience, construction company, building contractor",
    "teacher": "I am a teacher, as an educator, being a teacher, teaching profession, working in education, in my classroom, my students, teaching experience, school teacher, education field, my teaching career, lesson plans, classroom management, as a schoolteacher, public school teacher, teaching jobs, educator experience, teaching staff, special education, elementary teacher"
}

# A lexicon whose seed-word Recall is at or above this value is flagged as
# trivial/definitional for the target demographic.
TRIVIALITY_THRESHOLD = 0.65

# Header icon: env override, then a file next to this script, then a
# cluster-local fallback.
CAT_ICON_PATH = resolve_existing_path([
    os.getenv('CAT_SPLITS_ICON_PATH'),
    os.path.join(os.path.dirname(__file__), 'cat_splits.png'),
    '/homes/ecaplan/gscratch2/splits_demo_app/cat_splits.png'
])

def load_icon_data_uri(path):
    """Read the PNG at *path* and return it as a base64 data URI.

    Returns "" when the path is empty, missing, or unreadable, so the
    caller can simply skip rendering the icon.
    """
    if not path or not os.path.exists(path):
        return ""
    try:
        with open(path, "rb") as icon_file:
            encoded = base64.b64encode(icon_file.read()).decode("ascii")
        return f"data:image/png;base64,{encoded}"
    except Exception as e:
        # Best-effort: a broken icon should never take down the app.
        print(f"Failed to load title icon from {path}: {e}")
        return ""

# Inline the icon as a data URI so the header renders without needing a
# static-file route; TITLE_ICON_HTML collapses to "" when no icon was found.
CAT_ICON_SRC = load_icon_data_uri(CAT_ICON_PATH)
TITLE_ICON_HTML = (
    f"<img src='{CAT_ICON_SRC}' alt='Splits cat icon' style='height:1em; width:auto; display:inline-block; flex:0 0 auto;'/>"
    if CAT_ICON_SRC else ""
)

# CSS that stretches the three lexicon action buttons (trivial / creative /
# clear) to equal full-row height so the action row looks uniform.
BUTTON_FILL_CSS = """
.lexicon-action-row {
    align-items: stretch !important;
}

#quick-triv-btn,
#quick-creat-btn,
#clear-btn {
    display: flex;
    align-self: stretch !important;
}

#quick-triv-btn button,
#quick-creat-btn button,
#clear-btn button {
    width: 100%;
    min-height: 44px;
    height: 100% !important;
}
"""

def get_topics_for_demos(target, contrast):
    """List (display name, index folder) topic choices for a demographic pair.

    Scans INDEX_DIR for per-topic BM25 index folders whose names embed a
    "<topic>_<demoA>-<demoB>" pair, keeping only folders whose pair is
    exactly {target, contrast} (in either order). Returns a sorted list of
    (pretty label, folder name) tuples, or a single placeholder entry when
    nothing matches.
    """
    import re

    if not os.path.exists(INDEX_DIR):
        return [("No indices found", "")]

    dirs = [d for d in os.listdir(INDEX_DIR) if os.path.isdir(os.path.join(INDEX_DIR, d))]
    choices = []

    # Escape the demographic names so regex metacharacters in a future
    # demographic id cannot corrupt the pattern.
    t_pat, c_pat = re.escape(target), re.escape(contrast)
    pair_regex = re.compile(fr"(_({t_pat}|{c_pat})[-_]({t_pat}|{c_pat}))")

    for d in dirs:
        # Skip sharded backend folders like "..._0-100".
        if re.search(r"_[0-9]+-[0-9]+$", d):
            continue

        match = pair_regex.search(d)
        if not match:
            continue

        left_demo = match.group(2)
        right_demo = match.group(3)
        # Require one of each demographic (reject e.g. "black-black").
        if left_demo == right_demo:
            continue
        if {left_demo, right_demo} != {target, contrast}:
            continue

        topic_part = d[:match.start()]
        # Clean display name (hide backend index suffixes)
        display_name = topic_part.replace('_', ' ').replace('-', '/')
        choices.append((display_name, d))

    choices.sort(key=lambda x: x[0])
    return choices if choices else [("No indices found for this pair", "")]

def load_seed_words():
    """Load the per-demographic seed-word lists from demo_seed_words.json.

    Returns an empty dict when the file is missing or malformed, so callers
    can treat "no seed words" uniformly. Exceptions are narrowed to file
    and JSON errors rather than a blanket ``except`` that would hide bugs.
    """
    try:
        with open("demo_seed_words.json", "r") as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        return {}

def compute_triviality(lexicon: list, target_demo: str):
    """Score how 'obvious' *lexicon* is for *target_demo*.

    Computes semantic Recall of the demographic's seed words against the
    candidate lexicon; returns 0.0 when either side is empty.
    """
    seeds = load_seed_words().get(target_demo, [])
    if not seeds or not lexicon:
        return 0.0
    metrics = compute_keyword_similarity(seeds, lexicon, device='cpu')
    return metrics['Recall']

def generate_verdict_banner(lift, pval, triviality):
    """Render the HTML verdict card for a tested lexicon.

    Classifies the hypothesis along two axes: statistical support
    (lift above 1.0 with p < 0.05) and triviality (at or above
    TRIVIALITY_THRESHOLD), and returns the matching quadrant's banner.
    """
    trivial = triviality >= TRIVIALITY_THRESHOLD
    supported = lift > 1.0 and pval < 0.05

    # Use Gradio CSS vars and neutral styling so cards work in light and dark mode.
    base_style = "padding:20px; border-radius:12px; text-align:center; box-shadow: 0 4px 6px rgba(0,0,0,0.1); margin-bottom: 15px;"

    if trivial:
        if supported:
            return f"<div style='{base_style} background-color: rgba(255, 193, 7, 0.2); border: 1px solid #ffe69c;'><h2 style='margin:0; color: #d39e00;'>🟡 Supported, but Trivial</h2><p style='margin-top:10px; font-size:16px;'>This lexicon successfully isolates the target demographic, but it is likely <strong>too obvious/definitional</strong> to be of interest.</p></div>"
        return f"<div style='{base_style} background-color: rgba(220, 53, 69, 0.2); border: 1px solid #f5c6cb;'><h2 style='margin:0; color: #c82333;'>🔴 Trivial & Unsupported</h2><p style='margin-top:10px; font-size:16px;'>This lexicon is <strong>definitional</strong> to the target demographic, and also failed to provide significant lift for the target demographic.</p></div>"
    if supported:
        return f"<div style='{base_style} background-color: rgba(40, 167, 69, 0.2); border: 1px solid #c3e6cb;'><h2 style='margin:0; color: #218838;'>🟢 Promising PSLP!</h2><p style='margin-top:10px; font-size:16px;'>This hypothesis is <strong>supported by the data</strong> (high lift) AND is <strong>unexpected</strong> (low triviality). Worthy of further study.</p></div>"
    return f"<div style='{base_style} background-color: rgba(108, 117, 125, 0.2); border: 1px solid #d6d8db;'><h2 style='margin:0; color: var(--body-text-color);'>⚪ Unsupported Hypothesis</h2><p style='margin-top:10px; font-size:16px;'>This lexicon is non-trivial, but the data <strong>does not support</strong> the hypothesis (it distinguishes the demographics no better than random).</p></div>"

def format_demo(name):
    """Turn an internal id like 'hindu_jain_sikh' into display form 'Hindu/Jain/Sikh'.

    Falsy inputs ("" or None) are returned unchanged.
    """
    if not name:
        return name
    return "/".join(map(str.capitalize, name.split('_')))

def run_evaluation(index_name, target_demo, contrast_demo, generated_words_str):
    """Evaluate a user lexicon against the selected BM25 topic index.

    Returns a 5-tuple matching the Gradio outputs:
    (verdict banner HTML, lift card HTML, triviality card HTML,
     filtration-map Plotly figure, top-hits DataFrame).
    Any exception is rendered as an inline error banner instead of raising.
    """
    if not index_name or not generated_words_str:
        return "<div style='color:red;'>Please select a topic and enter a lexicon.</div>", "", "", px.scatter(title="Waiting for input..."), pd.DataFrame()

    try:
        # Comma-separated textbox -> cleaned keyword list (blanks dropped).
        generated_words = [w.strip() for w in generated_words_str.split(",") if w.strip()]
        index_path = os.path.join(INDEX_DIR, index_name)
        
        target_fmt = format_demo(target_demo)
        contrast_fmt = format_demo(contrast_demo)
        
        # 1. Compute BM25 Lifts
        df_results = query_bm25_index(index_path, generated_words)
        
        # Lift and p-value at the top 0.5% of retrieved posts.
        lift_0_5_percent = lift_at_k(df_results, target_demo, k=0.005)
        pval_0_5, _, _ = lift_ci(df_results, target_demo, k=0.005)
        
        card_style = "background-color: var(--block-background-fill); padding:20px; border-radius:12px; border: 1px solid var(--border-color-primary); box-shadow: 0 2px 4px rgba(0,0,0,0.05); height: 100%;"
        
        lift_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color: var(--body-text-color);'>Lift@0.5%: <span style='color:#007bff;'>{lift_0_5_percent:.2f}x</span></h3><p style='margin:8px 0; color: var(--body-text-color-subdued);'><strong style='color: inherit;'>p-value: {pval_0_5:.4f}</strong></p><p style='margin:0; font-size:14px; color: var(--body-text-color-subdued);'>This lexicon pulled <b style='color: inherit;'>{target_fmt}</b> posts to the top {lift_0_5_percent:.2f}x more than random compared to <b style='color: inherit;'>{contrast_fmt}</b>.</p></div>"
        
        # 2. Compute Triviality
        triviality = compute_triviality(generated_words, target_demo)
        triv_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color: var(--body-text-color);'>Triviality Score: <span style='color:#6f42c1;'>{triviality:.3f}</span></h3><p style='margin:8px 0; color: var(--body-text-color-subdued);'><strong style='color: inherit;'>Threshold: &lt; {TRIVIALITY_THRESHOLD}</strong></p><p style='margin:0; font-size:14px; color: var(--body-text-color-subdued);'>Measures semantic similarity to the seed words of <b style='color: inherit;'>{target_fmt}</b>. Lower scores indicate a likely more unexpected PSLP.</p></div>"
        
        verdict = generate_verdict_banner(lift_0_5_percent, pval_0_5, triviality)
        
        # 3. 2D Filtration Plot
        # Reuse the precomputed background scatter (module-global tab3_plot)
        # as the base so the user's point is shown in context; deep-copy so
        # repeated evaluations don't accumulate shapes/traces on it.
        import copy
        global tab3_plot
        if tab3_plot is not None:
            fig = copy.deepcopy(tab3_plot)
        else:
            fig = go.Figure()

        # Grow the y-axis to keep the current point visible (20% headroom).
        y_axis_cap = max(3.0, float(lift_0_5_percent) * 1.2)
        # Green rectangle = the "promising" quadrant (non-trivial AND lift > 1).
        fig.add_shape(type="rect", x0=0, y0=1.0, x1=TRIVIALITY_THRESHOLD, y1=y_axis_cap, fillcolor="LightGreen", opacity=0.3, layer="below", line_width=0)
        fig.add_hline(y=1.0, line_dash="dash", line_color="gray")
        fig.add_vline(x=TRIVIALITY_THRESHOLD, line_dash="dash", line_color="gray")

        # Red star marks the lexicon just evaluated.
        fig.add_trace(go.Scattergl(x=[triviality], y=[lift_0_5_percent], mode='markers+text', text=['Current PSLP'], textposition="top right", showlegend=False, marker=dict(size=20, symbol='star', color='red', line=dict(width=2, color='DarkSlateGrey'))))

        fig.update_layout(title="PSLP Filtration Map", showlegend=False, xaxis_title="Triviality (Lower = More Unexpected)", yaxis_title="Lift@0.5% (Higher = Stronger Data Support)", xaxis=dict(range=[0.0, 1.0]), yaxis=dict(range=[0.0, y_axis_cap]), margin=dict(l=40, r=40, t=40, b=40))
        
        # 4. Preview Top Hits
        # NOTE(review): assumes query_bm25_index returns these columns —
        # confirm against core_logic.
        top_hits_df = df_results.head(5)[['id', 'score', 'demographic', 'content']]
        
        return verdict, lift_text, triv_text, fig, top_hits_df
    except Exception as e:
        return f"<div style='color:red; padding:10px; border:1px solid red; border-radius:5px;'>Error evaluating hypothesis: {str(e)}</div>", "", "", px.scatter(title="Error"), pd.DataFrame()

def load_tab3_data():
    """Build the background "Hypothesis Space" scatter of pre-computed PSLPs.

    Downsamples PSLP_DF to roughly 3000 points (balanced across
    keyword_type groups) and overlays the same lift/triviality threshold
    guides used on the per-evaluation plot. Returns a placeholder figure
    when the data is unavailable or plotting fails.
    """
    try:
        if PSLP_DF.empty:
            return px.scatter(title=f"Could not find PSLP Space data.")
        df = PSLP_DF.copy()
        # Older data files may lack these columns; backfill with fallbacks
        # so the plot still renders.
        if "lift@0.5" not in df.columns:
            df["lift@0.5"] = df.get("lift@1.0", 0)  
        if "demo1_recall" not in df.columns:
            df["demo1_recall"] = 0.5 
            
        # Stratified downsample: equal share per keyword_type, ~3000 total.
        if 'keyword_type' in df.columns:
            types = df['keyword_type'].unique()
            if len(types) > 0:
                n_per_group = max(1, 3000 // len(types))
                sampled_dfs = []
                for t in types:
                    group = df[df['keyword_type'] == t]
                    sampled_dfs.append(group.sample(min(len(group), n_per_group)))
                df = pd.concat(sampled_dfs)
            
        fig = px.scatter(
            df, 
            x="demo1_recall", 
            y="lift@0.5", 
            hover_data=["topic", "demo1", "demo2", "keyword_type", "keywords"],
            title="The Hypothesis Space (Pre-computed PSLPs)",
            labels={"demo1_recall": "Triviality", "lift@0.5": "Lift@0.5%"},
            opacity=0.4,
            color_discrete_sequence=['#4a90e2']
        )
        fig.update_traces(marker=dict(size=5, line=dict(width=0.5, color='white')))
        # Green rectangle = the "promising" quadrant (non-trivial AND lift > 1).
        max_limit = df['lift@0.5'].max() if not df.empty else 2.0
        fig.add_shape(type="rect", x0=0, y0=1.0, x1=TRIVIALITY_THRESHOLD, y1=max_limit, fillcolor="LightGreen", opacity=0.3, layer="below", line_width=0)
        fig.add_hline(y=1.0, line_dash="dash", line_color="gray")
        fig.add_vline(x=TRIVIALITY_THRESHOLD, line_dash="dash", line_color="gray")
        fig.update_layout(margin=dict(l=40, r=40, t=40, b=40), showlegend=False)
        return fig
    except Exception as e:
        return px.scatter(title=f"Could not load data for the Hypothesis Space: {e}")

# Warm heavy resources at import time so the first user query is fast.
print("Warming up BERT model...")
try:
    # Touch the default black/teacher index with a one-result dummy query.
    warmup_choices = get_topics_for_demos("black", "teacher")
    warmup_index_name = next((idx for _, idx in warmup_choices if idx), None)
    if warmup_index_name:
        warmup_index_path = os.path.join(INDEX_DIR, warmup_index_name)
        print(f"Warming up BM25 index: {warmup_index_name}")
        warmup_bm25(warmup_index_path, warmup_keyword="community", warmup_k=1)
    else:
        print("BM25 warmup skipped: no default index found")
except Exception as e:
    print(f"BM25 warmup failed: {e}")

# Warm the similarity model with a tiny dummy comparison (best-effort).
try:
    compute_keyword_similarity(["test"], ["warmup"], device="cpu")
except Exception as e:
    print(f"BERT warmup failed: {e}")

# Cached background figure; run_evaluation deep-copies it as its plot base.
tab3_plot = load_tab3_data()

# Utilize a clean, modern Gradio Theme
with gr.Blocks(title="Splits! Sandbox", theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css=BUTTON_FILL_CSS) as demo:
    # --- HEADER ---
    gr.Markdown(f"""
        <h1 style='display:flex; align-items:center; gap:0.35em; margin:0 0 0.2em 0;'>
            {TITLE_ICON_HTML}
            <span>Splits! Language & Culture Sandbox</span>
        </h1>

        Welcome to the companion demo for **Splits!**, created by **[Eylon Caplan](https://eyloncaplan.github.io/)**. Explore how different sociocultural groups use entirely different vocabularies to discuss the *same topics*.

        <div style="margin-top: 16px; margin-bottom: 8px;">
            <a href="https://arxiv.org/abs/2504.04640" target="_blank"
               style="display: inline-flex; align-items: center; gap: 8px; background-color: var(--button-secondary-background-fill); border: 1px solid var(--border-color-primary); border-radius: 999px; padding: 8px 18px; text-decoration: none; color: var(--body-text-color); font-weight: 500; font-size: 0.95em; box-shadow: 0 1px 3px rgba(0,0,0,0.05);">
                📄 <b>Read the paper:</b> Splits! Flexible Sociocultural Linguistic Investigation at Scale 
                <span style="color: var(--body-text-color-subdued); margin-left: 4px;">↗</span>
            </a>
        </div>
    """)

    with gr.Accordion("📖 What is this & How to use it?", open=False):
        gr.Markdown("""
        ### 🤔 What is this?
        The way we speak is heavily influenced by our background and culture. This tool lets you explore how different groups of people (like teachers, construction workers, or people of different faiths) use entirely different vocabularies to discuss the *same topics*.

        ### 🛠️ What can it do?
        Think of it as a search tool for testing cultural language trends. You can test your own guesses (hypotheses) about how people talk. For example, if both Jewish and Catholic people are talking about "Healthcare", do they focus on different things?

        You pick the groups, pick a topic, and type in some words. The tool will then crunch the data and tell you two things:
        1. 📊 **Is it true? (Lift):** Does your chosen group *actually* use these words more than the other group?
        2. 💡 **Is it interesting? (Triviality):** Are these words an unexpected, deep cultural insight? Or are they just boring, obvious terms (like a Catholic person using the word "church")?

        ### ⚙️ How to use
        1. Select a **Target** group, a **Contrast** group, and a **Discussion Topic**.
        2. Provide a **Candidate Lexicon** (a list of words you guess the target group uses *more* than the contrast group for this topic).
        3. Click **Test Hypothesis** to see if the data supports your idea!

        """)

    # --- INPUTS ---
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Step 1: Set the Context")
            with gr.Group():
                with gr.Row():
                    target_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="🎯 Target Demographic", value="black", scale=2)
                    swap_btn = gr.Button("🔄 Swap", scale=1, min_width=60)
                    contrast_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="⚖️ Contrast Demographic", value="teacher", scale=2)

                # Topic list for the default black/teacher pair; refreshed on
                # demographic change by update_idx_choices below.
                choices = get_topics_for_demos("black", "teacher")
                default_idx = choices[0][1] if choices and choices[0][1] != "" else None
                index_dropdown = gr.Dropdown(choices=choices, label="💬 Discussion Topic", value=default_idx)

        with gr.Column(scale=1):
            gr.Markdown("### 📝 Step 2: Define Lexicon")
            with gr.Group():
                lexicon_input = gr.Textbox(
                    show_label=False,
                    placeholder="e.g. word1, phrase two, word3...",
                    info="Enter a comma-separated list of words/phrases you hypothesize the Target uses more than the Contrast in the selected Topic.",
                    lines=3
                )
                with gr.Row(elem_classes=["lexicon-action-row"]):
                    quick_triv = gr.Button("🟡 Auto-fill Trivial (Obvious)", elem_id="quick-triv-btn", scale=1)
                    quick_creat = gr.Button("✨ Auto-fill Creative (LLM-Generated)", elem_id="quick-creat-btn", scale=1)
                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", elem_id="clear-btn", scale=1)

    test_btn = gr.Button("🚀 Test Hypothesis!", variant="primary", size="lg")

    gr.Markdown("---")

    # --- OUTPUTS ---
    verdict_out = gr.HTML()

    with gr.Row():
        lift_out = gr.HTML()
        triv_out = gr.HTML()

    plot_out = gr.Plot(value=tab3_plot)

    with gr.Accordion("🔍 View Top Retrieved Posts (Contextualize the Lexicon)", open=False):
        posts_out = gr.Dataframe(headers=["ID", "BM25 Score", "Demographic", "Content"], interactive=False)
        
    # --- Event Handlers ---
    def swap_demos(t, c):
        """Exchange the target and contrast dropdown values."""
        return c, t
        
    swap_btn.click(fn=swap_demos, inputs=[target_demo, contrast_demo], outputs=[target_demo, contrast_demo])
    
    def update_idx_choices(t, c):
        """Refresh the topic dropdown when either demographic changes."""
        opts = get_topics_for_demos(t, c)
        default_val = opts[0][1] if opts and opts[0][1] != "" else None
        return gr.update(choices=opts, value=default_val)
        
    target_demo.change(fn=update_idx_choices, inputs=[target_demo, contrast_demo], outputs=[index_dropdown])
    contrast_demo.change(fn=update_idx_choices, inputs=[target_demo, contrast_demo], outputs=[index_dropdown])
    
    def fill_creative_lexicon(target, contrast, index_path):
        """Sample an LLM-generated lexicon matching the current pair/topic.

        Recovers the topic display name from the index folder name, then
        fuzzy-matches it against CREATIVE_LEXICON_DF topics and samples one
        row's keywords. Falls back to placeholder text when nothing matches.
        """
        import re
        if CREATIVE_LEXICON_DF.empty:
            return "No data to sample from"
            
        # The folder name embeds "<topic>_<demoA>-<demoB>"; strip the pair
        # to recover the human-readable topic.
        folder_name = os.path.basename(str(index_path)) if index_path else ""
        match = re.search(fr"(_({target}|{contrast})-({target}|{contrast}))", folder_name)
        topic_display = ""
        if match:
            topic_part = folder_name[:match.start()]
            topic_display = topic_part.replace('_', ' ').replace('-', '/')
            
        subset = CREATIVE_LEXICON_DF[
            (CREATIVE_LEXICON_DF['demo1'] == target) & (CREATIVE_LEXICON_DF['demo2'] == contrast)
        ]
        
        # Fuzzy-match the topic since folder names and dataset topics can
        # differ slightly in formatting.
        import difflib
        possible_topics = subset['topic'].unique()
        matches = difflib.get_close_matches(topic_display, possible_topics, n=1, cutoff=0.4)
        
        if matches:
            topic_subset = subset[subset['topic'] == matches[0]]
            if not topic_subset.empty:
                subset = topic_subset
                
        if subset.empty:
            return "Sample word 1, sample word 2"
            
        row = subset.sample(1).iloc[0]
        keywords = row['keywords']
        if isinstance(keywords, list):
            return ", ".join(keywords)
        return str(keywords)

    quick_triv.click(fn=lambda t: TRIVIAL_LEXICONS.get(t, "Trivial words..."), inputs=[target_demo], outputs=[lexicon_input])
    quick_creat.click(fn=fill_creative_lexicon, inputs=[target_demo, contrast_demo, index_dropdown], outputs=[lexicon_input])
    clear_btn.click(fn=lambda: "", outputs=[lexicon_input])
    
    test_btn.click(
        fn=run_evaluation, 
        inputs=[index_dropdown, target_demo, contrast_demo, lexicon_input], 
        outputs=[verdict_out, lift_out, triv_out, plot_out, posts_out]
    )

# Script entry point: bind to all interfaces so the hosted Space (or a
# container) can reach the server; port overridable via GRADIO_SERVER_PORT.
if __name__ == "__main__":
    port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port, ssr_mode=False)