Spaces:

AshwinP
/

SequentialMonteCarlo

Running on Zero

File size: 36,128 Bytes

"""
Learning Sequential Monte Carlo (SMC) Through the Plain-English Translator

An interactive educational space that teaches Sequential Monte Carlo methods
using a practical application: helping professionals explain complex concepts
without using industry jargon.
"""

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import random
import json
import os
import math

# Mock spaces module for local development (only needed on HuggingFace Spaces)
try:
    import spaces
except ImportError:
    class spaces:
        @staticmethod
        def GPU(func):
            return func

# Load benchmark data
BENCHMARK_DATA_PATH = os.path.join(os.path.dirname(__file__), "benchmark_data.json")
with open(BENCHMARK_DATA_PATH, "r") as f:
    BENCHMARK_DATA = json.load(f)

# Path to infographic
INFOGRAPHIC_PATH = os.path.join(os.path.dirname(__file__), "Sequential_monte_carlo.png")

# ============================================================================
# MODEL SETUP
# ============================================================================

AVAILABLE_MODELS = {
    "TinyLlama-1.1B (Fast)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Qwen2-0.5B (Fastest)": "Qwen/Qwen2-0.5B-Instruct",
    "Qwen2.5-7B (Best Quality)": "Qwen/Qwen2.5-7B-Instruct",
    "Qwen3-8B (Latest)": "Qwen/Qwen3-8B",
    "Gemma-2-2B (Requires HF Login)": "google/gemma-2-2b-it",
}

loaded_models = {}
loaded_tokenizers = {}

def load_model(model_name: str):
    model_id = AVAILABLE_MODELS.get(model_name, "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    if model_id not in loaded_tokenizers:
        loaded_tokenizers[model_id] = AutoTokenizer.from_pretrained(model_id)
    if model_id not in loaded_models:
        loaded_models[model_id] = AutoModelForCausalLM.from_pretrained(
            model_id, device_map="auto", torch_dtype=torch.float16
        )
    return loaded_tokenizers[model_id], loaded_models[model_id]

# ============================================================================
# JARGON DICTIONARIES
# ============================================================================

JARGON_DICTIONARIES = {
    "Legal": [
        "liability", "liable", "indemnify", "indemnification", "breach",
        "statute", "damages", "negligence", "herein", "aforementioned",
        "plaintiff", "defendant", "jurisdiction", "arbitration", "tort",
        "fiduciary", "escrow", "lien", "deposition", "stipulation",
        "injunction", "subpoena", "affidavit", "adjudicate", "appellant"
    ],
    "Medical": [
        "prognosis", "diagnosis", "etiology", "pathology", "contraindicated",
        "idiopathic", "nosocomial", "comorbidity", "prophylactic", "benign",
        "malignant", "metastasis", "hemorrhage", "ischemia", "infarction",
        "edema", "necrosis", "lesion", "syndrome", "acute", "chronic",
        "bilateral", "unilateral", "subcutaneous", "intravenous"
    ],
    "Financial": [
        "amortization", "liquidity", "collateral", "derivative", "equity",
        "fiduciary", "hedge", "leverage", "portfolio", "securities",
        "dividend", "depreciation", "liability", "asset", "accrual",
        "arbitrage", "capitalization", "yield", "maturity", "principal",
        "compound", "annuity", "underwriting", "insolvency", "solvency"
    ],
    "Technical/Engineering": [
        "algorithm", "bandwidth", "latency", "throughput", "scalability",
        "deprecated", "refactor", "polymorphism", "encapsulation", "abstraction",
        "iteration", "recursion", "synchronous", "asynchronous", "protocol",
        "middleware", "backend", "frontend", "deployment", "infrastructure",
        "microservices", "containerization", "orchestration", "API", "SDK"
    ]
}

# ============================================================================
# SMC CORE FUNCTIONS
# ============================================================================

def is_safe(text: str, banned_words: list) -> bool:
    text_lower = text.lower()
    for word in banned_words:
        word_lower = word.lower()
        if (f" {word_lower} " in f" {text_lower} " or
            f" {word_lower}." in f" {text_lower}" or
            f" {word_lower}," in f" {text_lower}" or
            f" {word_lower}?" in f" {text_lower}" or
            f" {word_lower}!" in f" {text_lower}" or
            text_lower.startswith(f"{word_lower} ") or
            text_lower.endswith(f" {word_lower}")):
            return False
    return True

def find_jargon_used(text: str, banned_words: list) -> list:
    text_lower = text.lower()
    found = []
    for word in banned_words:
        word_lower = word.lower()
        if (f" {word_lower} " in f" {text_lower} " or
            f" {word_lower}." in f" {text_lower}" or
            f" {word_lower}," in f" {text_lower}" or
            f" {word_lower}?" in f" {text_lower}" or
            f" {word_lower}!" in f" {text_lower}" or
            text_lower.startswith(f"{word_lower} ") or
            text_lower.endswith(f" {word_lower}")):
            found.append(word)
    return found

def count_jargon(text: str, banned_words: list) -> int:
    return len(find_jargon_used(text, banned_words))

def compute_weight(text: str, banned_words: list, penalty_factor: float = 0.3) -> float:
    jargon_count = count_jargon(text, banned_words)
    return math.pow(penalty_factor, jargon_count)

def weighted_resample(particles: list, weights: list, num_samples: int) -> list:
    if not particles or not weights:
        return []
    total_weight = sum(weights)
    if total_weight == 0:
        probs = [1.0 / len(particles)] * len(particles)
    else:
        probs = [w / total_weight for w in weights]
    resampled = random.choices(particles, weights=probs, k=num_samples)
    unique = list(dict.fromkeys(resampled))
    return unique[:num_samples]

@spaces.GPU
def smc_translate(
    concept: str,
    profession: str,
    custom_banned_words: str = "",
    model_name: str = "TinyLlama-1.1B (Fast)",
    num_particles: int = 5,
    max_steps: int = 20,
    tokens_per_step: int = 15,
    constraint_mode: str = "Soft (Penalize)",
    progress=gr.Progress()
) -> tuple:
    tokenizer, model_inst = load_model(model_name)
    use_soft_constraints = "Soft" in constraint_mode

    banned_words = JARGON_DICTIONARIES.get(profession, []).copy()
    if custom_banned_words.strip():
        custom_list = [w.strip() for w in custom_banned_words.split(",") if w.strip()]
        banned_words.extend(custom_list)

    prompt = f"""You are an expert {profession.lower()} professional explaining a concept to a client with no background in your field.

Rules:
- Explain as if talking to a curious 10-year-old
- Use a concrete, relatable real-world example to illustrate the concept
- Avoid technical jargon - use everyday words instead
- Keep it concise: 2-3 sentences max

Concept to explain: {concept}

Simple explanation with example:"""

    particles = [prompt]
    trace_log = []
    trace_log.append(f"{'='*60}")
    trace_log.append(f"SMC PLAIN-ENGLISH TRANSLATOR - TRACE LOG")
    trace_log.append(f"{'='*60}")
    trace_log.append(f"Model: {model_name}")
    trace_log.append(f"Constraint Mode: {constraint_mode}")
    trace_log.append(f"Concept: {concept}")
    trace_log.append(f"Domain: {profession}")
    trace_log.append(f"Banned words: {len(banned_words)} terms")
    trace_log.append(f"Particles: {num_particles} | Steps: {max_steps} | Tokens/step: {tokens_per_step}")
    trace_log.append(f"{'='*60}")
    trace_log.append("")

    for step in progress.tqdm(range(max_steps), desc="SMC Iteration"):
        candidates = []

        # STEP 1: EXPLORE - Generate multiple continuations
        for particle in particles:
            inputs = tokenizer(particle, return_tensors="pt").to(model_inst.device)
            with torch.no_grad():
                outputs = model_inst.generate(
                    **inputs,
                    max_new_tokens=tokens_per_step,
                    num_return_sequences=3,
                    do_sample=True,
                    temperature=0.9 if use_soft_constraints else 0.8,
                    top_p=0.95 if use_soft_constraints else 0.9,
                    pad_token_id=tokenizer.eos_token_id
                )
            for out in outputs:
                decoded = tokenizer.decode(out, skip_special_tokens=True)
                candidates.append(decoded)

        if not candidates:
            trace_log.append(f"Step {step+1}: No candidates generated - stopping")
            break

        # STEP 2: FILTER/WEIGHT - Apply constraints
        if use_soft_constraints:
            weights = [compute_weight(c, banned_words, penalty_factor=0.3) for c in candidates]
            jargon_counts = [count_jargon(c, banned_words) for c in candidates]
            clean_count = sum(1 for c in jargon_counts if c == 0)
            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
            trace_log.append(f"         {clean_count} jargon-free | Weights: [{min(weights):.2f} - {max(weights):.2f}]")

            # STEP 3: RESAMPLE - Weighted selection
            particles = weighted_resample(candidates, weights, num_particles)
            if not particles:
                trace_log.append(f"         Resampling failed - stopping")
                break
            trace_log.append(f"         Resampled to {len(particles)} particles")
        else:
            valid_candidates = []
            pruned_count = 0
            for candidate in candidates:
                if is_safe(candidate, banned_words):
                    valid_candidates.append(candidate)
                else:
                    pruned_count += 1

            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
            trace_log.append(f"         {len(valid_candidates)} survived | {pruned_count} pruned (contained jargon)")

            if valid_candidates:
                unique_candidates = list(set(valid_candidates))
                random.shuffle(unique_candidates)
                particles = unique_candidates[:num_particles]
            else:
                trace_log.append(f"         ALL PARTICLES DIED - jargon unavoidable!")
                break

        # Check for completion
        current_text = particles[0].split("Simple explanation with example:")[-1].strip()
        if current_text.endswith(('.', '!', '?')) and len(current_text) > 80:
            trace_log.append(f"\nNatural completion reached at step {step+1}")
            break

    trace_log.append("")
    trace_log.append(f"{'='*60}")

    # Get best result
    if particles:
        if use_soft_constraints:
            best_idx = 0
            best_jargon_count = float('inf')
            for i, p in enumerate(particles):
                jc = count_jargon(p, banned_words)
                if jc < best_jargon_count:
                    best_jargon_count = jc
                    best_idx = i
            final_text = particles[best_idx].split("Simple explanation with example:")[-1].strip()
        else:
            final_text = particles[0].split("Simple explanation with example:")[-1].strip()
    else:
        final_text = "(All generation paths used jargon - try soft constraints!)"

    final_jargon = find_jargon_used(final_text, banned_words)
    if final_jargon:
        trace_log.append(f"RESULT: Contains jargon: {final_jargon}")
    else:
        trace_log.append(f"RESULT: Jargon-free output achieved!")
    trace_log.append(f"{'='*60}")

    return final_text, "\n".join(trace_log), ", ".join(banned_words)

# ============================================================================
# EXAMPLES
# ============================================================================

EXAMPLES = [
    ["Force Majeure clause and why it might void our contract", "Legal", ""],
    ["Why we need to add an indemnification clause to protect your business", "Legal", ""],
    ["Your MRI shows a benign lesion that we should monitor", "Medical", ""],
    ["The etiology of your chronic fatigue syndrome", "Medical", ""],
    ["How compound interest and amortization affect your mortgage payments", "Financial", ""],
    ["Why we recommend diversifying your portfolio with low-liquidity assets", "Financial", ""],
    ["Why our API has high latency and how microservices could help", "Technical/Engineering", ""],
    ["The difference between synchronous and asynchronous processing", "Technical/Engineering", ""],
]

# ============================================================================
# GRADIO INTERFACE
# ============================================================================

with gr.Blocks(title="Learn SMC: The Plain-English Translator") as demo:

    # ==================== HEADER ====================
    gr.Markdown("""
    # Learning Sequential Monte Carlo (SMC)
    ## An Interactive Guide Using the Plain-English Translator

    Welcome! This space teaches you about **Sequential Monte Carlo** methods through a practical application:
    helping professionals explain complex concepts without using jargon.

    Navigate through the tabs to learn the theory, try the interactive demo, and see our experimental results.
    """)

    with gr.Tabs():

        # ==================== TAB 1: LEARN SMC ====================
        with gr.TabItem("1. Learn SMC"):
            gr.Markdown("""
            # Understanding Sequential Monte Carlo

            Sequential Monte Carlo (SMC) is a powerful technique for solving problems where you need to
            navigate through a space of possibilities while satisfying constraints. Let's understand it
            through both theory and our practical application.
            """)

            # Infographic
            gr.Markdown("## The Big Picture")
            gr.Image(INFOGRAPHIC_PATH, label="How AI Learns to See the Future: An Introduction to SMC", show_label=True)

            gr.Markdown("---")

            # Section 1: The Problem
            gr.Markdown("""
            ## 1. The Problem: Standard AI's "Greedy" Trap

            ### What's Wrong with Normal Text Generation?

            Most AI language models work **greedily** - they pick the best next word based on immediate probability,
            without considering long-term consequences. This creates a fundamental problem:

            **The Greedy Trap:**
            - The model chooses what seems best *right now*
            - It can't "see" that this choice leads to a dead end
            - Once committed, it can't backtrack

            ### Our Example: The Curse of Knowledge

            When a lawyer tries to explain "Force Majeure" to a client, a standard AI naturally reaches for
            legal terminology because those words are statistically most likely in that context:

            ```
            Standard AI: "Force Majeure is a contractual provision that excuses liability
                         when extraordinary circumstances prevent fulfillment..."
            ```

            The AI picked "liability," "contractual," and "provision" because they're the most probable
            next words - but now it's stuck using jargon the client won't understand!

            **This is like choosing the path in a maze that looks shortest, only to hit a dead end.**
            """)

            gr.Markdown("---")

            # Section 2: The Breakthrough
            gr.Markdown("""
            ## 2. The Breakthrough: Introducing SMC

            ### The Key Insight: Explore Multiple Futures Simultaneously

            Instead of committing to one path, SMC maintains **thousands of "particles"** - each representing
            a different possible future. Think of it as sending out scouts in every direction.

            ### How It Works in Our Translator:

            ```
            Standard AI:  One path → "Force Majeure is a contractual..." → STUCK WITH JARGON

            SMC Approach: Path A → "Imagine you promised your friend..."     ✓ Keep exploring
                         Path B → "This is a liability clause..."           ✗ Contains jargon
                         Path C → "Think of it like a 'nobody's fault'..."  ✓ Keep exploring
                         Path D → "The contractual provision states..."     ✗ Contains jargon
                         Path E → "It's like when a big storm..."           ✓ Keep exploring
            ```

            **We explore multiple possibilities in parallel, keeping the promising ones and discarding the rest.**
            """)

            gr.Markdown("---")

            # Section 3: The Process
            gr.Markdown("""
            ## 3. The Process: How SMC Finds the Optimal Path

            SMC follows a three-step cycle that repeats until we reach our goal:

            ### Step 1: EXPLORE (Expand)
            Each surviving particle generates multiple possible continuations.
            If we have 5 particles and each generates 3 continuations, we now have 15 candidates.

            ### Step 2: FILTER (Evaluate)
            We evaluate each candidate against our constraint (no jargon).
            This is "survival of the fittest" - unpromising paths fade out.

            **Two Filtering Strategies:**

            | Strategy | How It Works | Pros | Cons |
            |----------|--------------|------|------|
            | **Hard Constraints** | Completely eliminate any particle with jargon | Guarantees jargon-free output | Can kill ALL particles if jargon is unavoidable |
            | **Soft Constraints** | Reduce weight of particles with jargon (but let them survive) | More robust, allows gradual steering | May have occasional jargon slip through |

            ### Step 3: RESAMPLE (Select)
            We select particles for the next round based on their fitness:
            - **Hard mode:** Random selection from survivors
            - **Soft mode:** Weighted random selection (better particles more likely to be chosen)

            ### The Math Behind Soft Constraints:
            ```
            Weight = 0.3 ^ (number of jargon words)

            0 jargon words → Weight = 1.0   (100% chance)
            1 jargon word  → Weight = 0.3   (30% chance)
            2 jargon words → Weight = 0.09  (9% chance)
            3 jargon words → Weight = 0.027 (2.7% chance)
            ```
            """)

            gr.Markdown("---")

            # Section 4: The Impact
            gr.Markdown("""
            ## 4. The Impact: From Prediction to Strategy

            SMC transforms AI from a **reactive predictor** to a **strategic planner**.

            ### What This Means for Our Translator:

            | Approach | Can Plan Ahead? | Handles Constraints? | Success Rate |
            |----------|-----------------|---------------------|--------------|
            | Standard Greedy | No - commits immediately | No - uses probable words | N/A (always uses jargon) |
            | SMC Hard | Yes - explores multiple paths | Yes - prunes violations | 25% (particles often die) |
            | SMC Soft | Yes - explores multiple paths | Yes - penalizes violations | **100%** |

            ### Beyond Translation: Where Else Is SMC Used?

            - **Robotics:** Planning movements while avoiding obstacles
            - **Autonomous Vehicles:** Predicting traffic and planning routes
            - **Finance:** Portfolio optimization with risk constraints
            - **Drug Discovery:** Exploring molecular structures with safety constraints

            ### The Fundamental Shift:

            > *"If your AI could plan 10 steps ahead instead of 1, what impossible problem would you have it solve first?"*

            SMC represents moving from **simple prediction** to **true strategic foresight**.
            """)

            gr.Markdown("---")

            # Connection to Next Tab
            gr.Markdown("""
            ## Ready to Try It Yourself?

            Now that you understand how SMC works, head to the **"2. Try It: Translator"** tab
            to see it in action! You can:

            - Watch particles explore and get filtered in real-time
            - Compare hard vs soft constraints
            - Try different professional domains (Legal, Medical, Financial, Technical)
            """)


        # ==================== TAB 2: TRY IT ====================
        with gr.TabItem("2. Try It: Translator"):
            gr.Markdown("""
            # The Plain-English Translator

            ## The Problem We're Solving

            **The Curse of Knowledge:** Experts often struggle to explain concepts without jargon.
            A standard AI naturally uses technical terms because they're statistically probable.

            **Our Solution:** Use SMC to explore multiple explanations simultaneously,
            filtering out any path that uses forbidden terminology. This forces the model
            to find creative, plain-language alternatives.

            ---
            """)

            with gr.Row():
                with gr.Column(scale=2):
                    concept_input = gr.Textbox(
                        label="Concept to Explain",
                        placeholder="e.g., 'Force Majeure clause and why it might void our contract'",
                        lines=2
                    )

                    profession_dropdown = gr.Dropdown(
                        choices=["Legal", "Medical", "Financial", "Technical/Engineering"],
                        value="Legal",
                        label="Professional Domain",
                        info="Each domain has its own set of banned jargon terms"
                    )

                    custom_words = gr.Textbox(
                        label="Additional Banned Words (optional)",
                        placeholder="e.g., contract, clause, party",
                        lines=1
                    )

                    model_dropdown = gr.Dropdown(
                        choices=list(AVAILABLE_MODELS.keys()),
                        value="TinyLlama-1.1B (Fast)",
                        label="Model"
                    )

                    constraint_mode = gr.Radio(
                        choices=["Hard (Prune)", "Soft (Penalize)"],
                        value="Soft (Penalize)",
                        label="Constraint Mode",
                        info="Soft constraints are more robust - see the Learn tab for explanation"
                    )

                    with gr.Row():
                        num_particles = gr.Slider(
                            minimum=2, maximum=10, value=5, step=1,
                            label="Particles",
                            info="More = more exploration"
                        )
                        max_steps = gr.Slider(
                            minimum=5, maximum=30, value=15, step=5,
                            label="Max Steps",
                            info="SMC iterations"
                        )
                        tokens_per_step = gr.Slider(
                            minimum=5, maximum=30, value=15, step=5,
                            label="Tokens/Step",
                            info="Generation length per iteration"
                        )

                    translate_btn = gr.Button("Translate to Plain English", variant="primary", size="lg")

                with gr.Column(scale=1):
                    gr.Markdown("""
                    ### SMC in Action

                    When you click translate, watch the trace log to see:

                    1. **Particles explored** - Multiple paths generated
                    2. **Filtering** - Jargon paths penalized/pruned
                    3. **Resampling** - Best particles selected
                    4. **Convergence** - Final jargon-free output

                    **Tip:** Try the same concept with Hard vs Soft constraints
                    to see the difference!
                    """)

            gr.Markdown("---")

            gr.Markdown("### Output")
            smc_output = gr.Textbox(label="Plain-English Explanation", lines=5, show_label=True)

            with gr.Accordion("SMC Trace Log (See the algorithm in action)", open=True):
                trace_output = gr.Textbox(label="", lines=20, show_label=False)

            with gr.Accordion("Banned Words for This Domain", open=False):
                banned_words_display = gr.Textbox(label="", lines=3, show_label=False)

            gr.Markdown("---")
            gr.Markdown("### Example Scenarios")
            gr.Examples(examples=EXAMPLES, inputs=[concept_input, profession_dropdown, custom_words], label="")


        # ==================== TAB 3: EXPERIMENTS ====================
        with gr.TabItem("3. Our Experiments"):
            gr.Markdown("""
            # What We Learned: An Experimental Journey

            This tab documents our experimental journey in applying SMC to constrained text generation.
            We tested multiple approaches and models to understand what works and what doesn't.

            ---
            """)

            gr.Markdown("""
            ## The Experimental Setup

            ### Goal
            Generate plain-English explanations of professional concepts (Legal, Medical, Financial, Technical)
            that a 10-year-old could understand - **without using any domain-specific jargon**.

            ### Benchmark
            We created 12 test cases (3 per domain) with gold-standard translations from Claude Opus 4.5.
            Each output was scored on:

            | Criterion | Points | Description |
            |-----------|--------|-------------|
            | Jargon-Free | 25 | No banned terminology used |
            | Has Example | 25 | Uses relatable analogy |
            | Appropriate Length | 25 | 20-100 words |
            | Coherence | 25 | Proper sentence structure |

            ---
            """)

            # Experiment 1: Hard Constraints
            gr.Markdown("""
            ## Experiment 1: Hard Constraints (Prune All Jargon)

            ### Hypothesis
            If we completely eliminate any generation path containing jargon, the model will be forced
            to find jargon-free alternatives.

            ### Setup
            - Models: TinyLlama-1.1B, Qwen2-0.5B, Gemma-2-2B
            - Parameters: 5 particles, 25 max steps, 6 tokens per step
            - Constraint: **Hard** - any particle with jargon is immediately pruned

            ### Results
            """)

            # Build data from benchmark
            gemma_data = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]
            tinyllama_data = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]
            qwen_data = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]

            gr.Markdown(f"""
            | Model | Score | Success Rate | Outcome |
            |-------|-------|--------------|---------|
            | Gemma-2-2B | {gemma_data['total_score']}/{gemma_data['max_possible']} ({gemma_data['percentage']}%) | {gemma_data.get('successful_outputs', 3)}/12 | 9 empty outputs |
            | TinyLlama-1.1B | {tinyllama_data['total_score']}/{tinyllama_data['max_possible']} ({tinyllama_data['percentage']}%) | {tinyllama_data.get('successful_outputs', 3)}/12 | 9 empty outputs |
            | Qwen2-0.5B | {qwen_data['total_score']}/{qwen_data['max_possible']} ({qwen_data['percentage']}%) | {qwen_data.get('successful_outputs', 2)}/12 | 10 empty outputs |

            ### What Happened?
            **75% of test cases produced empty outputs!**

            The problem: When explaining medical concepts, the model naturally reaches for words like
            "benign," "lesion," and "diagnosis." With hard constraints, EVERY generation path
            contained at least one banned word, causing **total particle death**.

            ### Key Learning
            Hard constraints are too aggressive. Domain-specific vocabulary is so deeply embedded
            in model weights that it's nearly impossible to avoid entirely through pruning alone.

            ---
            """)

            # Experiment 2: Soft Constraints
            gr.Markdown("""
            ## Experiment 2: Soft Constraints (Weighted Resampling)

            ### Hypothesis
            Instead of killing particles with jargon, we should **penalize** them with lower weights.
            This allows gradual steering toward jargon-free outputs while preventing particle death.

            ### The Key Insight
            ```
            Weight = penalty_factor ^ (jargon_count)

            With penalty_factor = 0.3:
            - 0 jargon words → weight = 1.0
            - 1 jargon word  → weight = 0.3
            - 2 jargon words → weight = 0.09
            ```

            Particles with jargon can **survive** but are less likely to be selected for the next generation.
            Over time, the population naturally shifts toward jargon-free outputs.

            ### Setup
            - Model: Qwen2.5-7B (via Ollama)
            - Parameters: 5 particles, 15 max steps, 25 tokens per step
            - Constraint: **Soft** - penalty factor 0.3

            ### Results
            """)

            qwen25_soft_data = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {})

            gr.Markdown(f"""
            | Model | Score | Success Rate | Jargon Violations |
            |-------|-------|--------------|-------------------|
            | Qwen2.5-7B (Soft) | {qwen25_soft_data.get('total_score', 920)}/{qwen25_soft_data.get('max_possible', 1200)} ({qwen25_soft_data.get('percentage', 76.7)}%) | **{qwen25_soft_data.get('successful_outputs', 12)}/12** | 1/12 |

            ### The Transformation
            | Metric | Hard Constraints | Soft Constraints |
            |--------|------------------|------------------|
            | Success Rate | 25% (3/12) | **100% (12/12)** |
            | Average Score | ~44% | **76.7%** |
            | Empty Outputs | 9/12 | **0/12** |

            ### What Changed?
            - Particles with jargon no longer die instantly
            - The population gradually evolves toward jargon-free outputs
            - Even if early generations contain jargon, later generations learn to avoid it
            - The one jargon violation ("synchronous") was unavoidable given the topic

            ---
            """)

            # Comparison Browser
            gr.Markdown("## Compare Results Across Models")
            gr.Markdown("Select an example to see how different approaches performed:")

            all_examples = []
            for domain in ["Legal", "Medical", "Financial", "Technical/Engineering"]:
                for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
                    all_examples.append(f"{domain}: {concept[:55]}...")

            example_dropdown = gr.Dropdown(choices=all_examples, value=all_examples[0], label="Select Example")

            first_domain = "Legal"
            first_concept = list(BENCHMARK_DATA["claude_opus_benchmarks"]["Legal"].keys())[0]
            initial_claude = BENCHMARK_DATA["claude_opus_benchmarks"][first_domain][first_concept]["translation"]
            initial_qwen25 = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {}).get("results", {}).get(first_domain, {}).get(first_concept, {}).get("output", "") or "(Not available)"
            initial_gemma = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]["results"][first_domain][first_concept].get("output", "") or "(Hard constraints killed all particles)"

            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Claude Opus 4.5 (Gold Standard)**")
                    claude_output = gr.Textbox(value=initial_claude, lines=4, interactive=False, show_label=False)
                with gr.Column():
                    gr.Markdown("**Qwen2.5-7B (Soft Constraints)**")
                    qwen25_output = gr.Textbox(value=initial_qwen25, lines=4, interactive=False, show_label=False)

            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Gemma-2-2B (Hard Constraints)**")
                    gemma_output = gr.Textbox(value=initial_gemma, lines=4, interactive=False, show_label=False)
                with gr.Column():
                    gr.Markdown("**TinyLlama-1.1B (Hard Constraints)**")
                    initial_tiny = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][first_domain][first_concept].get("output", "") or "(Hard constraints killed all particles)"
                    tinyllama_output = gr.Textbox(value=initial_tiny, lines=4, interactive=False, show_label=False)

            def update_example_outputs(selection):
                domain = selection.split(":")[0]
                concept_preview = selection.split(": ")[1].replace("...", "")
                for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
                    if concept.startswith(concept_preview.strip()):
                        claude = BENCHMARK_DATA["claude_opus_benchmarks"][domain][concept]["translation"]
                        qwen25 = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {}).get("results", {}).get(domain, {}).get(concept, {}).get("output", "") or "(Not available)"
                        gemma = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]["results"][domain].get(concept, {}).get("output", "") or "(Hard constraints killed all particles)"
                        tiny = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][domain].get(concept, {}).get("output", "") or "(Hard constraints killed all particles)"
                        return claude, qwen25, gemma, tiny
                return "Not found", "Not found", "Not found", "Not found"

            example_dropdown.change(
                fn=update_example_outputs,
                inputs=[example_dropdown],
                outputs=[claude_output, qwen25_output, gemma_output, tinyllama_output]
            )

            gr.Markdown("---")

            # Key Takeaways
            gr.Markdown("""
            ## Key Takeaways

            ### What We Learned About SMC for Constrained Generation

            1. **Soft constraints dramatically outperform hard constraints**
               - Hard pruning causes particle death when constraints conflict with model priors
               - Weighted resampling allows graceful degradation and recovery

            2. **Penalty factor matters**
               - 0.3 (70% reduction per jargon word) provided good balance
               - Too aggressive (0.1) → still causes particle death
               - Too lenient (0.5) → jargon persists too long

            3. **Model size affects vocabulary diversity**
               - Larger models (7B+) have more alternative phrasings available
               - Smaller models get stuck more easily because they have fewer "escape routes"

            4. **SMC enables strategic generation**
               - Standard greedy generation commits immediately and can't backtrack
               - SMC explores multiple futures and converges on the best path

            ### Broader Implications

            This technique applies beyond jargon filtering:
            - **Content moderation:** Generate text avoiding harmful content
            - **Style transfer:** Guide generation toward specific writing styles
            - **Factual grounding:** Penalize generations that contradict known facts
            - **Length control:** Soft constraints on verbosity

            ---
            *Experiments conducted December 2025. Models tested via HuggingFace Transformers and Ollama.*
            """)

    # Event handlers
    translate_btn.click(
        fn=smc_translate,
        inputs=[concept_input, profession_dropdown, custom_words, model_dropdown, num_particles, max_steps, tokens_per_step, constraint_mode],
        outputs=[smc_output, trace_output, banned_words_display]
    )

if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())