Spaces:

AshwinP
/

SequentialMonteCarlo

Sleeping

apingali commited on Dec 26, 2025

Commit

3cd35a6

1 Parent(s): 67bc0e3

Transform space into SMC educational learning resource

- Add Learn SMC tab with infographic and 4-section explanation
- Add soft constraints implementation with weighted resampling
- Rename Analytics to Our Experiments with experimental journey
- Add Qwen2.5-7B benchmark results (76.7% with soft constraints)
- Improve translator tab with clearer problem/solution framing

Files changed (4) hide show

.gitattributes +1 -0
Sequential_monte_carlo.png +3 -0
app.py +492 -283
benchmark_data.json +109 -4

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

Sequential_monte_carlo.png ADDED Viewed

Git LFS Details

SHA256: 688a5ff60b2b8a5165db73fa7487b6ee78968c9412758a798f6b116adc6b0687
Pointer size: 132 Bytes
Size of remote file: 6.71 MB

app.py CHANGED Viewed

@@ -1,60 +1,63 @@
 """
-The Plain-English Translator 🗣️
-A Sequential Monte Carlo approach to translating professional jargon into plain language.
-This tool helps professionals (lawyers, doctors, engineers, financial advisors) explain
-complex concepts to clients without using industry-specific terminology.
 """
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import random
-import spaces
 import json
 import os
 # Load benchmark data
 BENCHMARK_DATA_PATH = os.path.join(os.path.dirname(__file__), "benchmark_data.json")
 with open(BENCHMARK_DATA_PATH, "r") as f:
     BENCHMARK_DATA = json.load(f)
 # ============================================================================
 # MODEL SETUP
 # ============================================================================
-# Available models - users can select from these
 AVAILABLE_MODELS = {
-    "TinyLlama-1.1B (Open, Fast)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "Qwen2-0.5B (Open, Fastest)": "Qwen/Qwen2-0.5B-Instruct",
-    "Gemma-2-2B (Gated, Requires HF Login)": "google/gemma-2-2b-it",
 }
-# Cache for loaded models
 loaded_models = {}
 loaded_tokenizers = {}
 def load_model(model_name: str):
-    """
-    Lazy load the model to avoid memory issues during startup.
-    Models are cached after first load.
-    """
     model_id = AVAILABLE_MODELS.get(model_name, "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
     if model_id not in loaded_tokenizers:
         loaded_tokenizers[model_id] = AutoTokenizer.from_pretrained(model_id)
     if model_id not in loaded_models:
         loaded_models[model_id] = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            device_map="auto",
-            torch_dtype=torch.float16
         )
     return loaded_tokenizers[model_id], loaded_models[model_id]
 # ============================================================================
-# JARGON DICTIONARIES BY PROFESSION
 # ============================================================================
 JARGON_DICTIONARIES = {
@@ -89,18 +92,13 @@ JARGON_DICTIONARIES = {
 }
 # ============================================================================
-# SMC CORE LOGIC
 # ============================================================================
 def is_safe(text: str, banned_words: list) -> bool:
-    """
-    Checks if the generated text contains any banned jargon.
-    Returns True if the text is 'safe' (no jargon found).
-    """
     text_lower = text.lower()
     for word in banned_words:
         word_lower = word.lower()
-        # Check for the word as a standalone word with various endings
         if (f" {word_lower} " in f" {text_lower} " or
             f" {word_lower}." in f" {text_lower}" or
             f" {word_lower}," in f" {text_lower}" or
@@ -112,7 +110,6 @@ def is_safe(text: str, banned_words: list) -> bool:
     return True
 def find_jargon_used(text: str, banned_words: list) -> list:
-    """Returns a list of banned words found in the text."""
     text_lower = text.lower()
     found = []
     for word in banned_words:
@@ -127,197 +124,366 @@ def find_jargon_used(text: str, banned_words: list) -> list:
             found.append(word)
     return found
 @spaces.GPU
 def smc_translate(
     concept: str,
     profession: str,
     custom_banned_words: str = "",
-    model_name: str = "TinyLlama-1.1B (Open, Fast)",
     num_particles: int = 5,
     max_steps: int = 20,
-    tokens_per_step: int = 4,
     progress=gr.Progress()
 ) -> tuple:
-    """
-    Sequential Monte Carlo translation with particle filtering.
-    The key insight: Instead of generating text greedily (one token at a time),
-    we maintain multiple 'particles' (candidate generations) and prune any that
-    use forbidden jargon. This forces the model to find alternative phrasings.
-    """
     tokenizer, model_inst = load_model(model_name)
-    # Build banned words list
     banned_words = JARGON_DICTIONARIES.get(profession, []).copy()
     if custom_banned_words.strip():
         custom_list = [w.strip() for w in custom_banned_words.split(",") if w.strip()]
         banned_words.extend(custom_list)
-    # Construct the prompt
     prompt = f"""You are an expert {profession.lower()} professional explaining a concept to a client with no background in your field.
 Rules:
 - Explain as if talking to a curious 10-year-old
 - Use a concrete, relatable real-world example to illustrate the concept
-- Avoid redundancy (don't say "X is Y such as Y")
 - Keep it concise: 2-3 sentences max
 Concept to explain: {concept}
 Simple explanation with example:"""
-    # Initialize particles
     particles = [prompt]
     trace_log = []
-    trace_log.append(f"🚀 Starting SMC Translation")
-    trace_log.append(f"🤖 Model: {model_name}")
-    trace_log.append(f"📋 Concept: {concept}")
-    trace_log.append(f"🚫 Banned words: {len(banned_words)} terms")
-    trace_log.append(f"🔢 Particles: {num_particles}, Max steps: {max_steps}")
-    trace_log.append("-" * 50)
-    for step in progress.tqdm(range(max_steps), desc="Translating"):
         candidates = []
-        # EXPLORE: Expand each particle with multiple continuations
         for particle in particles:
             inputs = tokenizer(particle, return_tensors="pt").to(model_inst.device)
             with torch.no_grad():
                 outputs = model_inst.generate(
                     **inputs,
                     max_new_tokens=tokens_per_step,
                     num_return_sequences=3,
                     do_sample=True,
-                    temperature=0.8,
-                    top_p=0.9,
                     pad_token_id=tokenizer.eos_token_id
                 )
             for out in outputs:
                 decoded = tokenizer.decode(out, skip_special_tokens=True)
                 candidates.append(decoded)
-        # FILTER: Prune paths that contain jargon
-        valid_candidates = []
-        pruned_count = 0
-        for candidate in candidates:
-            if is_safe(candidate, banned_words):
-                valid_candidates.append(candidate)
-            else:
-                pruned_count += 1
-                jargon_found = find_jargon_used(candidate, banned_words)
-                trace_log.append(f"✂️ Step {step+1}: Pruned path using: {jargon_found}")
-        # RESAMPLE: Keep the best valid paths
-        if valid_candidates:
-            # Deduplicate and sample
-            unique_candidates = list(set(valid_candidates))
-            random.shuffle(unique_candidates)
-            particles = unique_candidates[:num_particles]
-            if pruned_count > 0:
-                trace_log.append(f"✅ Step {step+1}: Kept {len(particles)} particles, pruned {pruned_count}")
         else:
-            # All paths used jargon - this is the SMC "particle death" scenario
-            trace_log.append(f"⚠️ Step {step+1}: All {len(candidates)} paths used jargon! Stopping early.")
-            break
-        # Check for natural stopping (end of sentence)
         current_text = particles[0].split("Simple explanation with example:")[-1].strip()
-        if current_text.endswith(('.', '!', '?')) and len(current_text) > 50:
-            trace_log.append(f"🏁 Step {step+1}: Natural stopping point reached.")
             break
-    # Extract the final explanation
-    final_text = particles[0].split("Simple explanation with example:")[-1].strip()
-    # Final jargon check
     final_jargon = find_jargon_used(final_text, banned_words)
     if final_jargon:
-        trace_log.append(f"\n⚠️ Warning: Final output still contains: {final_jargon}")
     else:
-        trace_log.append(f"\n✨ Success! No jargon detected in final output.")
-    trace_output = "\n".join(trace_log)
-    return final_text, trace_output, ", ".join(banned_words)
-def greedy_baseline(concept: str, profession: str) -> str:
-    """
-    Standard greedy generation for comparison.
-    Shows how a normal LLM would respond (likely with jargon).
-    """
-    tokenizer, model_inst = load_model()
-    prompt = f"""You are an expert {profession.lower()} professional who needs to explain a concept to a client who has no background in your field. Explain it as if talking to a curious 10-year-old.
-Concept to explain: {concept}
-Simple explanation:"""
-    inputs = tokenizer(prompt, return_tensors="pt").to(model_inst.device)
-    with torch.no_grad():
-        outputs = model_inst.generate(
-            **inputs,
-            max_new_tokens=150,
-            do_sample=True,
-            temperature=0.7,
-            pad_token_id=tokenizer.eos_token_id
-        )
-    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return decoded.split("Simple explanation:")[-1].strip()
 # ============================================================================
-# GRADIO INTERFACE
 # ============================================================================
 EXAMPLES = [
-    # Legal examples
     ["Force Majeure clause and why it might void our contract", "Legal", ""],
     ["Why we need to add an indemnification clause to protect your business", "Legal", ""],
-    ["What happens if the other party breaches the non-compete agreement", "Legal", ""],
-    # Medical examples
     ["Your MRI shows a benign lesion that we should monitor", "Medical", ""],
     ["The etiology of your chronic fatigue syndrome", "Medical", ""],
-    ["Why we're recommending prophylactic treatment given your comorbidities", "Medical", ""],
-    # Financial examples
     ["How compound interest and amortization affect your mortgage payments", "Financial", ""],
     ["Why we recommend diversifying your portfolio with low-liquidity assets", "Financial", ""],
-    ["The tax implications of depreciation on your rental property", "Financial", ""],
-    # Technical examples
     ["Why our API has high latency and how microservices could help", "Technical/Engineering", ""],
     ["The difference between synchronous and asynchronous processing", "Technical/Engineering", ""],
-    ["Why we need to refactor the legacy codebase before adding new features", "Technical/Engineering", ""],
 ]
 # ============================================================================
 # GRADIO INTERFACE
 # ============================================================================
-with gr.Blocks(title="The Plain-English Translator") as demo:
     gr.Markdown("""
-    # 🗣️ The Plain-English Translator
-    ### Breaking the Curse of Knowledge with Sequential Monte Carlo
     """)
     with gr.Tabs():
-        # ==================== TRANSLATOR TAB ====================
-        with gr.TabItem("🔄 Translator"):
             gr.Markdown("""
-            **The Problem:** Experts often struggle to explain complex concepts without using jargon.
-            A standard AI will naturally use technical terms because they're statistically probable.
-            **The Solution:** Sequential Monte Carlo (SMC) particle filtering. Instead of greedy generation,
-            we maintain multiple candidate explanations and **prune any path that uses forbidden jargon**.
-            This forces the model to find alternative, plain-language phrasings.
             ---
             """)
@@ -333,255 +499,298 @@ with gr.Blocks(title="The Plain-English Translator") as demo:
                     profession_dropdown = gr.Dropdown(
                         choices=["Legal", "Medical", "Financial", "Technical/Engineering"],
                         value="Legal",
-                        label="Professional Domain"
                     )
                     custom_words = gr.Textbox(
-                        label="Additional Banned Words (comma-separated, optional)",
                         placeholder="e.g., contract, clause, party",
                         lines=1
                     )
                     model_dropdown = gr.Dropdown(
                         choices=list(AVAILABLE_MODELS.keys()),
-                        value="TinyLlama-1.1B (Open, Fast)",
-                        label="Model",
-                        info="Gemma requires HF authentication (huggingface-cli login)"
                     )
                     with gr.Row():
                         num_particles = gr.Slider(
                             minimum=2, maximum=10, value=5, step=1,
-                            label="Number of Particles",
-                            info="More particles = more diverse exploration, but slower"
                         )
                         max_steps = gr.Slider(
-                            minimum=10, maximum=40, value=20, step=5,
-                            label="Max Generation Steps",
-                            info="Maximum SMC iterations"
                         )
-                    translate_btn = gr.Button("🔄 Translate to Plain English", variant="primary", size="lg")
                 with gr.Column(scale=1):
                     gr.Markdown("""
-                    ### How SMC Works Here
-                    1. **Initialize**: Start with multiple 'particles' (candidate texts)
-                    2. **Expand**: Generate a few tokens for each particle
-                    3. **Filter**: Prune any particle that uses banned jargon
-                    4. **Resample**: Keep the surviving particles and repeat
-                    This mimics how SMC works in statistics: maintaining a population
-                    of hypotheses and reweighting based on evidence (here: jargon-free).
                     """)
             gr.Markdown("---")
-            with gr.Row():
-                with gr.Column():
-                    gr.Markdown("### ✅ SMC Plain-English Output")
-                    smc_output = gr.Textbox(
-                        label="",
-                        lines=8,
-                        show_label=False
-                    )
-            with gr.Accordion("🔍 SMC Trace Log (See the pruning in action)", open=False):
-                trace_output = gr.Textbox(
-                    label="",
-                    lines=15,
-                    show_label=False
-                )
-            with gr.Accordion("📋 Banned Words Used", open=False):
-                banned_words_display = gr.Textbox(
-                    label="",
-                    lines=3,
-                    show_label=False
-                )
             gr.Markdown("---")
-            gr.Markdown("### 📚 Example Scenarios")
-            gr.Examples(
-                examples=EXAMPLES,
-                inputs=[concept_input, profession_dropdown, custom_words],
-                label=""
-            )
             gr.Markdown("""
             ---
-            *Built with 🤗 Transformers and Gradio*
             """)
-        # ==================== ANALYTICS TAB ====================
-        with gr.TabItem("📊 Analytics"):
             gr.Markdown("""
-            ## SMC Benchmark Results
-            We tested 3 models against **Claude Opus 4.5** benchmark translations across 12 professional scenarios
-            (3 Legal, 3 Medical, 3 Financial, 3 Technical). Each output was scored on:
-            - **Jargon-Free (25 pts)**: No banned terminology used
-            - **Has Example (25 pts)**: Uses relatable analogy
-            - **Appropriate Length (25 pts)**: 20-100 words
-            - **Coherence (25 pts)**: Proper sentence structure
             ---
             """)
-            # Overall Scores Section
-            gr.Markdown("### 🏆 Overall Model Performance")
-            # Build scores table from loaded data
             gemma_data = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]
             tinyllama_data = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]
             qwen_data = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]
-            with gr.Row():
-                with gr.Column():
-                    gr.Markdown(f"""
-                    | Model | Score | Percentage | Grade |
-                    |-------|-------|------------|-------|
-                    | **Claude Opus 4.5** (Benchmark) | 1200/1200 | 100% | A+ |
-                    | **Gemma-2-2B** | {gemma_data['total_score']}/{gemma_data['max_possible']} | {gemma_data['percentage']}% | C |
-                    | **TinyLlama-1.1B** | {tinyllama_data['total_score']}/{tinyllama_data['max_possible']} | {tinyllama_data['percentage']}% | C |
-                    | **Qwen2-0.5B** | {qwen_data['total_score']}/{qwen_data['max_possible']} | {qwen_data['percentage']}% | C- |
-                    """)
-            gr.Markdown("---")
-            # Key Finding with domain data
-            domain_data = BENCHMARK_DATA["domain_summary"]
-            gr.Markdown(f"""
-            ### ⚠️ Key Finding: SMC Constraint Strictness
-            **9 out of 12 examples produced empty outputs** across all SMC models.
-            The SMC algorithm successfully avoided jargon (✅ **zero jargon violations**),
-            but it pruned ALL generation paths for most non-Legal domains because those technical
-            terms are deeply embedded in model weights.
-            | Domain | Success Rate | Notes |
-            |--------|--------------|-------|
-            | **Legal** | {domain_data['Legal']['success_rate']}% ({domain_data['Legal']['successful']}/{domain_data['Legal']['total_tests']}) | Best performance - more paraphrase options |
-            | **Medical** | {domain_data['Medical']['success_rate']}% ({domain_data['Medical']['successful']}/{domain_data['Medical']['total_tests']}) | Terms like "benign", "lesion" unavoidable |
-            | **Financial** | {domain_data['Financial']['success_rate']}% ({domain_data['Financial']['successful']}/{domain_data['Financial']['total_tests']}) | Terms like "compound", "portfolio" unavoidable |
-            | **Technical** | {domain_data['Technical/Engineering']['success_rate']}% ({domain_data['Technical/Engineering']['successful']}/{domain_data['Technical/Engineering']['total_tests']}) | Terms like "API", "latency" unavoidable |
             ---
             """)
-            # Interactive Example Browser
-            gr.Markdown("### 🔍 Browse All Benchmark Results")
-            # Build example choices from data
             all_examples = []
             for domain in ["Legal", "Medical", "Financial", "Technical/Engineering"]:
                 for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
-                    all_examples.append(f"{domain}: {concept[:60]}...")
-            example_dropdown = gr.Dropdown(
-                choices=all_examples,
-                value=all_examples[0],
-                label="Select Example to Compare"
-            )
-            # Get initial values for the first example
             first_domain = "Legal"
             first_concept = list(BENCHMARK_DATA["claude_opus_benchmarks"]["Legal"].keys())[0]
             initial_claude = BENCHMARK_DATA["claude_opus_benchmarks"][first_domain][first_concept]["translation"]
-            initial_gemma = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]["results"][first_domain][first_concept].get("output", "") or "(SMC pruned all paths)"
-            initial_tiny = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][first_domain][first_concept].get("output", "") or "(SMC pruned all paths)"
-            initial_qwen = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]["results"][first_domain][first_concept].get("output", "") or "(SMC pruned all paths)"
             with gr.Row():
                 with gr.Column():
-                    gr.Markdown("**Claude Opus 4.5 (Benchmark)**")
-                    claude_output = gr.Textbox(value=initial_claude, lines=5, interactive=False, show_label=False)
                 with gr.Column():
-                    gr.Markdown("**Gemma-2-2B**")
-                    gemma_output = gr.Textbox(value=initial_gemma, lines=5, interactive=False, show_label=False)
             with gr.Row():
                 with gr.Column():
-                    gr.Markdown("**TinyLlama-1.1B**")
-                    tinyllama_output = gr.Textbox(value=initial_tiny, lines=5, interactive=False, show_label=False)
                 with gr.Column():
-                    gr.Markdown("**Qwen2-0.5B**")
-                    qwen_output = gr.Textbox(value=initial_qwen, lines=5, interactive=False, show_label=False)
             def update_example_outputs(selection):
-                # Parse selection to get domain and concept
                 domain = selection.split(":")[0]
                 concept_preview = selection.split(": ")[1].replace("...", "")
-                # Find matching concept
                 for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
                     if concept.startswith(concept_preview.strip()):
                         claude = BENCHMARK_DATA["claude_opus_benchmarks"][domain][concept]["translation"]
-                        gemma_result = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]["results"][domain].get(concept, {})
-                        gemma = gemma_result.get("output", "") or "(SMC pruned all paths)"
-                        tiny_result = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][domain].get(concept, {})
-                        tiny = tiny_result.get("output", "") or "(SMC pruned all paths)"
-                        qwen_result = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]["results"][domain].get(concept, {})
-                        qwen = qwen_result.get("output", "") or "(SMC pruned all paths)"
-                        return claude, gemma, tiny, qwen
                 return "Not found", "Not found", "Not found", "Not found"
             example_dropdown.change(
                 fn=update_example_outputs,
                 inputs=[example_dropdown],
-                outputs=[claude_output, gemma_output, tinyllama_output, qwen_output]
             )
             gr.Markdown("---")
-            # All Claude Benchmarks Section
-            gr.Markdown("### 📖 Complete Claude Opus 4.5 Benchmark Translations")
-            for domain in ["Legal", "Medical", "Financial", "Technical/Engineering"]:
-                with gr.Accordion(f"📁 {domain} ({len(BENCHMARK_DATA['claude_opus_benchmarks'][domain])} examples)", open=False):
-                    for concept, data in BENCHMARK_DATA["claude_opus_benchmarks"][domain].items():
-                        gr.Markdown(f"**{concept}**")
-                        gr.Textbox(value=data["translation"], lines=3, interactive=False, show_label=False)
-            gr.Markdown("---")
-            # Insights
-            gr.Markdown("""
-            ### 💡 Insights
-            **What Worked:**
-            - ✅ **Zero jargon violations** - SMC successfully filtered all banned terms
-            - ✅ **Gemma-2-2B produced the most creative analogies** (treehouse, lemonade stand)
-            - ✅ **Legal domain had best success** - more paraphrase flexibility
-            **Challenges:**
-            - ❌ **Aggressive pruning** - 75% of examples couldn't complete
-            - ❌ **Domain-specific vocabulary** is deeply embedded in model weights
-            - ❌ **Smaller models** have less vocabulary diversity for alternatives
-            **Recommendations:**
-            1. Use **softer constraints** (penalize vs. hard prune)
-            2. **Reduce banned word lists** for demonstrations
-            3. Consider **larger models** (7B+) for more vocabulary diversity
-            4. Implement **backoff strategies** when all particles die
             ---
-            *Benchmark conducted with num_particles=5, max_steps=25, tokens_per_step=6*
             """)
-    # Event handlers (outside tabs but inside demo block)
     translate_btn.click(
         fn=smc_translate,
-        inputs=[concept_input, profession_dropdown, custom_words, model_dropdown, num_particles, max_steps],
         outputs=[smc_output, trace_output, banned_words_display]
     )

 """
+Learning Sequential Monte Carlo (SMC) Through the Plain-English Translator
+An interactive educational space that teaches Sequential Monte Carlo methods
+using a practical application: helping professionals explain complex concepts
+without using industry jargon.
 """
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import random
 import json
 import os
+import math
+# Mock spaces module for local development (only needed on HuggingFace Spaces)
+try:
+    import spaces
+except ImportError:
+    class spaces:
+        @staticmethod
+        def GPU(func):
+            return func
 # Load benchmark data
 BENCHMARK_DATA_PATH = os.path.join(os.path.dirname(__file__), "benchmark_data.json")
 with open(BENCHMARK_DATA_PATH, "r") as f:
     BENCHMARK_DATA = json.load(f)
+# Path to infographic
+INFOGRAPHIC_PATH = os.path.join(os.path.dirname(__file__), "Sequential_monte_carlo.png")
 # ============================================================================
 # MODEL SETUP
 # ============================================================================
 AVAILABLE_MODELS = {
+    "TinyLlama-1.1B (Fast)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "Qwen2-0.5B (Fastest)": "Qwen/Qwen2-0.5B-Instruct",
+    "Qwen2.5-7B (Best Quality)": "Qwen/Qwen2.5-7B-Instruct",
+    "Qwen3-8B (Latest)": "Qwen/Qwen3-8B",
+    "Gemma-2-2B (Requires HF Login)": "google/gemma-2-2b-it",
 }
 loaded_models = {}
 loaded_tokenizers = {}
 def load_model(model_name: str):
     model_id = AVAILABLE_MODELS.get(model_name, "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
     if model_id not in loaded_tokenizers:
         loaded_tokenizers[model_id] = AutoTokenizer.from_pretrained(model_id)
     if model_id not in loaded_models:
         loaded_models[model_id] = AutoModelForCausalLM.from_pretrained(
+            model_id, device_map="auto", torch_dtype=torch.float16
         )
     return loaded_tokenizers[model_id], loaded_models[model_id]
 # ============================================================================
+# JARGON DICTIONARIES
 # ============================================================================
 JARGON_DICTIONARIES = {
 }
 # ============================================================================
+# SMC CORE FUNCTIONS
 # ============================================================================
 def is_safe(text: str, banned_words: list) -> bool:
     text_lower = text.lower()
     for word in banned_words:
         word_lower = word.lower()
         if (f" {word_lower} " in f" {text_lower} " or
             f" {word_lower}." in f" {text_lower}" or
             f" {word_lower}," in f" {text_lower}" or
     return True
 def find_jargon_used(text: str, banned_words: list) -> list:
     text_lower = text.lower()
     found = []
     for word in banned_words:
             found.append(word)
     return found
+def count_jargon(text: str, banned_words: list) -> int:
+    return len(find_jargon_used(text, banned_words))
+def compute_weight(text: str, banned_words: list, penalty_factor: float = 0.3) -> float:
+    jargon_count = count_jargon(text, banned_words)
+    return math.pow(penalty_factor, jargon_count)
+def weighted_resample(particles: list, weights: list, num_samples: int) -> list:
+    if not particles or not weights:
+        return []
+    total_weight = sum(weights)
+    if total_weight == 0:
+        probs = [1.0 / len(particles)] * len(particles)
+    else:
+        probs = [w / total_weight for w in weights]
+    resampled = random.choices(particles, weights=probs, k=num_samples)
+    unique = list(dict.fromkeys(resampled))
+    return unique[:num_samples]
 @spaces.GPU
 def smc_translate(
     concept: str,
     profession: str,
     custom_banned_words: str = "",
+    model_name: str = "TinyLlama-1.1B (Fast)",
     num_particles: int = 5,
     max_steps: int = 20,
+    tokens_per_step: int = 15,
+    constraint_mode: str = "Soft (Penalize)",
     progress=gr.Progress()
 ) -> tuple:
     tokenizer, model_inst = load_model(model_name)
+    use_soft_constraints = "Soft" in constraint_mode
     banned_words = JARGON_DICTIONARIES.get(profession, []).copy()
     if custom_banned_words.strip():
         custom_list = [w.strip() for w in custom_banned_words.split(",") if w.strip()]
         banned_words.extend(custom_list)
     prompt = f"""You are an expert {profession.lower()} professional explaining a concept to a client with no background in your field.
 Rules:
 - Explain as if talking to a curious 10-year-old
 - Use a concrete, relatable real-world example to illustrate the concept
+- Avoid technical jargon - use everyday words instead
 - Keep it concise: 2-3 sentences max
 Concept to explain: {concept}
 Simple explanation with example:"""
     particles = [prompt]
     trace_log = []
+    trace_log.append(f"{'='*60}")
+    trace_log.append(f"SMC PLAIN-ENGLISH TRANSLATOR - TRACE LOG")
+    trace_log.append(f"{'='*60}")
+    trace_log.append(f"Model: {model_name}")
+    trace_log.append(f"Constraint Mode: {constraint_mode}")
+    trace_log.append(f"Concept: {concept}")
+    trace_log.append(f"Domain: {profession}")
+    trace_log.append(f"Banned words: {len(banned_words)} terms")
+    trace_log.append(f"Particles: {num_particles} | Steps: {max_steps} | Tokens/step: {tokens_per_step}")
+    trace_log.append(f"{'='*60}")
+    trace_log.append("")
+    for step in progress.tqdm(range(max_steps), desc="SMC Iteration"):
         candidates = []
+        # STEP 1: EXPLORE - Generate multiple continuations
         for particle in particles:
             inputs = tokenizer(particle, return_tensors="pt").to(model_inst.device)
             with torch.no_grad():
                 outputs = model_inst.generate(
                     **inputs,
                     max_new_tokens=tokens_per_step,
                     num_return_sequences=3,
                     do_sample=True,
+                    temperature=0.9 if use_soft_constraints else 0.8,
+                    top_p=0.95 if use_soft_constraints else 0.9,
                     pad_token_id=tokenizer.eos_token_id
                 )
             for out in outputs:
                 decoded = tokenizer.decode(out, skip_special_tokens=True)
                 candidates.append(decoded)
+        if not candidates:
+            trace_log.append(f"Step {step+1}: No candidates generated - stopping")
+            break
+        # STEP 2: FILTER/WEIGHT - Apply constraints
+        if use_soft_constraints:
+            weights = [compute_weight(c, banned_words, penalty_factor=0.3) for c in candidates]
+            jargon_counts = [count_jargon(c, banned_words) for c in candidates]
+            clean_count = sum(1 for c in jargon_counts if c == 0)
+            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
+            trace_log.append(f"         {clean_count} jargon-free | Weights: [{min(weights):.2f} - {max(weights):.2f}]")
+            # STEP 3: RESAMPLE - Weighted selection
+            particles = weighted_resample(candidates, weights, num_particles)
+            if not particles:
+                trace_log.append(f"         Resampling failed - stopping")
+                break
+            trace_log.append(f"         Resampled to {len(particles)} particles")
         else:
+            valid_candidates = []
+            pruned_count = 0
+            for candidate in candidates:
+                if is_safe(candidate, banned_words):
+                    valid_candidates.append(candidate)
+                else:
+                    pruned_count += 1
+            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
+            trace_log.append(f"         {len(valid_candidates)} survived | {pruned_count} pruned (contained jargon)")
+            if valid_candidates:
+                unique_candidates = list(set(valid_candidates))
+                random.shuffle(unique_candidates)
+                particles = unique_candidates[:num_particles]
+            else:
+                trace_log.append(f"         ALL PARTICLES DIED - jargon unavoidable!")
+                break
+        # Check for completion
         current_text = particles[0].split("Simple explanation with example:")[-1].strip()
+        if current_text.endswith(('.', '!', '?')) and len(current_text) > 80:
+            trace_log.append(f"\nNatural completion reached at step {step+1}")
             break
+    trace_log.append("")
+    trace_log.append(f"{'='*60}")
+    # Get best result
+    if particles:
+        if use_soft_constraints:
+            best_idx = 0
+            best_jargon_count = float('inf')
+            for i, p in enumerate(particles):
+                jc = count_jargon(p, banned_words)
+                if jc < best_jargon_count:
+                    best_jargon_count = jc
+                    best_idx = i
+            final_text = particles[best_idx].split("Simple explanation with example:")[-1].strip()
+        else:
+            final_text = particles[0].split("Simple explanation with example:")[-1].strip()
+    else:
+        final_text = "(All generation paths used jargon - try soft constraints!)"
     final_jargon = find_jargon_used(final_text, banned_words)
     if final_jargon:
+        trace_log.append(f"RESULT: Contains jargon: {final_jargon}")
     else:
+        trace_log.append(f"RESULT: Jargon-free output achieved!")
+    trace_log.append(f"{'='*60}")
+    return final_text, "\n".join(trace_log), ", ".join(banned_words)
 # ============================================================================
+# EXAMPLES
 # ============================================================================
 EXAMPLES = [
     ["Force Majeure clause and why it might void our contract", "Legal", ""],
     ["Why we need to add an indemnification clause to protect your business", "Legal", ""],
     ["Your MRI shows a benign lesion that we should monitor", "Medical", ""],
     ["The etiology of your chronic fatigue syndrome", "Medical", ""],
     ["How compound interest and amortization affect your mortgage payments", "Financial", ""],
     ["Why we recommend diversifying your portfolio with low-liquidity assets", "Financial", ""],
     ["Why our API has high latency and how microservices could help", "Technical/Engineering", ""],
     ["The difference between synchronous and asynchronous processing", "Technical/Engineering", ""],
 ]
 # ============================================================================
 # GRADIO INTERFACE
 # ============================================================================
+with gr.Blocks(title="Learn SMC: The Plain-English Translator") as demo:
+    # ==================== HEADER ====================
     gr.Markdown("""
+    # Learning Sequential Monte Carlo (SMC)
+    ## An Interactive Guide Using the Plain-English Translator
+    Welcome! This space teaches you about **Sequential Monte Carlo** methods through a practical application:
+    helping professionals explain complex concepts without using jargon.
+    Navigate through the tabs to learn the theory, try the interactive demo, and see our experimental results.
     """)
     with gr.Tabs():
+        # ==================== TAB 1: LEARN SMC ====================
+        with gr.TabItem("1. Learn SMC"):
             gr.Markdown("""
+            # Understanding Sequential Monte Carlo
+            Sequential Monte Carlo (SMC) is a powerful technique for solving problems where you need to
+            navigate through a space of possibilities while satisfying constraints. Let's understand it
+            through both theory and our practical application.
+            """)
+            # Infographic
+            gr.Markdown("## The Big Picture")
+            gr.Image(INFOGRAPHIC_PATH, label="How AI Learns to See the Future: An Introduction to SMC", show_label=True)
+            gr.Markdown("---")
+            # Section 1: The Problem
+            gr.Markdown("""
+            ## 1. The Problem: Standard AI's "Greedy" Trap
+            ### What's Wrong with Normal Text Generation?
+            Most AI language models work **greedily** - they pick the best next word based on immediate probability,
+            without considering long-term consequences. This creates a fundamental problem:
+            **The Greedy Trap:**
+            - The model chooses what seems best *right now*
+            - It can't "see" that this choice leads to a dead end
+            - Once committed, it can't backtrack
+            ### Our Example: The Curse of Knowledge
+            When a lawyer tries to explain "Force Majeure" to a client, a standard AI naturally reaches for
+            legal terminology because those words are statistically most likely in that context:
+            ```
+            Standard AI: "Force Majeure is a contractual provision that excuses liability
+                         when extraordinary circumstances prevent fulfillment..."
+            ```
+            The AI picked "liability," "contractual," and "provision" because they're the most probable
+            next words - but now it's stuck using jargon the client won't understand!
+            **This is like choosing the path in a maze that looks shortest, only to hit a dead end.**
+            """)
+            gr.Markdown("---")
+            # Section 2: The Breakthrough
+            gr.Markdown("""
+            ## 2. The Breakthrough: Introducing SMC
+            ### The Key Insight: Explore Multiple Futures Simultaneously
+            Instead of committing to one path, SMC maintains **thousands of "particles"** - each representing
+            a different possible future. Think of it as sending out scouts in every direction.
+            ### How It Works in Our Translator:
+            ```
+            Standard AI:  One path → "Force Majeure is a contractual..." → STUCK WITH JARGON
+            SMC Approach: Path A → "Imagine you promised your friend..."     ✓ Keep exploring
+                         Path B → "This is a liability clause..."           ✗ Contains jargon
+                         Path C → "Think of it like a 'nobody's fault'..."  ✓ Keep exploring
+                         Path D → "The contractual provision states..."     ✗ Contains jargon
+                         Path E → "It's like when a big storm..."           ✓ Keep exploring
+            ```
+            **We explore multiple possibilities in parallel, keeping the promising ones and discarding the rest.**
+            """)
+            gr.Markdown("---")
+            # Section 3: The Process
+            gr.Markdown("""
+            ## 3. The Process: How SMC Finds the Optimal Path
+            SMC follows a three-step cycle that repeats until we reach our goal:
+            ### Step 1: EXPLORE (Expand)
+            Each surviving particle generates multiple possible continuations.
+            If we have 5 particles and each generates 3 continuations, we now have 15 candidates.
+            ### Step 2: FILTER (Evaluate)
+            We evaluate each candidate against our constraint (no jargon).
+            This is "survival of the fittest" - unpromising paths fade out.
+            **Two Filtering Strategies:**
+            | Strategy | How It Works | Pros | Cons |
+            |----------|--------------|------|------|
+            | **Hard Constraints** | Completely eliminate any particle with jargon | Guarantees jargon-free output | Can kill ALL particles if jargon is unavoidable |
+            | **Soft Constraints** | Reduce weight of particles with jargon (but let them survive) | More robust, allows gradual steering | May have occasional jargon slip through |
+            ### Step 3: RESAMPLE (Select)
+            We select particles for the next round based on their fitness:
+            - **Hard mode:** Random selection from survivors
+            - **Soft mode:** Weighted random selection (better particles more likely to be chosen)
+            ### The Math Behind Soft Constraints:
+            ```
+            Weight = 0.3 ^ (number of jargon words)
+            0 jargon words → Weight = 1.0   (100% chance)
+            1 jargon word  → Weight = 0.3   (30% chance)
+            2 jargon words → Weight = 0.09  (9% chance)
+            3 jargon words → Weight = 0.027 (2.7% chance)
+            ```
+            """)
+            gr.Markdown("---")
+            # Section 4: The Impact
+            gr.Markdown("""
+            ## 4. The Impact: From Prediction to Strategy
+            SMC transforms AI from a **reactive predictor** to a **strategic planner**.
+            ### What This Means for Our Translator:
+            | Approach | Can Plan Ahead? | Handles Constraints? | Success Rate |
+            |----------|-----------------|---------------------|--------------|
+            | Standard Greedy | No - commits immediately | No - uses probable words | N/A (always uses jargon) |
+            | SMC Hard | Yes - explores multiple paths | Yes - prunes violations | 25% (particles often die) |
+            | SMC Soft | Yes - explores multiple paths | Yes - penalizes violations | **100%** |
+            ### Beyond Translation: Where Else Is SMC Used?
+            - **Robotics:** Planning movements while avoiding obstacles
+            - **Autonomous Vehicles:** Predicting traffic and planning routes
+            - **Finance:** Portfolio optimization with risk constraints
+            - **Drug Discovery:** Exploring molecular structures with safety constraints
+            ### The Fundamental Shift:
+            > *"If your AI could plan 10 steps ahead instead of 1, what impossible problem would you have it solve first?"*
+            SMC represents moving from **simple prediction** to **true strategic foresight**.
+            """)
+            gr.Markdown("---")
+            # Connection to Next Tab
+            gr.Markdown("""
+            ## Ready to Try It Yourself?
+            Now that you understand how SMC works, head to the **"2. Try It: Translator"** tab
+            to see it in action! You can:
+            - Watch particles explore and get filtered in real-time
+            - Compare hard vs soft constraints
+            - Try different professional domains (Legal, Medical, Financial, Technical)
+            """)
+        # ==================== TAB 2: TRY IT ====================
+        with gr.TabItem("2. Try It: Translator"):
+            gr.Markdown("""
+            # The Plain-English Translator
+            ## The Problem We're Solving
+            **The Curse of Knowledge:** Experts often struggle to explain concepts without jargon.
+            A standard AI naturally uses technical terms because they're statistically probable.
+            **Our Solution:** Use SMC to explore multiple explanations simultaneously,
+            filtering out any path that uses forbidden terminology. This forces the model
+            to find creative, plain-language alternatives.
             ---
             """)
                     profession_dropdown = gr.Dropdown(
                         choices=["Legal", "Medical", "Financial", "Technical/Engineering"],
                         value="Legal",
+                        label="Professional Domain",
+                        info="Each domain has its own set of banned jargon terms"
                     )
                     custom_words = gr.Textbox(
+                        label="Additional Banned Words (optional)",
                         placeholder="e.g., contract, clause, party",
                         lines=1
                     )
                     model_dropdown = gr.Dropdown(
                         choices=list(AVAILABLE_MODELS.keys()),
+                        value="TinyLlama-1.1B (Fast)",
+                        label="Model"
+                    )
+                    constraint_mode = gr.Radio(
+                        choices=["Hard (Prune)", "Soft (Penalize)"],
+                        value="Soft (Penalize)",
+                        label="Constraint Mode",
+                        info="Soft constraints are more robust - see the Learn tab for explanation"
                     )
                     with gr.Row():
                         num_particles = gr.Slider(
                             minimum=2, maximum=10, value=5, step=1,
+                            label="Particles",
+                            info="More = more exploration"
                         )
                         max_steps = gr.Slider(
+                            minimum=5, maximum=30, value=15, step=5,
+                            label="Max Steps",
+                            info="SMC iterations"
+                        )
+                        tokens_per_step = gr.Slider(
+                            minimum=5, maximum=30, value=15, step=5,
+                            label="Tokens/Step",
+                            info="Generation length per iteration"
                         )
+                    translate_btn = gr.Button("Translate to Plain English", variant="primary", size="lg")
                 with gr.Column(scale=1):
                     gr.Markdown("""
+                    ### SMC in Action
+                    When you click translate, watch the trace log to see:
+                    1. **Particles explored** - Multiple paths generated
+                    2. **Filtering** - Jargon paths penalized/pruned
+                    3. **Resampling** - Best particles selected
+                    4. **Convergence** - Final jargon-free output
+                    **Tip:** Try the same concept with Hard vs Soft constraints
+                    to see the difference!
                     """)
             gr.Markdown("---")
+            gr.Markdown("### Output")
+            smc_output = gr.Textbox(label="Plain-English Explanation", lines=5, show_label=True)
+            with gr.Accordion("SMC Trace Log (See the algorithm in action)", open=True):
+                trace_output = gr.Textbox(label="", lines=20, show_label=False)
+            with gr.Accordion("Banned Words for This Domain", open=False):
+                banned_words_display = gr.Textbox(label="", lines=3, show_label=False)
             gr.Markdown("---")
+            gr.Markdown("### Example Scenarios")
+            gr.Examples(examples=EXAMPLES, inputs=[concept_input, profession_dropdown, custom_words], label="")
+        # ==================== TAB 3: EXPERIMENTS ====================
+        with gr.TabItem("3. Our Experiments"):
             gr.Markdown("""
+            # What We Learned: An Experimental Journey
+            This tab documents our experimental journey in applying SMC to constrained text generation.
+            We tested multiple approaches and models to understand what works and what doesn't.
             ---
             """)
             gr.Markdown("""
+            ## The Experimental Setup
+            ### Goal
+            Generate plain-English explanations of professional concepts (Legal, Medical, Financial, Technical)
+            that a 10-year-old could understand - **without using any domain-specific jargon**.
+            ### Benchmark
+            We created 12 test cases (3 per domain) with gold-standard translations from Claude Opus 4.5.
+            Each output was scored on:
+            | Criterion | Points | Description |
+            |-----------|--------|-------------|
+            | Jargon-Free | 25 | No banned terminology used |
+            | Has Example | 25 | Uses relatable analogy |
+            | Appropriate Length | 25 | 20-100 words |
+            | Coherence | 25 | Proper sentence structure |
             ---
             """)
+            # Experiment 1: Hard Constraints
+            gr.Markdown("""
+            ## Experiment 1: Hard Constraints (Prune All Jargon)
+            ### Hypothesis
+            If we completely eliminate any generation path containing jargon, the model will be forced
+            to find jargon-free alternatives.
+            ### Setup
+            - Models: TinyLlama-1.1B, Qwen2-0.5B, Gemma-2-2B
+            - Parameters: 5 particles, 25 max steps, 6 tokens per step
+            - Constraint: **Hard** - any particle with jargon is immediately pruned
+            ### Results
+            """)
+            # Build data from benchmark
             gemma_data = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]
             tinyllama_data = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]
             qwen_data = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]
+            gr.Markdown(f"""
+            | Model | Score | Success Rate | Outcome |
+            |-------|-------|--------------|---------|
+            | Gemma-2-2B | {gemma_data['total_score']}/{gemma_data['max_possible']} ({gemma_data['percentage']}%) | {gemma_data.get('successful_outputs', 3)}/12 | 9 empty outputs |
+            | TinyLlama-1.1B | {tinyllama_data['total_score']}/{tinyllama_data['max_possible']} ({tinyllama_data['percentage']}%) | {tinyllama_data.get('successful_outputs', 3)}/12 | 9 empty outputs |
+            | Qwen2-0.5B | {qwen_data['total_score']}/{qwen_data['max_possible']} ({qwen_data['percentage']}%) | {qwen_data.get('successful_outputs', 2)}/12 | 10 empty outputs |
+            ### What Happened?
+            **75% of test cases produced empty outputs!**
+            The problem: When explaining medical concepts, the model naturally reaches for words like
+            "benign," "lesion," and "diagnosis." With hard constraints, EVERY generation path
+            contained at least one banned word, causing **total particle death**.
+            ### Key Learning
+            Hard constraints are too aggressive. Domain-specific vocabulary is so deeply embedded
+            in model weights that it's nearly impossible to avoid entirely through pruning alone.
+            ---
+            """)
+            # Experiment 2: Soft Constraints
+            gr.Markdown("""
+            ## Experiment 2: Soft Constraints (Weighted Resampling)
+            ### Hypothesis
+            Instead of killing particles with jargon, we should **penalize** them with lower weights.
+            This allows gradual steering toward jargon-free outputs while preventing particle death.
+            ### The Key Insight
+            ```
+            Weight = penalty_factor ^ (jargon_count)
+            With penalty_factor = 0.3:
+            - 0 jargon words → weight = 1.0
+            - 1 jargon word  → weight = 0.3
+            - 2 jargon words → weight = 0.09
+            ```
+            Particles with jargon can **survive** but are less likely to be selected for the next generation.
+            Over time, the population naturally shifts toward jargon-free outputs.
+            ### Setup
+            - Model: Qwen2.5-7B (via Ollama)
+            - Parameters: 5 particles, 15 max steps, 25 tokens per step
+            - Constraint: **Soft** - penalty factor 0.3
+            ### Results
+            """)
+            qwen25_soft_data = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {})
+            gr.Markdown(f"""
+            | Model | Score | Success Rate | Jargon Violations |
+            |-------|-------|--------------|-------------------|
+            | Qwen2.5-7B (Soft) | {qwen25_soft_data.get('total_score', 920)}/{qwen25_soft_data.get('max_possible', 1200)} ({qwen25_soft_data.get('percentage', 76.7)}%) | **{qwen25_soft_data.get('successful_outputs', 12)}/12** | 1/12 |
+            ### The Transformation
+            | Metric | Hard Constraints | Soft Constraints |
+            |--------|------------------|------------------|
+            | Success Rate | 25% (3/12) | **100% (12/12)** |
+            | Average Score | ~44% | **76.7%** |
+            | Empty Outputs | 9/12 | **0/12** |
+            ### What Changed?
+            - Particles with jargon no longer die instantly
+            - The population gradually evolves toward jargon-free outputs
+            - Even if early generations contain jargon, later generations learn to avoid it
+            - The one jargon violation ("synchronous") was unavoidable given the topic
             ---
             """)
+            # Comparison Browser
+            gr.Markdown("## Compare Results Across Models")
+            gr.Markdown("Select an example to see how different approaches performed:")
             all_examples = []
             for domain in ["Legal", "Medical", "Financial", "Technical/Engineering"]:
                 for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
+                    all_examples.append(f"{domain}: {concept[:55]}...")
+            example_dropdown = gr.Dropdown(choices=all_examples, value=all_examples[0], label="Select Example")
             first_domain = "Legal"
             first_concept = list(BENCHMARK_DATA["claude_opus_benchmarks"]["Legal"].keys())[0]
             initial_claude = BENCHMARK_DATA["claude_opus_benchmarks"][first_domain][first_concept]["translation"]
+            initial_qwen25 = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {}).get("results", {}).get(first_domain, {}).get(first_concept, {}).get("output", "") or "(Not available)"
+            initial_gemma = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]["results"][first_domain][first_concept].get("output", "") or "(Hard constraints killed all particles)"
             with gr.Row():
                 with gr.Column():
+                    gr.Markdown("**Claude Opus 4.5 (Gold Standard)**")
+                    claude_output = gr.Textbox(value=initial_claude, lines=4, interactive=False, show_label=False)
                 with gr.Column():
+                    gr.Markdown("**Qwen2.5-7B (Soft Constraints)**")
+                    qwen25_output = gr.Textbox(value=initial_qwen25, lines=4, interactive=False, show_label=False)
             with gr.Row():
                 with gr.Column():
+                    gr.Markdown("**Gemma-2-2B (Hard Constraints)**")
+                    gemma_output = gr.Textbox(value=initial_gemma, lines=4, interactive=False, show_label=False)
                 with gr.Column():
+                    gr.Markdown("**TinyLlama-1.1B (Hard Constraints)**")
+                    initial_tiny = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][first_domain][first_concept].get("output", "") or "(Hard constraints killed all particles)"
+                    tinyllama_output = gr.Textbox(value=initial_tiny, lines=4, interactive=False, show_label=False)
             def update_example_outputs(selection):
                 domain = selection.split(":")[0]
                 concept_preview = selection.split(": ")[1].replace("...", "")
                 for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
                     if concept.startswith(concept_preview.strip()):
                         claude = BENCHMARK_DATA["claude_opus_benchmarks"][domain][concept]["translation"]
+                        qwen25 = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {}).get("results", {}).get(domain, {}).get(concept, {}).get("output", "") or "(Not available)"
+                        gemma = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]["results"][domain].get(concept, {}).get("output", "") or "(Hard constraints killed all particles)"
+                        tiny = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][domain].get(concept, {}).get("output", "") or "(Hard constraints killed all particles)"
+                        return claude, qwen25, gemma, tiny
                 return "Not found", "Not found", "Not found", "Not found"
             example_dropdown.change(
                 fn=update_example_outputs,
                 inputs=[example_dropdown],
+                outputs=[claude_output, qwen25_output, gemma_output, tinyllama_output]
             )
             gr.Markdown("---")
+            # Key Takeaways
+            gr.Markdown("""
+            ## Key Takeaways
+            ### What We Learned About SMC for Constrained Generation
+            1. **Soft constraints dramatically outperform hard constraints**
+               - Hard pruning causes particle death when constraints conflict with model priors
+               - Weighted resampling allows graceful degradation and recovery
+            2. **Penalty factor matters**
+               - 0.3 (70% reduction per jargon word) provided good balance
+               - Too aggressive (0.1) → still causes particle death
+               - Too lenient (0.5) → jargon persists too long
+            3. **Model size affects vocabulary diversity**
+               - Larger models (7B+) have more alternative phrasings available
+               - Smaller models get stuck more easily because they have fewer "escape routes"
+            4. **SMC enables strategic generation**
+               - Standard greedy generation commits immediately and can't backtrack
+               - SMC explores multiple futures and converges on the best path
+            ### Broader Implications
+            This technique applies beyond jargon filtering:
+            - **Content moderation:** Generate text avoiding harmful content
+            - **Style transfer:** Guide generation toward specific writing styles
+            - **Factual grounding:** Penalize generations that contradict known facts
+            - **Length control:** Soft constraints on verbosity
             ---
+            *Experiments conducted December 2025. Models tested via HuggingFace Transformers and Ollama.*
             """)
+    # Event handlers
     translate_btn.click(
         fn=smc_translate,
+        inputs=[concept_input, profession_dropdown, custom_words, model_dropdown, num_particles, max_steps, tokens_per_step, constraint_mode],
         outputs=[smc_output, trace_output, banned_words_display]
     )

benchmark_data.json CHANGED Viewed

@@ -1,10 +1,24 @@
 {
   "metadata": {
     "benchmark_date": "2025-12-26",
-    "smc_params": {
-      "num_particles": 5,
-      "max_steps": 25,
-      "tokens_per_step": 6
     },
     "scoring": {
       "jargon_free": 25,
@@ -73,6 +87,97 @@
     }
   },
   "model_results": {
     "Gemma-2-2B": {
       "total_score": 550,
       "max_possible": 1200,

 {
   "metadata": {
     "benchmark_date": "2025-12-26",
+    "constraint_modes": {
+      "hard": {
+        "description": "Prune all particles containing jargon",
+        "smc_params": {
+          "num_particles": 5,
+          "max_steps": 25,
+          "tokens_per_step": 6
+        }
+      },
+      "soft": {
+        "description": "Weighted resampling: particles with jargon get reduced weights",
+        "penalty_factor": 0.3,
+        "smc_params": {
+          "num_particles": 5,
+          "max_steps": 15,
+          "tokens_per_step": 25
+        }
+      }
     },
     "scoring": {
       "jargon_free": 25,
     }
   },
   "model_results": {
+    "Qwen2.5-7B-SoftConstraint": {
+      "total_score": 920,
+      "max_possible": 1200,
+      "percentage": 76.7,
+      "jargon_violations": 1,
+      "successful_outputs": 12,
+      "pruned_outputs": 0,
+      "constraint_mode": "soft",
+      "results": {
+        "Legal": {
+          "Force Majeure clause and why it might void our contract": {
+            "output": "Imagine you promised to help your neighbor clean their garage on Saturday. But then a huge storm comes and makes everything too wet. A Force Majeure clause is like saying 'if something really big and unexpected happens that makes it impossible to keep our promise, then neither of us gets in trouble for breaking it.'",
+            "jargon_found": [],
+            "score": 75,
+            "status": "success"
+          },
+          "Why we need to add an indemnification clause to protect your business": {
+            "output": "Imagine you're selling lemonade at a fair. You want to make sure that if someone gets sick from your lemonade, you won't have to pay their doctor bills or get into big trouble. So, you ask the person making your lemons to promise they'll take responsibility if anything goes wrong with the ingredients they provided.",
+            "jargon_found": [],
+            "score": 75,
+            "status": "success"
+          },
+          "What happens if the other party breaches the non-compete agreement": {
+            "output": "Imagine you and your best friend promise not to join another soccer team while you're both on the same one. If your friend joins another team without telling you or getting permission, they've broken their promise. In this case, you might feel sad and upset, and the grown-ups might need to help figure out how to make things fair again.",
+            "jargon_found": [],
+            "score": 75,
+            "status": "success"
+          }
+        },
+        "Medical": {
+          "Your MRI shows a benign lesion that we should monitor": {
+            "output": "Imagine your body is like a house. Sometimes, an MRI can find a small spot that's no big deal—like finding a harmless spider in one of your rooms. We just want to check on it now and then to make sure it stays small and doesn't cause any trouble.",
+            "jargon_found": [],
+            "score": 75,
+            "status": "success"
+          },
+          "The etiology of your chronic fatigue syndrome": {
+            "output": "Imagine your body is like a car. Sometimes, just like how a car might need maintenance or have an engine problem, your body can feel really tired all the time. We're trying to figure out what's making your 'engine' run slow—it could be from a past sickness, stress, or how your body's defense system is working.",
+            "jargon_found": [],
+            "score": 75,
+            "status": "success"
+          },
+          "Why we're recommending prophylactic treatment given your comorbidities": {
+            "output": "Imagine your body is like a house, and sometimes tiny germs try to get inside through the windows. Your immune system is like the house's security system. Because you have a few health conditions at once, your security system isn't as strong, so we want to give you medicine now to help keep those germs out before they cause trouble.",
+            "jargon_found": [],
+            "score": 75,
+            "status": "success"
+          }
+        },
+        "Financial": {
+          "How compound interest and amortization affect your mortgage payments": {
+            "output": "Imagine you have a piggy bank where you put in $100 every month to pay off your toy store credit. At first, most of your money goes to paying back the store for letting you borrow money. But over time, more and more of your $100 actually goes toward paying off what you originally owed.",
+            "jargon_found": [],
+            "score": 100,
+            "status": "success"
+          },
+          "Why we recommend diversifying your portfolio with low-liquidity assets": {
+            "output": "Imagine you have a toy box full of different toys. Some toys, like building blocks, are easy to play with anytime—you can grab them quickly. Other toys, like a special puzzle that takes time to set up, are harder to use right away but can be really fun and valuable. We suggest having both kinds so if one type isn't working well, you still have the other.",
+            "jargon_found": [],
+            "score": 75,
+            "status": "success"
+          },
+          "The tax implications of depreciation on your rental property": {
+            "output": "Imagine you have a toy castle that you use for your tea parties. Over time, parts of the castle get old and need fixing. The government lets you say 'my castle is worth less now because it's wearing out,' and that means you don't have to give as much of your allowance in taxes.",
+            "jargon_found": [],
+            "score": 75,
+            "status": "success"
+          }
+        },
+        "Technical/Engineering": {
+          "Why our API has high latency and how microservices could help": {
+            "output": "Imagine you're at a big birthday party, and you want to ask your friend what their favorite ice cream flavor is. If everyone has to wait in one long line to talk to the same person, it takes forever. But if you split up into smaller groups where each person handles one type of question, everyone gets their answers much faster.",
+            "jargon_found": [],
+            "score": 75,
+            "status": "success"
+          },
+          "The difference between synchronous and asynchronous processing": {
+            "output": "Imagine you're helping mommy or daddy make lemonade. With one way, you follow each step one after another: first, you cut the lemons, then you squeeze them, then you add sugar. With the other way, you can start squeezing while someone else adds sugar—everyone works at the same time!",
+            "jargon_found": ["synchronous"],
+            "score": 70,
+            "status": "success"
+          },
+          "Why we need to refactor the legacy codebase before adding new features": {
+            "output": "Imagine you have an old toy house made of wooden blocks that's been your favorite for years. It's starting to get wobbly. Instead of just adding more rooms on top (which could make it fall down), we need to fix the wobbly parts first so everything stays strong when we add new pieces.",
+            "jargon_found": [],
+            "score": 75,
+            "status": "success"
+          }
+        }
+      }
+    },
     "Gemma-2-2B": {
       "total_score": 550,
       "max_possible": 1200,