"""
Learning Sequential Monte Carlo (SMC) Through the Plain-English Translator

An interactive educational space that teaches Sequential Monte Carlo methods
using a practical application: helping professionals explain complex concepts
without using industry jargon.
"""

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import random
import json
import os
import math

# Mock spaces module for local development (only needed on HuggingFace Spaces)
try:
    import spaces
except ImportError:
    class spaces:
        """Local stand-in for the `spaces` package: GPU() is a no-op decorator."""

        @staticmethod
        def GPU(func=None, **_options):
            # Accept both the bare form (`@spaces.GPU`) and the parameterised
            # form (`@spaces.GPU(duration=...)`) so decorated code runs
            # unchanged when the real package is absent.
            if callable(func):
                return func
            return lambda wrapped: wrapped

# Load benchmark data
BENCHMARK_DATA_PATH = os.path.join(os.path.dirname(__file__), "benchmark_data.json")
# Explicit UTF-8: without it the platform's locale encoding is used, which can
# fail on non-ASCII text in the benchmark file.
with open(BENCHMARK_DATA_PATH, "r", encoding="utf-8") as f:
    BENCHMARK_DATA = json.load(f)

# Path to infographic
INFOGRAPHIC_PATH = os.path.join(os.path.dirname(__file__), "Sequential_monte_carlo.png")

# ============================================================================
# MODEL SETUP
# ============================================================================

# UI label -> Hugging Face model id.
AVAILABLE_MODELS = {
    "TinyLlama-1.1B (Fast)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Qwen2-0.5B (Fastest)": "Qwen/Qwen2-0.5B-Instruct",
    "Qwen2.5-7B (Best Quality)": "Qwen/Qwen2.5-7B-Instruct",
    "Qwen3-8B (Latest)": "Qwen/Qwen3-8B",
    "Gemma-2-2B (Requires HF Login)": "google/gemma-2-2b-it",
}

# Process-wide caches keyed by model id, so each model/tokenizer is loaded once.
loaded_models = {}
loaded_tokenizers = {}


def load_model(model_name: str):
    """Return the ``(tokenizer, model)`` pair for a UI model label.

    Args:
        model_name: A key of ``AVAILABLE_MODELS``. Unknown labels fall back to
            the TinyLlama model id rather than raising.

    Returns:
        A ``(tokenizer, model)`` tuple. Both objects are cached in
        ``loaded_tokenizers`` / ``loaded_models``, so repeated calls with the
        same label reuse the already-loaded instances.
    """
    model_id = AVAILABLE_MODELS.get(model_name, "TinyLlama/TinyLlama-1.1B-Chat-v1.0")

    if model_id not in loaded_tokenizers:
        loaded_tokenizers[model_id] = AutoTokenizer.from_pretrained(model_id)

    if model_id not in loaded_models:
        loaded_models[model_id] = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.float16
        )

    return loaded_tokenizers[model_id], loaded_models[model_id]


# ============================================================================
# JARGON DICTIONARIES
# ============================================================================
JARGON_DICTIONARIES = {
    "Legal": [
        "liability", "liable", "indemnify", "indemnification", "breach",
        "statute", "damages", "negligence", "herein", "aforementioned",
        "plaintiff", "defendant", "jurisdiction", "arbitration", "tort",
        "fiduciary", "escrow", "lien", "deposition", "stipulation",
        "injunction", "subpoena", "affidavit", "adjudicate", "appellant"
    ],
    "Medical": [
        "prognosis", "diagnosis", "etiology", "pathology", "contraindicated",
        "idiopathic", "nosocomial", "comorbidity", "prophylactic", "benign",
        "malignant", "metastasis", "hemorrhage", "ischemia", "infarction",
        "edema", "necrosis", "lesion", "syndrome", "acute",
        "chronic", "bilateral", "unilateral", "subcutaneous", "intravenous"
    ],
    "Financial": [
        "amortization", "liquidity", "collateral", "derivative", "equity",
        "fiduciary", "hedge", "leverage", "portfolio", "securities",
        "dividend", "depreciation", "liability", "asset", "accrual",
        "arbitrage", "capitalization", "yield", "maturity", "principal",
        "compound", "annuity", "underwriting", "insolvency", "solvency"
    ],
    "Technical/Engineering": [
        "algorithm", "bandwidth", "latency", "throughput", "scalability",
        "deprecated", "refactor", "polymorphism", "encapsulation", "abstraction",
        "iteration", "recursion", "synchronous", "asynchronous", "protocol",
        "middleware", "backend", "frontend", "deployment", "infrastructure",
        "microservices", "containerization", "orchestration", "API", "SDK"
    ]
}

# ============================================================================
# SMC CORE FUNCTIONS
# ============================================================================

# Last line of the prompt; everything after it in a particle is model output.
_ANSWER_MARKER = "Simple explanation with example:"


def _generated_text(particle: str) -> str:
    """Return the part of *particle* that follows the prompt marker, stripped."""
    return particle.split(_ANSWER_MARKER)[-1].strip()


def _contains_term(text_lower: str, term: str) -> bool:
    """Tell whether *term* occurs in *text_lower* as a whole word or phrase.

    *text_lower* must already be lower-cased. A match must not be directly
    preceded or followed by a word character, so "lien" does not match
    "client", while "(lien)", "lien;" and a term at a line start do match.
    Blank terms never match.
    """
    import re  # function-scope import: `re` is not in the file's import block

    term_lower = term.strip().lower()
    if not term_lower:
        return False
    # Lookarounds instead of \b so terms that begin or end with a non-word
    # character (e.g. "C++") still get sensible boundaries.
    pattern = rf"(?<!\w){re.escape(term_lower)}(?!\w)"
    return re.search(pattern, text_lower) is not None


def is_safe(text: str, banned_words: list) -> bool:
    """Return True when *text* contains none of *banned_words* (case-insensitive)."""
    text_lower = text.lower()
    return not any(_contains_term(text_lower, word) for word in banned_words)


def find_jargon_used(text: str, banned_words: list) -> list:
    """Return the entries of *banned_words* found in *text*, in list order.

    Matching is case-insensitive and whole-word (see ``_contains_term``); each
    banned entry is reported at most once, however often it occurs.
    """
    text_lower = text.lower()
    return [word for word in banned_words if _contains_term(text_lower, word)]


def count_jargon(text: str, banned_words: list) -> int:
    """Return how many distinct entries of *banned_words* appear in *text*."""
    return len(find_jargon_used(text, banned_words))


def compute_weight(text: str, banned_words: list, penalty_factor: float = 0.3) -> float:
    """Return the soft-constraint weight ``penalty_factor ** jargon_count``.

    Jargon-free text gets weight 1.0; every distinct banned term multiplies
    the weight by *penalty_factor*.
    """
    jargon_count = count_jargon(text, banned_words)
    return math.pow(penalty_factor, jargon_count)


def weighted_resample(particles: list, weights: list, num_samples: int) -> list:
    """Draw up to *num_samples* distinct particles, favouring higher weights.

    Sampling is done with replacement and duplicates are then removed (first
    occurrence kept), so fewer than *num_samples* particles may come back.

    Args:
        particles: Candidate texts.
        weights: One non-negative weight per particle.
        num_samples: Number of draws.

    Returns:
        A de-duplicated list of sampled particles; ``[]`` when either input
        is empty.
    """
    if not particles or not weights:
        return []

    # random.choices rejects an all-zero weight vector, so fall back to a
    # uniform draw in that case. Otherwise relative weights are enough;
    # normalising them first would not change the distribution.
    if sum(weights) <= 0:
        drawn = random.choices(particles, k=num_samples)
    else:
        drawn = random.choices(particles, weights=weights, k=num_samples)

    return list(dict.fromkeys(drawn))


def _merge_banned_words(domain_words: list, custom_banned_words: str) -> list:
    """Combine domain terms with comma-separated custom terms, without duplicates.

    De-duplication is case-insensitive and keeps the first spelling seen, so a
    custom term that repeats a dictionary term is not counted (and penalised)
    twice.
    """
    custom_terms = [w.strip() for w in (custom_banned_words or "").split(",") if w.strip()]
    merged = []
    seen = set()
    for word in list(domain_words) + custom_terms:
        key = word.lower()
        if key not in seen:
            seen.add(key)
            merged.append(word)
    return merged


def _best_particle(particles: list, banned_words: list, soft: bool) -> str:
    """Pick the particle whose generated text is reported to the user.

    Soft mode: the particle with the fewest banned terms in its generated
    text (first one wins ties). Hard mode: every survivor is already
    jargon-free, so the first particle is used.
    """
    if soft:
        return min(particles, key=lambda p: count_jargon(_generated_text(p), banned_words))
    return particles[0]


@spaces.GPU
def smc_translate(
    concept: str,
    profession: str,
    custom_banned_words: str = "",
    model_name: str = "TinyLlama-1.1B (Fast)",
    num_particles: int = 5,
    max_steps: int = 20,
    tokens_per_step: int = 15,
    constraint_mode: str = "Soft (Penalize)",
    progress=gr.Progress()
) -> tuple:
    """Explain *concept* in plain English using an SMC-style search.

    Each iteration extends every particle with three sampled continuations
    (EXPLORE), scores the generated text against the banned-word list
    (FILTER/WEIGHT) and keeps at most *num_particles* candidates (RESAMPLE).
    Only the text after the prompt marker is scored: the prompt quotes the
    user's concept, which frequently contains banned terms itself.

    Args:
        concept: The concept to explain.
        profession: Key of ``JARGON_DICTIONARIES``; unknown keys give an
            empty domain list.
        custom_banned_words: Extra banned terms, comma-separated.
        model_name: Label passed to ``load_model``.
        num_particles: Maximum particles kept per iteration.
        max_steps: Maximum number of iterations.
        tokens_per_step: New tokens generated per particle per iteration.
        constraint_mode: Any value containing "Soft" selects weighted
            resampling; anything else prunes candidates that contain jargon.
        progress: Gradio progress tracker used to wrap the iteration loop.

    Returns:
        ``(final_text, trace_log, banned_words)`` where the last two are a
        newline-joined log and a comma-joined list of the banned terms.
    """
    tokenizer, model_inst = load_model(model_name)
    use_soft_constraints = "Soft" in constraint_mode

    # UI sliders may hand these over as floats; range() and generate() need ints.
    num_particles = max(1, int(num_particles))
    max_steps = max(0, int(max_steps))
    tokens_per_step = max(1, int(tokens_per_step))

    banned_words = _merge_banned_words(JARGON_DICTIONARIES.get(profession, []), custom_banned_words)

    prompt = f"""You are an expert {profession.lower()} professional explaining a concept to a client with no background in your field.

Rules:
- Explain as if talking to a curious 10-year-old
- Use a concrete, relatable real-world example to illustrate the concept
- Avoid technical jargon - use everyday words instead
- Keep it concise: 2-3 sentences max

Concept to explain: {concept}

Simple explanation with example:"""

    particles = [prompt]
    rule = "=" * 60
    trace_log = [
        rule,
        "SMC PLAIN-ENGLISH TRANSLATOR - TRACE LOG",
        rule,
        f"Model: {model_name}",
        f"Constraint Mode: {constraint_mode}",
        f"Concept: {concept}",
        f"Domain: {profession}",
        f"Banned words: {len(banned_words)} terms",
        f"Particles: {num_particles} | Steps: {max_steps} | Tokens/step: {tokens_per_step}",
        rule,
        "",
    ]

    penalty_factor = 0.3
    all_particles_died = False

    for step in progress.tqdm(range(max_steps), desc="SMC Iteration"):
        candidates = []

        # STEP 1: EXPLORE - Generate multiple continuations
        for particle in particles:
            inputs = tokenizer(particle, return_tensors="pt").to(model_inst.device)
            with torch.no_grad():
                outputs = model_inst.generate(
                    **inputs,
                    max_new_tokens=tokens_per_step,
                    num_return_sequences=3,
                    do_sample=True,
                    temperature=0.9 if use_soft_constraints else 0.8,
                    top_p=0.95 if use_soft_constraints else 0.9,
                    pad_token_id=tokenizer.eos_token_id
                )
            for out in outputs:
                candidates.append(tokenizer.decode(out, skip_special_tokens=True))

        if not candidates:
            trace_log.append(f"Step {step+1}: No candidates generated - stopping")
            break

        # STEP 2: FILTER/WEIGHT - Apply constraints to the generated text only
        if use_soft_constraints:
            # Count once per candidate and derive the weight from that count.
            jargon_counts = [count_jargon(_generated_text(c), banned_words) for c in candidates]
            weights = [math.pow(penalty_factor, n) for n in jargon_counts]
            clean_count = sum(1 for n in jargon_counts if n == 0)

            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
            trace_log.append(f" {clean_count} jargon-free | Weights: [{min(weights):.2f} - {max(weights):.2f}]")

            # STEP 3: RESAMPLE - Weighted selection
            particles = weighted_resample(candidates, weights, num_particles)
            if not particles:
                trace_log.append(f" Resampling failed - stopping")
                break
            trace_log.append(f" Resampled to {len(particles)} particles")
        else:
            valid_candidates = [c for c in candidates if is_safe(_generated_text(c), banned_words)]
            pruned_count = len(candidates) - len(valid_candidates)

            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
            trace_log.append(f" {len(valid_candidates)} survived | {pruned_count} pruned (contained jargon)")

            if not valid_candidates:
                trace_log.append(f" ALL PARTICLES DIED - jargon unavoidable!")
                # Keep the previous generation so any text produced before
                # the dead end can still be returned.
                all_particles_died = True
                break

            unique_candidates = list(dict.fromkeys(valid_candidates))
            random.shuffle(unique_candidates)
            particles = unique_candidates[:num_particles]

        # Check for completion on the particle that would actually be returned
        current_text = _generated_text(_best_particle(particles, banned_words, use_soft_constraints))
        if current_text.endswith(('.', '!', '?')) and len(current_text) > 80:
            trace_log.append(f"\nNatural completion reached at step {step+1}")
            break

    trace_log.append("")
    trace_log.append(rule)

    # Get best result
    if particles:
        final_text = _generated_text(_best_particle(particles, banned_words, use_soft_constraints))
    else:
        final_text = ""

    if not final_text and (all_particles_died or not particles):
        # Every path was pruned before any jargon-free text existed.
        final_text = "(All generation paths used jargon - try soft constraints!)"
        trace_log.append("RESULT: No jargon-free text could be generated")
    else:
        final_jargon = find_jargon_used(final_text, banned_words)
        if final_jargon:
            trace_log.append(f"RESULT: Contains jargon: {final_jargon}")
        else:
            trace_log.append(f"RESULT: Jargon-free output achieved!")
    trace_log.append(rule)

    return final_text, "\n".join(trace_log), ", ".join(banned_words)


# ============================================================================
# EXAMPLES
# ============================================================================

# Rows for gr.Examples: [concept, professional domain, additional banned words].
EXAMPLES = [
    ["Force Majeure clause and why it might void our contract", "Legal", ""],
    ["Why we need to add an indemnification clause to protect your business", "Legal", ""],
    ["Your MRI shows a benign lesion that we should monitor", "Medical", ""],
    ["The etiology of your chronic fatigue syndrome", "Medical", ""],
    ["How compound interest and amortization affect your mortgage payments", "Financial", ""],
    ["Why we recommend diversifying your portfolio with low-liquidity assets", "Financial", ""],
    ["Why our API has high latency and how microservices could help", "Technical/Engineering", ""],
    ["The difference between synchronous and asynchronous processing", "Technical/Engineering", ""],
]

# ============================================================================
# GRADIO INTERFACE
# ============================================================================

# NOTE(review): the Markdown bodies below are indented with the surrounding
# code. This assumes gr.Markdown dedents its value before rendering - confirm
# against the Gradio version the Space is pinned to.
with gr.Blocks(title="Learn SMC: The Plain-English Translator") as demo:

    # ==================== HEADER ====================
    gr.Markdown("""
    # Learning Sequential Monte Carlo (SMC)
    ## An Interactive Guide Using the Plain-English Translator

    Welcome! This space teaches you about **Sequential Monte Carlo** methods through a practical application: helping professionals explain complex concepts without using jargon.

    Navigate through the tabs to learn the theory, try the interactive demo, and see our experimental results.
    """)

    with gr.Tabs():

        # ==================== TAB 1: LEARN SMC ====================
        with gr.TabItem("1. Learn SMC"):

            gr.Markdown("""
            # Understanding Sequential Monte Carlo

            Sequential Monte Carlo (SMC) is a powerful technique for solving problems where you need to navigate through a space of possibilities while satisfying constraints. Let's understand it through both theory and our practical application.
            """)

            # Infographic
            gr.Markdown("## The Big Picture")
            gr.Image(INFOGRAPHIC_PATH, label="How AI Learns to See the Future: An Introduction to SMC", show_label=True)

            gr.Markdown("---")

            # Section 1: The Problem
            gr.Markdown("""
            ## 1. The Problem: Standard AI's "Greedy" Trap

            ### What's Wrong with Normal Text Generation?

            Most AI language models work **greedily** - they pick the best next word based on immediate probability, without considering long-term consequences. This creates a fundamental problem:

            **The Greedy Trap:**
            - The model chooses what seems best *right now*
            - It can't "see" that this choice leads to a dead end
            - Once committed, it can't backtrack

            ### Our Example: The Curse of Knowledge

            When a lawyer tries to explain "Force Majeure" to a client, a standard AI naturally reaches for legal terminology because those words are statistically most likely in that context:

            ```
            Standard AI: "Force Majeure is a contractual provision that excuses liability when extraordinary circumstances prevent fulfillment..."
            ```

            The AI picked "liability," "contractual," and "provision" because they're the most probable next words - but now it's stuck using jargon the client won't understand!

            **This is like choosing the path in a maze that looks shortest, only to hit a dead end.**
            """)

            gr.Markdown("---")

            # Section 2: The Breakthrough
            gr.Markdown("""
            ## 2. The Breakthrough: Introducing SMC

            ### The Key Insight: Explore Multiple Futures Simultaneously

            Instead of committing to one path, SMC maintains **thousands of "particles"** - each representing a different possible future. Think of it as sending out scouts in every direction.

            ### How It Works in Our Translator:

            ```
            Standard AI:
              One path → "Force Majeure is a contractual..." → STUCK WITH JARGON

            SMC Approach:
              Path A → "Imagine you promised your friend..." ✓ Keep exploring
              Path B → "This is a liability clause..." ✗ Contains jargon
              Path C → "Think of it like a 'nobody's fault'..." ✓ Keep exploring
              Path D → "The contractual provision states..." ✗ Contains jargon
              Path E → "It's like when a big storm..." ✓ Keep exploring
            ```

            **We explore multiple possibilities in parallel, keeping the promising ones and discarding the rest.**
            """)

            gr.Markdown("---")

            # Section 3: The Process
            gr.Markdown("""
            ## 3. The Process: How SMC Finds the Optimal Path

            SMC follows a three-step cycle that repeats until we reach our goal:

            ### Step 1: EXPLORE (Expand)
            Each surviving particle generates multiple possible continuations. If we have 5 particles and each generates 3 continuations, we now have 15 candidates.

            ### Step 2: FILTER (Evaluate)
            We evaluate each candidate against our constraint (no jargon). This is "survival of the fittest" - unpromising paths fade out.

            **Two Filtering Strategies:**

            | Strategy | How It Works | Pros | Cons |
            |----------|--------------|------|------|
            | **Hard Constraints** | Completely eliminate any particle with jargon | Guarantees jargon-free output | Can kill ALL particles if jargon is unavoidable |
            | **Soft Constraints** | Reduce weight of particles with jargon (but let them survive) | More robust, allows gradual steering | May have occasional jargon slip through |

            ### Step 3: RESAMPLE (Select)
            We select particles for the next round based on their fitness:
            - **Hard mode:** Random selection from survivors
            - **Soft mode:** Weighted random selection (better particles more likely to be chosen)

            ### The Math Behind Soft Constraints:

            ```
            Weight = 0.3 ^ (number of jargon words)

            0 jargon words → Weight = 1.0 (100% chance)
            1 jargon word → Weight = 0.3 (30% chance)
            2 jargon words → Weight = 0.09 (9% chance)
            3 jargon words → Weight = 0.027 (2.7% chance)
            ```
            """)

            gr.Markdown("---")

            # Section 4: The Impact
            gr.Markdown("""
            ## 4. The Impact: From Prediction to Strategy

            SMC transforms AI from a **reactive predictor** to a **strategic planner**.

            ### What This Means for Our Translator:

            | Approach | Can Plan Ahead? | Handles Constraints? | Success Rate |
            |----------|-----------------|---------------------|--------------|
            | Standard Greedy | No - commits immediately | No - uses probable words | N/A (always uses jargon) |
            | SMC Hard | Yes - explores multiple paths | Yes - prunes violations | 25% (particles often die) |
            | SMC Soft | Yes - explores multiple paths | Yes - penalizes violations | **100%** |

            ### Beyond Translation: Where Else Is SMC Used?

            - **Robotics:** Planning movements while avoiding obstacles
            - **Autonomous Vehicles:** Predicting traffic and planning routes
            - **Finance:** Portfolio optimization with risk constraints
            - **Drug Discovery:** Exploring molecular structures with safety constraints

            ### The Fundamental Shift:

            > *"If your AI could plan 10 steps ahead instead of 1, what impossible problem would you have it solve first?"*

            SMC represents moving from **simple prediction** to **true strategic foresight**.
            """)

            gr.Markdown("---")

            # Connection to Next Tab
            gr.Markdown("""
            ## Ready to Try It Yourself?

            Now that you understand how SMC works, head to the **"2. Try It: Translator"** tab to see it in action!

            You can:
            - Watch particles explore and get filtered in real-time
            - Compare hard vs soft constraints
            - Try different professional domains (Legal, Medical, Financial, Technical)
            """)

        # ==================== TAB 2: TRY IT ====================
        with gr.TabItem("2. Try It: Translator"):

            gr.Markdown("""
            # The Plain-English Translator

            ## The Problem We're Solving

            **The Curse of Knowledge:** Experts often struggle to explain concepts without jargon. A standard AI naturally uses technical terms because they're statistically probable.

            **Our Solution:** Use SMC to explore multiple explanations simultaneously, filtering out any path that uses forbidden terminology. This forces the model to find creative, plain-language alternatives.

            ---
            """)

            with gr.Row():
                with gr.Column(scale=2):
                    concept_input = gr.Textbox(
                        label="Concept to Explain",
                        placeholder="e.g., 'Force Majeure clause and why it might void our contract'",
                        lines=2
                    )

                    profession_dropdown = gr.Dropdown(
                        choices=["Legal", "Medical", "Financial", "Technical/Engineering"],
                        value="Legal",
                        label="Professional Domain",
                        info="Each domain has its own set of banned jargon terms"
                    )

                    custom_words = gr.Textbox(
                        label="Additional Banned Words (optional)",
                        placeholder="e.g., contract, clause, party",
                        lines=1
                    )

                    model_dropdown = gr.Dropdown(
                        choices=list(AVAILABLE_MODELS.keys()),
                        value="TinyLlama-1.1B (Fast)",
                        label="Model"
                    )

                    constraint_mode = gr.Radio(
                        choices=["Hard (Prune)", "Soft (Penalize)"],
                        value="Soft (Penalize)",
                        label="Constraint Mode",
                        info="Soft constraints are more robust - see the Learn tab for explanation"
                    )

                    with gr.Row():
                        num_particles = gr.Slider(
                            minimum=2, maximum=10, value=5, step=1,
                            label="Particles",
                            info="More = more exploration"
                        )
                        max_steps = gr.Slider(
                            minimum=5, maximum=30, value=15, step=5,
                            label="Max Steps",
                            info="SMC iterations"
                        )
                        tokens_per_step = gr.Slider(
                            minimum=5, maximum=30, value=15, step=5,
                            label="Tokens/Step",
                            info="Generation length per iteration"
                        )

                    translate_btn = gr.Button("Translate to Plain English", variant="primary", size="lg")

                with gr.Column(scale=1):
                    gr.Markdown("""
                    ### SMC in Action

                    When you click translate, watch the trace log to see:

                    1. **Particles explored** - Multiple paths generated
                    2. **Filtering** - Jargon paths penalized/pruned
                    3. **Resampling** - Best particles selected
                    4. **Convergence** - Final jargon-free output

                    **Tip:** Try the same concept with Hard vs Soft constraints to see the difference!
                    """)

            gr.Markdown("---")

            gr.Markdown("### Output")
            smc_output = gr.Textbox(label="Plain-English Explanation", lines=5, show_label=True)

            with gr.Accordion("SMC Trace Log (See the algorithm in action)", open=True):
                trace_output = gr.Textbox(label="", lines=20, show_label=False)

            with gr.Accordion("Banned Words for This Domain", open=False):
                banned_words_display = gr.Textbox(label="", lines=3, show_label=False)

            gr.Markdown("---")
            gr.Markdown("### Example Scenarios")
            gr.Examples(examples=EXAMPLES, inputs=[concept_input, profession_dropdown, custom_words], label="")

        # ==================== TAB 3: EXPERIMENTS ====================
        with gr.TabItem("3. Our Experiments"):

            gr.Markdown("""
            # What We Learned: An Experimental Journey

            This tab documents our experimental journey in applying SMC to constrained text generation. We tested multiple approaches and models to understand what works and what doesn't.

            ---
            """)

            gr.Markdown("""
            ## The Experimental Setup

            ### Goal
            Generate plain-English explanations of professional concepts (Legal, Medical, Financial, Technical) that a 10-year-old could understand - **without using any domain-specific jargon**.

            ### Benchmark
            We created 12 test cases (3 per domain) with gold-standard translations from Claude Opus 4.5. Each output was scored on:

            | Criterion | Points | Description |
            |-----------|--------|-------------|
            | Jargon-Free | 25 | No banned terminology used |
            | Has Example | 25 | Uses relatable analogy |
            | Appropriate Length | 25 | 20-100 words |
            | Coherence | 25 | Proper sentence structure |

            ---
            """)

            # Experiment 1: Hard Constraints
            gr.Markdown("""
            ## Experiment 1: Hard Constraints (Prune All Jargon)

            ### Hypothesis
            If we completely eliminate any generation path containing jargon, the model will be forced to find jargon-free alternatives.

            ### Setup
            - Models: TinyLlama-1.1B, Qwen2-0.5B, Gemma-2-2B
            - Parameters: 5 particles, 25 max steps, 6 tokens per step
            - Constraint: **Hard** - any particle with jargon is immediately pruned

            ### Results
            """)

            # Build data from benchmark
            gemma_data = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]
            tinyllama_data = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]
            qwen_data = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]

            total_cases = 12  # benchmark size quoted throughout this tab

            def _hard_result_row(label, data, default_successes):
                """Format one results-table row for a hard-constraint run.

                The "empty outputs" figure is derived from the success count
                so the two columns cannot contradict each other.
                """
                successes = data.get('successful_outputs', default_successes)
                return (
                    f"| {label} | {data['total_score']}/{data['max_possible']} ({data['percentage']}%) "
                    f"| {successes}/{total_cases} | {total_cases - successes} empty outputs |"
                )

            gr.Markdown(f"""
            | Model | Score | Success Rate | Outcome |
            |-------|-------|--------------|---------|
            {_hard_result_row("Gemma-2-2B", gemma_data, 3)}
            {_hard_result_row("TinyLlama-1.1B", tinyllama_data, 3)}
            {_hard_result_row("Qwen2-0.5B", qwen_data, 2)}

            ### What Happened?

            **75% of test cases produced empty outputs!**

            The problem: When explaining medical concepts, the model naturally reaches for words like "benign," "lesion," and "diagnosis." With hard constraints, EVERY generation path contained at least one banned word, causing **total particle death**.

            ### Key Learning
            Hard constraints are too aggressive. Domain-specific vocabulary is so deeply embedded in model weights that it's nearly impossible to avoid entirely through pruning alone.

            ---
            """)

            # Experiment 2: Soft Constraints
            gr.Markdown("""
            ## Experiment 2: Soft Constraints (Weighted Resampling)

            ### Hypothesis
            Instead of killing particles with jargon, we should **penalize** them with lower weights. This allows gradual steering toward jargon-free outputs while preventing particle death.

            ### The Key Insight

            ```
            Weight = penalty_factor ^ (jargon_count)

            With penalty_factor = 0.3:
            - 0 jargon words → weight = 1.0
            - 1 jargon word → weight = 0.3
            - 2 jargon words → weight = 0.09
            ```

            Particles with jargon can **survive** but are less likely to be selected for the next generation. Over time, the population naturally shifts toward jargon-free outputs.

            ### Setup
            - Model: Qwen2.5-7B (via Ollama)
            - Parameters: 5 particles, 15 max steps, 25 tokens per step
            - Constraint: **Soft** - penalty factor 0.3

            ### Results
            """)

            qwen25_soft_data = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {})

            gr.Markdown(f"""
            | Model | Score | Success Rate | Jargon Violations |
            |-------|-------|--------------|-------------------|
            | Qwen2.5-7B (Soft) | {qwen25_soft_data.get('total_score', 920)}/{qwen25_soft_data.get('max_possible', 1200)} ({qwen25_soft_data.get('percentage', 76.7)}%) | **{qwen25_soft_data.get('successful_outputs', 12)}/12** | 1/12 |

            ### The Transformation

            | Metric | Hard Constraints | Soft Constraints |
            |--------|------------------|------------------|
            | Success Rate | 25% (3/12) | **100% (12/12)** |
            | Average Score | ~44% | **76.7%** |
            | Empty Outputs | 9/12 | **0/12** |

            ### What Changed?

            - Particles with jargon no longer die instantly
            - The population gradually evolves toward jargon-free outputs
            - Even if early generations contain jargon, later generations learn to avoid it
            - The one jargon violation ("synchronous") was unavoidable given the topic

            ---
            """)

            # Comparison Browser
            gr.Markdown("## Compare Results Across Models")
            gr.Markdown("Select an example to see how different approaches performed:")

            # Dropdown label -> (domain, full concept). Keeping the exact pair
            # avoids re-parsing the truncated label later; on a label clash
            # the first concept wins.
            example_lookup = {}
            for domain in ["Legal", "Medical", "Financial", "Technical/Engineering"]:
                for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain]:
                    example_lookup.setdefault(f"{domain}: {concept[:55]}...", (domain, concept))
            all_examples = list(example_lookup)

            def _benchmark_outputs(domain, concept):
                """Return (claude, qwen2.5, gemma, tinyllama) texts for one benchmark concept.

                Model outputs that are missing or empty are replaced by a
                placeholder instead of raising.
                """
                model_results = BENCHMARK_DATA["model_results"]

                def _output(model_key, placeholder):
                    entry = model_results.get(model_key, {}).get("results", {}).get(domain, {}).get(concept, {})
                    return entry.get("output", "") or placeholder

                claude = BENCHMARK_DATA["claude_opus_benchmarks"][domain][concept]["translation"]
                return (
                    claude,
                    _output("Qwen2.5-7B-SoftConstraint", "(Not available)"),
                    _output("Gemma-2-2B", "(Hard constraints killed all particles)"),
                    _output("TinyLlama-1.1B", "(Hard constraints killed all particles)"),
                )

            def update_example_outputs(selection):
                """Map a dropdown selection to the four comparison textbox values."""
                target = example_lookup.get(selection)
                if target is None:
                    return "Not found", "Not found", "Not found", "Not found"
                return _benchmark_outputs(*target)

            example_dropdown = gr.Dropdown(
                choices=all_examples,
                value=all_examples[0] if all_examples else None,
                label="Select Example"
            )

            # Pre-fill the textboxes with the first example (the first Legal concept).
            initial_claude, initial_qwen25, initial_gemma, initial_tiny = update_example_outputs(
                all_examples[0] if all_examples else None
            )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Claude Opus 4.5 (Gold Standard)**")
                    claude_output = gr.Textbox(value=initial_claude, lines=4, interactive=False, show_label=False)
                with gr.Column():
                    gr.Markdown("**Qwen2.5-7B (Soft Constraints)**")
                    qwen25_output = gr.Textbox(value=initial_qwen25, lines=4, interactive=False, show_label=False)

            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Gemma-2-2B (Hard Constraints)**")
                    gemma_output = gr.Textbox(value=initial_gemma, lines=4, interactive=False, show_label=False)
                with gr.Column():
                    gr.Markdown("**TinyLlama-1.1B (Hard Constraints)**")
                    tinyllama_output = gr.Textbox(value=initial_tiny, lines=4, interactive=False, show_label=False)

            example_dropdown.change(
                fn=update_example_outputs,
                inputs=[example_dropdown],
                outputs=[claude_output, qwen25_output, gemma_output, tinyllama_output]
            )

            gr.Markdown("---")

            # Key Takeaways
            gr.Markdown("""
            ## Key Takeaways

            ### What We Learned About SMC for Constrained Generation

            1. **Soft constraints dramatically outperform hard constraints**
               - Hard pruning causes particle death when constraints conflict with model priors
               - Weighted resampling allows graceful degradation and recovery

            2. **Penalty factor matters**
               - 0.3 (70% reduction per jargon word) provided good balance
               - Too aggressive (0.1) → still causes particle death
               - Too lenient (0.5) → jargon persists too long

            3. **Model size affects vocabulary diversity**
               - Larger models (7B+) have more alternative phrasings available
               - Smaller models get stuck more easily because they have fewer "escape routes"

            4. **SMC enables strategic generation**
               - Standard greedy generation commits immediately and can't backtrack
               - SMC explores multiple futures and converges on the best path

            ### Broader Implications

            This technique applies beyond jargon filtering:
            - **Content moderation:** Generate text avoiding harmful content
            - **Style transfer:** Guide generation toward specific writing styles
            - **Factual grounding:** Penalize generations that contradict known facts
            - **Length control:** Soft constraints on verbosity

            ---

            *Experiments conducted December 2025. Models tested via HuggingFace Transformers and Ollama.*
            """)

    # Event handlers
    translate_btn.click(
        fn=smc_translate,
        inputs=[concept_input, profession_dropdown, custom_words, model_dropdown, num_particles, max_steps, tokens_per_step, constraint_mode],
        outputs=[smc_output, trace_output, banned_words_display]
    )

if __name__ == "__main__":
    # NOTE(review): passing `theme` to launch() only works on Gradio releases
    # that accept it there (older releases take it on gr.Blocks) - confirm
    # against the pinned Gradio version.
    demo.launch(theme=gr.themes.Soft())