apingali
Transform space into SMC educational learning resource
3cd35a6
"""
Learning Sequential Monte Carlo (SMC) Through the Plain-English Translator
An interactive educational space that teaches Sequential Monte Carlo methods
using a practical application: helping professionals explain complex concepts
without using industry jargon.
"""
import json
import math
import os
import random
import re

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
# Mock spaces module for local development (only needed on HuggingFace Spaces)
try:
    import spaces
except ImportError:
    class spaces:
        """Stand-in used when the HuggingFace ``spaces`` package is not installed.

        Only the ``GPU`` decorator is provided, and it does nothing: decorated
        functions are returned unchanged so the app runs on a plain machine.
        """

        @staticmethod
        def GPU(func=None, **_options):
            """Return the decorated function untouched.

            Works both as a bare decorator (``@spaces.GPU``) and as a
            decorator factory with keyword options (``@spaces.GPU(duration=60)``);
            any options are accepted and ignored.
            """
            if func is None:
                # Called with options only: hand back a decorator.
                return lambda wrapped: wrapped
            return func
# Load benchmark data
# The JSON file is expected to sit in the same directory as this module.
BENCHMARK_DATA_PATH = os.path.join(os.path.dirname(__file__), "benchmark_data.json")
# Read explicitly as UTF-8 so the result does not depend on the platform's
# locale encoding (which would garble non-ASCII text in the benchmark file).
with open(BENCHMARK_DATA_PATH, "r", encoding="utf-8") as f:
    BENCHMARK_DATA = json.load(f)

# Path to infographic
INFOGRAPHIC_PATH = os.path.join(os.path.dirname(__file__), "Sequential_monte_carlo.png")
# ============================================================================
# MODEL SETUP
# ============================================================================
# Maps the label shown in the "Model" dropdown to the model id that
# load_model() passes to from_pretrained().
AVAILABLE_MODELS = {
"TinyLlama-1.1B (Fast)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"Qwen2-0.5B (Fastest)": "Qwen/Qwen2-0.5B-Instruct",
"Qwen2.5-7B (Best Quality)": "Qwen/Qwen2.5-7B-Instruct",
"Qwen3-8B (Latest)": "Qwen/Qwen3-8B",
"Gemma-2-2B (Requires HF Login)": "google/gemma-2-2b-it",
}
# Module-level caches keyed by model id; load_model() fills them on first use
# so each model and tokenizer is loaded at most once.
loaded_models = {}
loaded_tokenizers = {}
def load_model(model_name: str):
    """Return the ``(tokenizer, model)`` pair for a dropdown label.

    The label is resolved through ``AVAILABLE_MODELS``; an unknown label falls
    back to the TinyLlama chat model. Tokenizer and model are created on first
    request and afterwards served from the module-level caches.

    Args:
        model_name: Key of ``AVAILABLE_MODELS`` (the text shown in the UI).

    Returns:
        Tuple ``(tokenizer, model)`` for the resolved model id.
    """
    fallback_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    repo_id = AVAILABLE_MODELS.get(model_name, fallback_id)

    # Tokenizer first, then weights; each is cached independently.
    if repo_id not in loaded_tokenizers:
        tokenizer = AutoTokenizer.from_pretrained(repo_id)
        loaded_tokenizers[repo_id] = tokenizer

    if repo_id not in loaded_models:
        load_options = {"device_map": "auto", "torch_dtype": torch.float16}
        loaded_models[repo_id] = AutoModelForCausalLM.from_pretrained(repo_id, **load_options)

    return loaded_tokenizers[repo_id], loaded_models[repo_id]
# ============================================================================
# JARGON DICTIONARIES
# ============================================================================
# Banned vocabulary per professional domain. The keys are the same strings
# offered by the "Professional Domain" dropdown; smc_translate() looks the
# selected domain up here and appends any user-supplied extra words.
# Terms are compared case-insensitively by the jargon helpers, so casing here
# (e.g. "API") only affects how the word is displayed.
# NOTE(review): a few terms ("fiduciary", "liability") are listed under more
# than one domain; that is harmless because only one domain is active per run.
JARGON_DICTIONARIES = {
"Legal": [
"liability", "liable", "indemnify", "indemnification", "breach",
"statute", "damages", "negligence", "herein", "aforementioned",
"plaintiff", "defendant", "jurisdiction", "arbitration", "tort",
"fiduciary", "escrow", "lien", "deposition", "stipulation",
"injunction", "subpoena", "affidavit", "adjudicate", "appellant"
],
"Medical": [
"prognosis", "diagnosis", "etiology", "pathology", "contraindicated",
"idiopathic", "nosocomial", "comorbidity", "prophylactic", "benign",
"malignant", "metastasis", "hemorrhage", "ischemia", "infarction",
"edema", "necrosis", "lesion", "syndrome", "acute", "chronic",
"bilateral", "unilateral", "subcutaneous", "intravenous"
],
"Financial": [
"amortization", "liquidity", "collateral", "derivative", "equity",
"fiduciary", "hedge", "leverage", "portfolio", "securities",
"dividend", "depreciation", "liability", "asset", "accrual",
"arbitrage", "capitalization", "yield", "maturity", "principal",
"compound", "annuity", "underwriting", "insolvency", "solvency"
],
"Technical/Engineering": [
"algorithm", "bandwidth", "latency", "throughput", "scalability",
"deprecated", "refactor", "polymorphism", "encapsulation", "abstraction",
"iteration", "recursion", "synchronous", "asynchronous", "protocol",
"middleware", "backend", "frontend", "deployment", "infrastructure",
"microservices", "containerization", "orchestration", "API", "SDK"
]
}
# ============================================================================
# SMC CORE FUNCTIONS
# ============================================================================
def _contains_word(text_lower: str, word: str) -> bool:
    """Return True if *word* occurs in *text_lower* as a whole word or phrase.

    *text_lower* must already be lower-cased. A match requires that the term is
    not directly preceded or followed by a word character, so "liability" is
    found in "(liability)", "liability;" or at a line break, but not inside
    "reliability". Empty or whitespace-only terms never match.
    """
    term = word.strip().lower()
    if not term:
        return False
    # Lookarounds instead of \b so terms that start or end with punctuation
    # (e.g. "c++") are still matched correctly.
    pattern = rf"(?<!\w){re.escape(term)}(?!\w)"
    return re.search(pattern, text_lower) is not None


def is_safe(text: str, banned_words: list) -> bool:
    """Return True when *text* contains none of *banned_words*.

    Matching is case-insensitive and whole-word (see ``_contains_word``).
    """
    text_lower = text.lower()
    return not any(_contains_word(text_lower, word) for word in banned_words)


def find_jargon_used(text: str, banned_words: list) -> list:
    """Return the banned words that occur in *text*.

    The result keeps the order (and any duplicates) of *banned_words* and
    reports each entry with its original casing. Each entry is listed at most
    once per occurrence in *banned_words*, regardless of how often it appears
    in *text*.
    """
    text_lower = text.lower()
    return [word for word in banned_words if _contains_word(text_lower, word)]


def count_jargon(text: str, banned_words: list) -> int:
    """Return how many entries of *banned_words* occur in *text*."""
    return len(find_jargon_used(text, banned_words))


def compute_weight(text: str, banned_words: list, penalty_factor: float = 0.3) -> float:
    """Return the soft-constraint weight ``penalty_factor ** jargon_count``.

    Jargon-free text gets weight 1.0; every distinct banned word found
    multiplies the weight by *penalty_factor*.
    """
    jargon_count = count_jargon(text, banned_words)
    return math.pow(penalty_factor, jargon_count)
def weighted_resample(particles: list, weights: list, num_samples: int) -> list:
    """Pick up to *num_samples* distinct particles, favouring higher weights.

    Sampling is weighted and without replacement, so the population is filled
    to ``min(num_samples, number of distinct eligible particles)`` instead of
    shrinking whenever the same particle would have been drawn twice.

    Identical particles are merged and their weights summed. Particles whose
    (merged) weight is not positive are never selected, unless every weight is
    zero, in which case all particles are treated as equally likely.

    Args:
        particles: Candidate particles (must be hashable, e.g. strings).
        weights: One non-negative weight per particle.
        num_samples: Maximum number of particles to return.

    Returns:
        A list of distinct particles; empty if there is nothing to sample.

    Raises:
        ValueError: If *particles* and *weights* differ in length.
    """
    if not particles or not weights:
        return []
    if len(particles) != len(weights):
        raise ValueError("particles and weights must have the same length")

    # Merge duplicate candidates so a text generated twice competes once,
    # with its combined weight. Negative weights are treated as zero.
    merged = {}
    for particle, weight in zip(particles, weights):
        merged[particle] = merged.get(particle, 0.0) + max(weight, 0.0)

    if sum(merged.values()) == 0:
        # No usable weight information: fall back to a uniform draw.
        merged = dict.fromkeys(merged, 1.0)

    # Efraimidis-Spirakis weighted sampling without replacement: give each
    # item the key u ** (1 / w) (compared here in log space) and keep the
    # items with the largest keys.
    keyed = [
        (math.log(1.0 - random.random()) / weight, particle)
        for particle, weight in merged.items()
        if weight > 0
    ]
    keyed.sort(key=lambda pair: pair[0], reverse=True)

    limit = max(int(num_samples), 0)
    return [particle for _, particle in keyed[:limit]]
# Last line of the prompt; everything after it in a particle is model-written.
_EXPLANATION_MARKER = "Simple explanation with example:"


def _extract_explanation(text: str) -> str:
    """Return the text after the last prompt marker in *text*, stripped."""
    return text.split(_EXPLANATION_MARKER)[-1].strip()


@spaces.GPU
def smc_translate(
    concept: str,
    profession: str,
    custom_banned_words: str = "",
    model_name: str = "TinyLlama-1.1B (Fast)",
    num_particles: int = 5,
    max_steps: int = 20,
    tokens_per_step: int = 15,
    constraint_mode: str = "Soft (Penalize)",
    progress=gr.Progress()
) -> tuple:
    """Generate a jargon-free explanation of *concept* with an SMC-style search.

    Each iteration extends every particle (a partial text) with several
    sampled continuations, scores the candidates against the banned-word list
    and keeps at most *num_particles* of them. In soft mode candidates are
    resampled by weight; in hard mode any candidate containing a banned word
    is dropped.

    The banned-word check looks only at the text the model has written, not at
    the prompt. The prompt embeds the user's concept, which often contains the
    very terms that are banned; checking it would penalise or prune every
    candidate regardless of what the model generated.

    Args:
        concept: The concept the user wants explained.
        profession: Domain name; selects the list in ``JARGON_DICTIONARIES``.
        custom_banned_words: Extra banned terms, comma separated.
        model_name: Key of ``AVAILABLE_MODELS``.
        num_particles: Maximum particles kept after each step.
        max_steps: Maximum number of explore/filter/resample iterations.
        tokens_per_step: New tokens generated per continuation.
        constraint_mode: Soft mode is used when the value contains "Soft",
            hard mode otherwise.
        progress: Gradio progress tracker used to report iterations.

    Returns:
        Tuple ``(final_text, trace_log_text, banned_words_text)``: the chosen
        explanation (or a notice when hard mode produced nothing), the trace
        log joined by newlines, and the banned words joined by ", ".
    """
    tokenizer, model_inst = load_model(model_name)
    use_soft_constraints = "Soft" in constraint_mode

    # Callers may pass whole numbers as floats (TODO confirm for the UI
    # sliders); range(), slicing and generate() need real ints.
    num_particles = int(num_particles)
    max_steps = int(max_steps)
    tokens_per_step = int(tokens_per_step)

    banned_words = JARGON_DICTIONARIES.get(profession, []).copy()
    if custom_banned_words.strip():
        custom_list = [w.strip() for w in custom_banned_words.split(",") if w.strip()]
        banned_words.extend(custom_list)

    prompt = f"""You are an expert {profession.lower()} professional explaining a concept to a client with no background in your field.
Rules:
- Explain as if talking to a curious 10-year-old
- Use a concrete, relatable real-world example to illustrate the concept
- Avoid technical jargon - use everyday words instead
- Keep it concise: 2-3 sentences max
Concept to explain: {concept}
Simple explanation with example:"""

    particles = [prompt]
    divider = "=" * 60
    trace_log = [
        divider,
        "SMC PLAIN-ENGLISH TRANSLATOR - TRACE LOG",
        divider,
        f"Model: {model_name}",
        f"Constraint Mode: {constraint_mode}",
        f"Concept: {concept}",
        f"Domain: {profession}",
        f"Banned words: {len(banned_words)} terms",
        f"Particles: {num_particles} | Steps: {max_steps} | Tokens/step: {tokens_per_step}",
        divider,
        "",
    ]

    for step in progress.tqdm(range(max_steps), desc="SMC Iteration"):
        candidates = []

        # STEP 1: EXPLORE - Generate multiple continuations
        for particle in particles:
            inputs = tokenizer(particle, return_tensors="pt").to(model_inst.device)
            with torch.no_grad():
                outputs = model_inst.generate(
                    **inputs,
                    max_new_tokens=tokens_per_step,
                    num_return_sequences=3,
                    do_sample=True,
                    temperature=0.9 if use_soft_constraints else 0.8,
                    top_p=0.95 if use_soft_constraints else 0.9,
                    pad_token_id=tokenizer.eos_token_id
                )
            for out in outputs:
                candidates.append(tokenizer.decode(out, skip_special_tokens=True))

        if not candidates:
            trace_log.append(f"Step {step+1}: No candidates generated - stopping")
            break

        # Constraints are judged on the generated part only; the prompt (and
        # the user's concept inside it) must not count against a candidate.
        generated = [_extract_explanation(c) for c in candidates]

        # STEP 2: FILTER/WEIGHT - Apply constraints
        if use_soft_constraints:
            weights = [compute_weight(g, banned_words, penalty_factor=0.3) for g in generated]
            jargon_counts = [count_jargon(g, banned_words) for g in generated]
            clean_count = sum(1 for c in jargon_counts if c == 0)
            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
            trace_log.append(f" {clean_count} jargon-free | Weights: [{min(weights):.2f} - {max(weights):.2f}]")

            # STEP 3: RESAMPLE - Weighted selection
            particles = weighted_resample(candidates, weights, num_particles)
            if not particles:
                trace_log.append(" Resampling failed - stopping")
                break
            trace_log.append(f" Resampled to {len(particles)} particles")
        else:
            valid_candidates = [
                candidate
                for candidate, text in zip(candidates, generated)
                if is_safe(text, banned_words)
            ]
            pruned_count = len(candidates) - len(valid_candidates)
            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
            trace_log.append(f" {len(valid_candidates)} survived | {pruned_count} pruned (contained jargon)")
            if not valid_candidates:
                # `particles` keeps the survivors of the previous step.
                trace_log.append(" ALL PARTICLES DIED - jargon unavoidable!")
                break
            unique_candidates = list(set(valid_candidates))
            random.shuffle(unique_candidates)
            particles = unique_candidates[:num_particles]

        # Check for completion
        current_text = _extract_explanation(particles[0])
        if current_text.endswith(('.', '!', '?')) and len(current_text) > 80:
            trace_log.append(f"\nNatural completion reached at step {step+1}")
            break

    trace_log.append("")
    trace_log.append(divider)

    # Get best result
    no_output_message = "(All generation paths used jargon - try soft constraints!)"
    if not particles:
        final_text = no_output_message
        produced_text = False
    else:
        if use_soft_constraints:
            # First particle with the fewest banned words in its generated text.
            best = min(
                particles,
                key=lambda p: count_jargon(_extract_explanation(p), banned_words),
            )
        else:
            best = particles[0]
        final_text = _extract_explanation(best)
        produced_text = bool(final_text)
        if not produced_text and not use_soft_constraints:
            # Hard mode pruned everything before any text survived; tell the
            # user instead of returning an empty box.
            final_text = no_output_message

    final_jargon = find_jargon_used(final_text, banned_words) if produced_text else []
    if final_jargon:
        trace_log.append(f"RESULT: Contains jargon: {final_jargon}")
    elif produced_text:
        trace_log.append("RESULT: Jargon-free output achieved!")
    else:
        trace_log.append("RESULT: No explanation was produced")
    trace_log.append(divider)

    return final_text, "\n".join(trace_log), ", ".join(banned_words)
# ============================================================================
# EXAMPLES
# ============================================================================
# Preset scenarios for the gr.Examples widget. Each row is
# [concept, professional domain, additional banned words], in the same order
# as the inputs list passed to gr.Examples (concept_input,
# profession_dropdown, custom_words). Two rows per domain.
EXAMPLES = [
["Force Majeure clause and why it might void our contract", "Legal", ""],
["Why we need to add an indemnification clause to protect your business", "Legal", ""],
["Your MRI shows a benign lesion that we should monitor", "Medical", ""],
["The etiology of your chronic fatigue syndrome", "Medical", ""],
["How compound interest and amortization affect your mortgage payments", "Financial", ""],
["Why we recommend diversifying your portfolio with low-liquidity assets", "Financial", ""],
["Why our API has high latency and how microservices could help", "Technical/Engineering", ""],
["The difference between synchronous and asynchronous processing", "Technical/Engineering", ""],
]
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
# Root container of the app; the resulting Blocks object is bound to `demo`,
# which is launched at the bottom of the file.
with gr.Blocks(title="Learn SMC: The Plain-English Translator") as demo:
# ==================== HEADER ====================
# Static introduction rendered before the tab bar is created.
gr.Markdown("""
# Learning Sequential Monte Carlo (SMC)
## An Interactive Guide Using the Plain-English Translator
Welcome! This space teaches you about **Sequential Monte Carlo** methods through a practical application:
helping professionals explain complex concepts without using jargon.
Navigate through the tabs to learn the theory, try the interactive demo, and see our experimental results.
""")
# Tab bar holding the three sections of the app.
with gr.Tabs():
# ==================== TAB 1: LEARN SMC ====================
# Tab 1 is static teaching material: it creates only Markdown components and
# one Image, and registers no event handlers.
with gr.TabItem("1. Learn SMC"):
gr.Markdown("""
# Understanding Sequential Monte Carlo
Sequential Monte Carlo (SMC) is a powerful technique for solving problems where you need to
navigate through a space of possibilities while satisfying constraints. Let's understand it
through both theory and our practical application.
""")
# Infographic
gr.Markdown("## The Big Picture")
# Shows the image file at INFOGRAPHIC_PATH (resolved next to this module).
gr.Image(INFOGRAPHIC_PATH, label="How AI Learns to See the Future: An Introduction to SMC", show_label=True)
gr.Markdown("---")
# Section 1: The Problem
gr.Markdown("""
## 1. The Problem: Standard AI's "Greedy" Trap
### What's Wrong with Normal Text Generation?
Most AI language models work **greedily** - they pick the best next word based on immediate probability,
without considering long-term consequences. This creates a fundamental problem:
**The Greedy Trap:**
- The model chooses what seems best *right now*
- It can't "see" that this choice leads to a dead end
- Once committed, it can't backtrack
### Our Example: The Curse of Knowledge
When a lawyer tries to explain "Force Majeure" to a client, a standard AI naturally reaches for
legal terminology because those words are statistically most likely in that context:
```
Standard AI: "Force Majeure is a contractual provision that excuses liability
when extraordinary circumstances prevent fulfillment..."
```
The AI picked "liability," "contractual," and "provision" because they're the most probable
next words - but now it's stuck using jargon the client won't understand!
**This is like choosing the path in a maze that looks shortest, only to hit a dead end.**
""")
gr.Markdown("---")
# Section 2: The Breakthrough
gr.Markdown("""
## 2. The Breakthrough: Introducing SMC
### The Key Insight: Explore Multiple Futures Simultaneously
Instead of committing to one path, SMC maintains **thousands of "particles"** - each representing
a different possible future. Think of it as sending out scouts in every direction.
### How It Works in Our Translator:
```
Standard AI: One path → "Force Majeure is a contractual..." → STUCK WITH JARGON
SMC Approach: Path A → "Imagine you promised your friend..." ✓ Keep exploring
Path B → "This is a liability clause..." ✗ Contains jargon
Path C → "Think of it like a 'nobody's fault'..." ✓ Keep exploring
Path D → "The contractual provision states..." ✗ Contains jargon
Path E → "It's like when a big storm..." ✓ Keep exploring
```
**We explore multiple possibilities in parallel, keeping the promising ones and discarding the rest.**
""")
gr.Markdown("---")
# Section 3: The Process
# The weight table in this text mirrors compute_weight() with its default
# penalty_factor of 0.3.
gr.Markdown("""
## 3. The Process: How SMC Finds the Optimal Path
SMC follows a three-step cycle that repeats until we reach our goal:
### Step 1: EXPLORE (Expand)
Each surviving particle generates multiple possible continuations.
If we have 5 particles and each generates 3 continuations, we now have 15 candidates.
### Step 2: FILTER (Evaluate)
We evaluate each candidate against our constraint (no jargon).
This is "survival of the fittest" - unpromising paths fade out.
**Two Filtering Strategies:**
| Strategy | How It Works | Pros | Cons |
|----------|--------------|------|------|
| **Hard Constraints** | Completely eliminate any particle with jargon | Guarantees jargon-free output | Can kill ALL particles if jargon is unavoidable |
| **Soft Constraints** | Reduce weight of particles with jargon (but let them survive) | More robust, allows gradual steering | May have occasional jargon slip through |
### Step 3: RESAMPLE (Select)
We select particles for the next round based on their fitness:
- **Hard mode:** Random selection from survivors
- **Soft mode:** Weighted random selection (better particles more likely to be chosen)
### The Math Behind Soft Constraints:
```
Weight = 0.3 ^ (number of jargon words)
0 jargon words → Weight = 1.0 (100% chance)
1 jargon word → Weight = 0.3 (30% chance)
2 jargon words → Weight = 0.09 (9% chance)
3 jargon words → Weight = 0.027 (2.7% chance)
```
""")
gr.Markdown("---")
# Section 4: The Impact
gr.Markdown("""
## 4. The Impact: From Prediction to Strategy
SMC transforms AI from a **reactive predictor** to a **strategic planner**.
### What This Means for Our Translator:
| Approach | Can Plan Ahead? | Handles Constraints? | Success Rate |
|----------|-----------------|---------------------|--------------|
| Standard Greedy | No - commits immediately | No - uses probable words | N/A (always uses jargon) |
| SMC Hard | Yes - explores multiple paths | Yes - prunes violations | 25% (particles often die) |
| SMC Soft | Yes - explores multiple paths | Yes - penalizes violations | **100%** |
### Beyond Translation: Where Else Is SMC Used?
- **Robotics:** Planning movements while avoiding obstacles
- **Autonomous Vehicles:** Predicting traffic and planning routes
- **Finance:** Portfolio optimization with risk constraints
- **Drug Discovery:** Exploring molecular structures with safety constraints
### The Fundamental Shift:
> *"If your AI could plan 10 steps ahead instead of 1, what impossible problem would you have it solve first?"*
SMC represents moving from **simple prediction** to **true strategic foresight**.
""")
gr.Markdown("---")
# Connection to Next Tab
# The tab name quoted in this text must stay in sync with the TabItem label
# of the translator tab.
gr.Markdown("""
## Ready to Try It Yourself?
Now that you understand how SMC works, head to the **"2. Try It: Translator"** tab
to see it in action! You can:
- Watch particles explore and get filtered in real-time
- Compare hard vs soft constraints
- Try different professional domains (Legal, Medical, Financial, Technical)
""")
# ==================== TAB 2: TRY IT ====================
# Interactive tab: creates the input widgets, the translate button and the
# three output boxes that the translate_btn.click handler reads and writes.
with gr.TabItem("2. Try It: Translator"):
gr.Markdown("""
# The Plain-English Translator
## The Problem We're Solving
**The Curse of Knowledge:** Experts often struggle to explain concepts without jargon.
A standard AI naturally uses technical terms because they're statistically probable.
**Our Solution:** Use SMC to explore multiple explanations simultaneously,
filtering out any path that uses forbidden terminology. This forces the model
to find creative, plain-language alternatives.
---
""")
with gr.Row():
with gr.Column(scale=2):
concept_input = gr.Textbox(
label="Concept to Explain",
placeholder="e.g., 'Force Majeure clause and why it might void our contract'",
lines=2
)
# Choices are the keys of JARGON_DICTIONARIES.
profession_dropdown = gr.Dropdown(
choices=["Legal", "Medical", "Financial", "Technical/Engineering"],
value="Legal",
label="Professional Domain",
info="Each domain has its own set of banned jargon terms"
)
# Free text; smc_translate() splits it on commas.
custom_words = gr.Textbox(
label="Additional Banned Words (optional)",
placeholder="e.g., contract, clause, party",
lines=1
)
model_dropdown = gr.Dropdown(
choices=list(AVAILABLE_MODELS.keys()),
value="TinyLlama-1.1B (Fast)",
label="Model"
)
# smc_translate() selects soft mode when the chosen label contains "Soft".
constraint_mode = gr.Radio(
choices=["Hard (Prune)", "Soft (Penalize)"],
value="Soft (Penalize)",
label="Constraint Mode",
info="Soft constraints are more robust - see the Learn tab for explanation"
)
with gr.Row():
num_particles = gr.Slider(
minimum=2, maximum=10, value=5, step=1,
label="Particles",
info="More = more exploration"
)
# NOTE(review): the UI default here (15) differs from the function's own
# default for max_steps (20); the slider value is what the click handler passes.
max_steps = gr.Slider(
minimum=5, maximum=30, value=15, step=5,
label="Max Steps",
info="SMC iterations"
)
tokens_per_step = gr.Slider(
minimum=5, maximum=30, value=15, step=5,
label="Tokens/Step",
info="Generation length per iteration"
)
translate_btn = gr.Button("Translate to Plain English", variant="primary", size="lg")
with gr.Column(scale=1):
gr.Markdown("""
### SMC in Action
When you click translate, watch the trace log to see:
1. **Particles explored** - Multiple paths generated
2. **Filtering** - Jargon paths penalized/pruned
3. **Resampling** - Best particles selected
4. **Convergence** - Final jargon-free output
**Tip:** Try the same concept with Hard vs Soft constraints
to see the difference!
""")
gr.Markdown("---")
gr.Markdown("### Output")
# Output components, filled from the 3-tuple returned by smc_translate().
smc_output = gr.Textbox(label="Plain-English Explanation", lines=5, show_label=True)
with gr.Accordion("SMC Trace Log (See the algorithm in action)", open=True):
trace_output = gr.Textbox(label="", lines=20, show_label=False)
with gr.Accordion("Banned Words for This Domain", open=False):
banned_words_display = gr.Textbox(label="", lines=3, show_label=False)
gr.Markdown("---")
gr.Markdown("### Example Scenarios")
# Each EXAMPLES row supplies values for these three inputs, in this order.
gr.Examples(examples=EXAMPLES, inputs=[concept_input, profession_dropdown, custom_words], label="")
# ==================== TAB 3: EXPERIMENTS ====================
# Report tab: mixes fixed prose with numbers read from BENCHMARK_DATA
# (loaded from benchmark_data.json at import time).
with gr.TabItem("3. Our Experiments"):
gr.Markdown("""
# What We Learned: An Experimental Journey
This tab documents our experimental journey in applying SMC to constrained text generation.
We tested multiple approaches and models to understand what works and what doesn't.
---
""")
gr.Markdown("""
## The Experimental Setup
### Goal
Generate plain-English explanations of professional concepts (Legal, Medical, Financial, Technical)
that a 10-year-old could understand - **without using any domain-specific jargon**.
### Benchmark
We created 12 test cases (3 per domain) with gold-standard translations from Claude Opus 4.5.
Each output was scored on:
| Criterion | Points | Description |
|-----------|--------|-------------|
| Jargon-Free | 25 | No banned terminology used |
| Has Example | 25 | Uses relatable analogy |
| Appropriate Length | 25 | 20-100 words |
| Coherence | 25 | Proper sentence structure |
---
""")
# Experiment 1: Hard Constraints
gr.Markdown("""
## Experiment 1: Hard Constraints (Prune All Jargon)
### Hypothesis
If we completely eliminate any generation path containing jargon, the model will be forced
to find jargon-free alternatives.
### Setup
- Models: TinyLlama-1.1B, Qwen2-0.5B, Gemma-2-2B
- Parameters: 5 particles, 25 max steps, 6 tokens per step
- Constraint: **Hard** - any particle with jargon is immediately pruned
### Results
""")
# Build data from benchmark
# These three lookups use plain indexing, so a benchmark file without any of
# these model entries raises KeyError while the UI is being built.
gemma_data = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]
tinyllama_data = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]
qwen_data = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]
# Scores and percentages come from the benchmark file; 'successful_outputs'
# falls back to the literal shown when the key is absent.
# NOTE(review): the "N empty outputs" column and the "75%" sentence are fixed
# text and will not follow the data if the benchmark file changes.
gr.Markdown(f"""
| Model | Score | Success Rate | Outcome |
|-------|-------|--------------|---------|
| Gemma-2-2B | {gemma_data['total_score']}/{gemma_data['max_possible']} ({gemma_data['percentage']}%) | {gemma_data.get('successful_outputs', 3)}/12 | 9 empty outputs |
| TinyLlama-1.1B | {tinyllama_data['total_score']}/{tinyllama_data['max_possible']} ({tinyllama_data['percentage']}%) | {tinyllama_data.get('successful_outputs', 3)}/12 | 9 empty outputs |
| Qwen2-0.5B | {qwen_data['total_score']}/{qwen_data['max_possible']} ({qwen_data['percentage']}%) | {qwen_data.get('successful_outputs', 2)}/12 | 10 empty outputs |
### What Happened?
**75% of test cases produced empty outputs!**
The problem: When explaining medical concepts, the model naturally reaches for words like
"benign," "lesion," and "diagnosis." With hard constraints, EVERY generation path
contained at least one banned word, causing **total particle death**.
### Key Learning
Hard constraints are too aggressive. Domain-specific vocabulary is so deeply embedded
in model weights that it's nearly impossible to avoid entirely through pruning alone.
---
""")
# Experiment 2: Soft Constraints
gr.Markdown("""
## Experiment 2: Soft Constraints (Weighted Resampling)
### Hypothesis
Instead of killing particles with jargon, we should **penalize** them with lower weights.
This allows gradual steering toward jargon-free outputs while preventing particle death.
### The Key Insight
```
Weight = penalty_factor ^ (jargon_count)
With penalty_factor = 0.3:
- 0 jargon words → weight = 1.0
- 1 jargon word → weight = 0.3
- 2 jargon words → weight = 0.09
```
Particles with jargon can **survive** but are less likely to be selected for the next generation.
Over time, the population naturally shifts toward jargon-free outputs.
### Setup
- Model: Qwen2.5-7B (via Ollama)
- Parameters: 5 particles, 15 max steps, 25 tokens per step
- Constraint: **Soft** - penalty factor 0.3
### Results
""")
# Unlike the hard-constraint models above, this entry is optional: a missing
# key yields {} and every field below falls back to its literal default.
qwen25_soft_data = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {})
gr.Markdown(f"""
| Model | Score | Success Rate | Jargon Violations |
|-------|-------|--------------|-------------------|
| Qwen2.5-7B (Soft) | {qwen25_soft_data.get('total_score', 920)}/{qwen25_soft_data.get('max_possible', 1200)} ({qwen25_soft_data.get('percentage', 76.7)}%) | **{qwen25_soft_data.get('successful_outputs', 12)}/12** | 1/12 |
### The Transformation
| Metric | Hard Constraints | Soft Constraints |
|--------|------------------|------------------|
| Success Rate | 25% (3/12) | **100% (12/12)** |
| Average Score | ~44% | **76.7%** |
| Empty Outputs | 9/12 | **0/12** |
### What Changed?
- Particles with jargon no longer die instantly
- The population gradually evolves toward jargon-free outputs
- Even if early generations contain jargon, later generations learn to avoid it
- The one jargon violation ("synchronous") was unavoidable given the topic
---
""")
# Comparison Browser
gr.Markdown("## Compare Results Across Models")
gr.Markdown("Select an example to see how different approaches performed:")
# Map every dropdown label to the (domain, concept) pair it stands for, so the
# change handler can look the selection up directly instead of re-parsing the
# truncated label text.
example_lookup = {}
for domain in ["Legal", "Medical", "Financial", "Technical/Engineering"]:
    for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain]:
        example_lookup[f"{domain}: {concept[:55]}..."] = (domain, concept)
all_examples = list(example_lookup)


def update_example_outputs(selection):
    """Return the four comparison texts for the selected benchmark example.

    Args:
        selection: A label from ``all_examples`` (or None when the dropdown
            has been cleared).

    Returns:
        Tuple ``(claude, qwen25, gemma, tinyllama)`` of display strings. A
        model with no stored output gets its placeholder text; an unknown
        selection yields "Not found" for all four.
    """
    if selection not in example_lookup:
        return "Not found", "Not found", "Not found", "Not found"

    domain, concept = example_lookup[selection]
    model_results = BENCHMARK_DATA["model_results"]

    def stored_output(model_key, placeholder):
        # Every level is optional so a partial benchmark file degrades to the
        # placeholder instead of raising KeyError inside the UI callback.
        entry = (
            model_results.get(model_key, {})
            .get("results", {})
            .get(domain, {})
            .get(concept, {})
        )
        return entry.get("output", "") or placeholder

    claude = BENCHMARK_DATA["claude_opus_benchmarks"][domain][concept]["translation"]
    qwen25 = stored_output("Qwen2.5-7B-SoftConstraint", "(Not available)")
    gemma = stored_output("Gemma-2-2B", "(Hard constraints killed all particles)")
    tiny = stored_output("TinyLlama-1.1B", "(Hard constraints killed all particles)")
    return claude, qwen25, gemma, tiny


# The initial textbox contents come from the same function the change handler
# uses, so first render and later updates cannot drift apart.
default_example = all_examples[0] if all_examples else None
initial_claude, initial_qwen25, initial_gemma, initial_tiny = update_example_outputs(default_example)

example_dropdown = gr.Dropdown(choices=all_examples, value=default_example, label="Select Example")
with gr.Row():
    with gr.Column():
        gr.Markdown("**Claude Opus 4.5 (Gold Standard)**")
        claude_output = gr.Textbox(value=initial_claude, lines=4, interactive=False, show_label=False)
    with gr.Column():
        gr.Markdown("**Qwen2.5-7B (Soft Constraints)**")
        qwen25_output = gr.Textbox(value=initial_qwen25, lines=4, interactive=False, show_label=False)
with gr.Row():
    with gr.Column():
        gr.Markdown("**Gemma-2-2B (Hard Constraints)**")
        gemma_output = gr.Textbox(value=initial_gemma, lines=4, interactive=False, show_label=False)
    with gr.Column():
        gr.Markdown("**TinyLlama-1.1B (Hard Constraints)**")
        tinyllama_output = gr.Textbox(value=initial_tiny, lines=4, interactive=False, show_label=False)

# Output order matches the tuple returned by update_example_outputs().
example_dropdown.change(
    fn=update_example_outputs,
    inputs=[example_dropdown],
    outputs=[claude_output, qwen25_output, gemma_output, tinyllama_output]
)
gr.Markdown("---")
# Key Takeaways
# Closing summary of the experiments tab; static text, no data lookups.
gr.Markdown("""
## Key Takeaways
### What We Learned About SMC for Constrained Generation
1. **Soft constraints dramatically outperform hard constraints**
- Hard pruning causes particle death when constraints conflict with model priors
- Weighted resampling allows graceful degradation and recovery
2. **Penalty factor matters**
- 0.3 (70% reduction per jargon word) provided good balance
- Too aggressive (0.1) → still causes particle death
- Too lenient (0.5) → jargon persists too long
3. **Model size affects vocabulary diversity**
- Larger models (7B+) have more alternative phrasings available
- Smaller models get stuck more easily because they have fewer "escape routes"
4. **SMC enables strategic generation**
- Standard greedy generation commits immediately and can't backtrack
- SMC explores multiple futures and converges on the best path
### Broader Implications
This technique applies beyond jargon filtering:
- **Content moderation:** Generate text avoiding harmful content
- **Style transfer:** Guide generation toward specific writing styles
- **Factual grounding:** Penalize generations that contradict known facts
- **Length control:** Soft constraints on verbosity
---
*Experiments conducted December 2025. Models tested via HuggingFace Transformers and Ollama.*
""")
# Event handlers
# Wire the translate button to smc_translate(). The inputs are listed in the
# order of the function's positional parameters (concept, profession,
# custom_banned_words, model_name, num_particles, max_steps, tokens_per_step,
# constraint_mode); the outputs receive its returned 3-tuple
# (explanation, trace log, banned words).
translate_btn.click(
fn=smc_translate,
inputs=[concept_input, profession_dropdown, custom_words, model_dropdown, num_particles, max_steps, tokens_per_step, constraint_mode],
outputs=[smc_output, trace_output, banned_words_display]
)
# Launch the app only when this file is run as a script, not when imported.
# NOTE(review): passing `theme` to launch() depends on the installed Gradio
# version - confirm against the version pinned for this Space.
if __name__ == "__main__":
demo.launch(theme=gr.themes.Soft())