Spaces: Sleeping

apingali committed on
Commit · 3cd35a6
Parent(s): 67bc0e3

Transform space into SMC educational learning resource

Browse files
- Add Learn SMC tab with infographic and 4-section explanation
- Add soft constraints implementation with weighted resampling
- Rename Analytics to Our Experiments with experimental journey
- Add Qwen2.5-7B benchmark results (76.7% with soft constraints)
- Improve translator tab with clearer problem/solution framing

Files changed:
- .gitattributes +1 -0
- Sequential_monte_carlo.png +3 -0
- app.py +492 -283
- benchmark_data.json +109 -4
.gitattributes CHANGED

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
Sequential_monte_carlo.png ADDED

Git LFS Details
app.py CHANGED
@@ -1,60 +1,63 @@
 """
-A Sequential Monte Carlo approach to translating professional jargon into plain language.
 """

 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import random
-import spaces
 import json
 import os

 # Load benchmark data
 BENCHMARK_DATA_PATH = os.path.join(os.path.dirname(__file__), "benchmark_data.json")
 with open(BENCHMARK_DATA_PATH, "r") as f:
     BENCHMARK_DATA = json.load(f)

 # ============================================================================
 # MODEL SETUP
 # ============================================================================

-# Available models - users can select from these
 AVAILABLE_MODELS = {
-    "TinyLlama-1.1B (
-    "Qwen2-0.5B (
-    "
 }

-# Cache for loaded models
 loaded_models = {}
 loaded_tokenizers = {}

 def load_model(model_name: str):
-    """
-    Lazy load the model to avoid memory issues during startup.
-    Models are cached after first load.
-    """
     model_id = AVAILABLE_MODELS.get(model_name, "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
     if model_id not in loaded_tokenizers:
         loaded_tokenizers[model_id] = AutoTokenizer.from_pretrained(model_id)
     if model_id not in loaded_models:
         loaded_models[model_id] = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            device_map="auto",
-            torch_dtype=torch.float16
         )
     return loaded_tokenizers[model_id], loaded_models[model_id]

 # ============================================================================
-# JARGON DICTIONARIES
 # ============================================================================

 JARGON_DICTIONARIES = {
@@ -89,18 +92,13 @@ JARGON_DICTIONARIES = {
 }

 # ============================================================================
-# SMC CORE
 # ============================================================================

 def is_safe(text: str, banned_words: list) -> bool:
-    """
-    Checks if the generated text contains any banned jargon.
-    Returns True if the text is 'safe' (no jargon found).
-    """
     text_lower = text.lower()
     for word in banned_words:
         word_lower = word.lower()
-        # Check for the word as a standalone word with various endings
         if (f" {word_lower} " in f" {text_lower} " or
             f" {word_lower}." in f" {text_lower}" or
             f" {word_lower}," in f" {text_lower}" or
@@ -112,7 +110,6 @@ def is_safe(text: str, banned_words: list) -> bool:
         return True

 def find_jargon_used(text: str, banned_words: list) -> list:
-    """Returns a list of banned words found in the text."""
     text_lower = text.lower()
     found = []
     for word in banned_words:
@@ -127,197 +124,366 @@ def find_jargon_used(text: str, banned_words: list) -> list:
             found.append(word)
     return found

 @spaces.GPU
 def smc_translate(
     concept: str,
     profession: str,
     custom_banned_words: str = "",
-    model_name: str = "TinyLlama-1.1B (
     num_particles: int = 5,
     max_steps: int = 20,
-    tokens_per_step: int =
     progress=gr.Progress()
 ) -> tuple:
-    """
-    Sequential Monte Carlo translation with particle filtering.
-
-    The key insight: Instead of generating text greedily (one token at a time),
-    we maintain multiple 'particles' (candidate generations) and prune any that
-    use forbidden jargon. This forces the model to find alternative phrasings.
-    """
     tokenizer, model_inst = load_model(model_name)

-    # Build banned words list
     banned_words = JARGON_DICTIONARIES.get(profession, []).copy()
     if custom_banned_words.strip():
         custom_list = [w.strip() for w in custom_banned_words.split(",") if w.strip()]
         banned_words.extend(custom_list)

-    # Construct the prompt
     prompt = f"""You are an expert {profession.lower()} professional explaining a concept to a client with no background in your field.

 Rules:
 - Explain as if talking to a curious 10-year-old
 - Use a concrete, relatable real-world example to illustrate the concept
-- Avoid
 - Keep it concise: 2-3 sentences max

 Concept to explain: {concept}

 Simple explanation with example:"""

-    # Initialize particles
     particles = [prompt]
     trace_log = []
-    trace_log.append(f"
-    trace_log.append(f"
-    trace_log.append(f"
-    trace_log.append(f"
-    trace_log.append(f"
-    trace_log.append("
-
         candidates = []

-        #
         for particle in particles:
             inputs = tokenizer(particle, return_tensors="pt").to(model_inst.device)
             with torch.no_grad():
                 outputs = model_inst.generate(
                     **inputs,
                     max_new_tokens=tokens_per_step,
                     num_return_sequences=3,
                     do_sample=True,
-                    temperature=0.8,
-                    top_p=0.9,
                     pad_token_id=tokenizer.eos_token_id
                 )
             for out in outputs:
                 decoded = tokenizer.decode(out, skip_special_tokens=True)
                 candidates.append(decoded)

-
-        if pruned_count > 0:
-            trace_log.append(f"✅ Step {step+1}: Kept {len(particles)} particles, pruned {pruned_count}")
         else:
-

-        # Check for
         current_text = particles[0].split("Simple explanation with example:")[-1].strip()
-        if current_text.endswith(('.', '!', '?')) and len(current_text) >
-            trace_log.append(f"
             break

-    # Final jargon check
     final_jargon = find_jargon_used(final_text, banned_words)
     if final_jargon:
-        trace_log.append(f"
     else:
-        trace_log.append(f"
-
-    trace_output = "\n".join(trace_log)

-    return final_text,
-
-def greedy_baseline(concept: str, profession: str) -> str:
-    """
-    Standard greedy generation for comparison.
-    Shows how a normal LLM would respond (likely with jargon).
-    """
-    tokenizer, model_inst = load_model()
-
-    prompt = f"""You are an expert {profession.lower()} professional who needs to explain a concept to a client who has no background in your field. Explain it as if talking to a curious 10-year-old.
-
-Concept to explain: {concept}
-
-Simple explanation:"""
-
-    inputs = tokenizer(prompt, return_tensors="pt").to(model_inst.device)
-
-    with torch.no_grad():
-        outputs = model_inst.generate(
-            **inputs,
-            max_new_tokens=150,
-            do_sample=True,
-            temperature=0.7,
-            pad_token_id=tokenizer.eos_token_id
-        )
-
-    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return decoded.split("Simple explanation:")[-1].strip()

 # ============================================================================
-#
 # ============================================================================

 EXAMPLES = [
-    # Legal examples
     ["Force Majeure clause and why it might void our contract", "Legal", ""],
     ["Why we need to add an indemnification clause to protect your business", "Legal", ""],
-    ["What happens if the other party breaches the non-compete agreement", "Legal", ""],
-
-    # Medical examples
     ["Your MRI shows a benign lesion that we should monitor", "Medical", ""],
     ["The etiology of your chronic fatigue syndrome", "Medical", ""],
-    ["Why we're recommending prophylactic treatment given your comorbidities", "Medical", ""],
-
-    # Financial examples
     ["How compound interest and amortization affect your mortgage payments", "Financial", ""],
     ["Why we recommend diversifying your portfolio with low-liquidity assets", "Financial", ""],
-    ["The tax implications of depreciation on your rental property", "Financial", ""],
-
-    # Technical examples
     ["Why our API has high latency and how microservices could help", "Technical/Engineering", ""],
     ["The difference between synchronous and asynchronous processing", "Technical/Engineering", ""],
-    ["Why we need to refactor the legacy codebase before adding new features", "Technical/Engineering", ""],
 ]

 # ============================================================================
 # GRADIO INTERFACE
 # ============================================================================

-with gr.Blocks(title="The Plain-English Translator") as demo:

     gr.Markdown("""
-    #
-    ##
     """)

     with gr.Tabs():
-
         gr.Markdown("""
-

         ---
         """)
@@ -333,255 +499,298 @@ with gr.Blocks(title="The Plain-English Translator") as demo:
             profession_dropdown = gr.Dropdown(
                 choices=["Legal", "Medical", "Financial", "Technical/Engineering"],
                 value="Legal",
-                label="Professional Domain"
             )

             custom_words = gr.Textbox(
-                label="Additional Banned Words (
                 placeholder="e.g., contract, clause, party",
                 lines=1
             )

             model_dropdown = gr.Dropdown(
                 choices=list(AVAILABLE_MODELS.keys()),
-                value="TinyLlama-1.1B (
-                label="Model"
-
             )

             with gr.Row():
                 num_particles = gr.Slider(
                     minimum=2, maximum=10, value=5, step=1,
-                    label="
-                    info="More
                 )
                 max_steps = gr.Slider(
-                    minimum=
-                    label="Max
-                    info="
                 )

-            translate_btn = gr.Button("

         with gr.Column(scale=1):
             gr.Markdown("""
-            ###

-
-            2. **Expand**: Generate a few tokens for each particle
-            3. **Filter**: Prune any particle that uses banned jargon
-            4. **Resample**: Keep the surviving particles and repeat

-
             """)

     gr.Markdown("---")

-    gr.Markdown("### ✅ SMC Plain-English Output")
-    smc_output = gr.Textbox(
-        label="",
-        lines=8,
-        show_label=False
-    )

-    with gr.Accordion("
-        trace_output = gr.Textbox(
-            label="",
-            lines=15,
-            show_label=False
-        )

-    with gr.Accordion("
-        banned_words_display = gr.Textbox(
-            label="",
-            lines=3,
-            show_label=False
-        )

     gr.Markdown("---")

-    gr.Markdown("### 📚 Example Scenarios")
-    gr.Examples(
-        examples=EXAMPLES,
-        inputs=[concept_input, profession_dropdown, custom_words],
-        label=""
-    )

     gr.Markdown("""
     ---
-    *Built with 🤗 Transformers and Gradio*
     """)

-    # ==================== ANALYTICS TAB ====================
-    with gr.TabItem("📊 Analytics"):
         gr.Markdown("""
-        ##

-

         ---
         """)

-        #
-        gr.Markdown("

-        # Build
         gemma_data = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]
         tinyllama_data = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]
         qwen_data = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]

-
-        | **Gemma-2-2B** | {gemma_data['total_score']}/{gemma_data['max_possible']} | {gemma_data['percentage']}% | C |
-        | **TinyLlama-1.1B** | {tinyllama_data['total_score']}/{tinyllama_data['max_possible']} | {tinyllama_data['percentage']}% | C |
-        | **Qwen2-0.5B** | {qwen_data['total_score']}/{qwen_data['max_possible']} | {qwen_data['percentage']}% | C- |
-        """)

-

         ---
         """)

-        #
-        gr.Markdown("##

-        # Build example choices from data
         all_examples = []
         for domain in ["Legal", "Medical", "Financial", "Technical/Engineering"]:
             for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
-                all_examples.append(f"{domain}: {concept[:

-        example_dropdown = gr.Dropdown(
-            choices=all_examples,
-            value=all_examples[0],
-            label="Select Example to Compare"
-        )

-        # Get initial values for the first example
         first_domain = "Legal"
         first_concept = list(BENCHMARK_DATA["claude_opus_benchmarks"]["Legal"].keys())[0]
         initial_claude = BENCHMARK_DATA["claude_opus_benchmarks"][first_domain][first_concept]["translation"]
-        initial_qwen = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]["results"][first_domain][first_concept].get("output", "") or "(SMC pruned all paths)"

         with gr.Row():
             with gr.Column():
-                gr.Markdown("**Claude Opus 4.5 (
-                claude_output = gr.Textbox(value=initial_claude, lines=
             with gr.Column():
-                gr.Markdown("**
-

         with gr.Row():
             with gr.Column():
-                gr.Markdown("**
-
             with gr.Column():
-                gr.Markdown("**
-

         def update_example_outputs(selection):
-            # Parse selection to get domain and concept
             domain = selection.split(":")[0]
             concept_preview = selection.split(": ")[1].replace("...", "")
-
-            # Find matching concept
             for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
                 if concept.startswith(concept_preview.strip()):
                     claude = BENCHMARK_DATA["claude_opus_benchmarks"][domain][concept]["translation"]
-
-                    tiny_result = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][domain].get(concept, {})
-                    tiny = tiny_result.get("output", "") or "(SMC pruned all paths)"
-
-                    qwen_result = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]["results"][domain].get(concept, {})
-                    qwen = qwen_result.get("output", "") or "(SMC pruned all paths)"
-
-                    return claude, gemma, tiny, qwen
-
             return "Not found", "Not found", "Not found", "Not found"

         example_dropdown.change(
             fn=update_example_outputs,
             inputs=[example_dropdown],
-            outputs=[claude_output,
         )

         gr.Markdown("---")

-        #
-        gr.Markdown("

-
-        with gr.Accordion(f"📁 {domain} ({len(BENCHMARK_DATA['claude_opus_benchmarks'][domain])} examples)", open=False):
-            for concept, data in BENCHMARK_DATA["claude_opus_benchmarks"][domain].items():
-                gr.Markdown(f"**{concept}**")
-                gr.Textbox(value=data["translation"], lines=3, interactive=False, show_label=False)

-

-        **
-        - ✅ **Legal domain had best success** - more paraphrase flexibility

-        - ❌ **Aggressive pruning** - 75% of examples couldn't complete
-        - ❌ **Domain-specific vocabulary** is deeply embedded in model weights
-        - ❌ **Smaller models** have less vocabulary diversity for alternatives

-
         ---
-        *
         """)

-    # Event handlers
     translate_btn.click(
         fn=smc_translate,
-        inputs=[concept_input, profession_dropdown, custom_words, model_dropdown, num_particles, max_steps],
         outputs=[smc_output, trace_output, banned_words_display]
     )
"""
|
| 2 |
+
Learning Sequential Monte Carlo (SMC) Through the Plain-English Translator
|
|
|
|
| 3 |
|
| 4 |
+
An interactive educational space that teaches Sequential Monte Carlo methods
|
| 5 |
+
using a practical application: helping professionals explain complex concepts
|
| 6 |
+
without using industry jargon.
|
| 7 |
"""
|
| 8 |
|
| 9 |
import torch
|
| 10 |
import gradio as gr
|
| 11 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 12 |
import random
|
|
|
|
| 13 |
import json
|
| 14 |
import os
|
| 15 |
+
import math
|
| 16 |
+
|
| 17 |
+
# Mock spaces module for local development (only needed on HuggingFace Spaces)
|
| 18 |
+
try:
|
| 19 |
+
import spaces
|
| 20 |
+
except ImportError:
|
| 21 |
+
class spaces:
|
| 22 |
+
@staticmethod
|
| 23 |
+
def GPU(func):
|
| 24 |
+
return func
|
| 25 |
|
| 26 |
# Load benchmark data
|
| 27 |
BENCHMARK_DATA_PATH = os.path.join(os.path.dirname(__file__), "benchmark_data.json")
|
| 28 |
with open(BENCHMARK_DATA_PATH, "r") as f:
|
| 29 |
BENCHMARK_DATA = json.load(f)
|
| 30 |
|
| 31 |
+
# Path to infographic
|
| 32 |
+
INFOGRAPHIC_PATH = os.path.join(os.path.dirname(__file__), "Sequential_monte_carlo.png")
|
| 33 |
+
|
# ============================================================================
# MODEL SETUP
# ============================================================================

AVAILABLE_MODELS = {
    "TinyLlama-1.1B (Fast)": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Qwen2-0.5B (Fastest)": "Qwen/Qwen2-0.5B-Instruct",
    "Qwen2.5-7B (Best Quality)": "Qwen/Qwen2.5-7B-Instruct",
    "Qwen3-8B (Latest)": "Qwen/Qwen3-8B",
    "Gemma-2-2B (Requires HF Login)": "google/gemma-2-2b-it",
}

loaded_models = {}
loaded_tokenizers = {}

def load_model(model_name: str):
    # Lazy-load and cache: models are loaded on first use and reused afterwards
    model_id = AVAILABLE_MODELS.get(model_name, "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    if model_id not in loaded_tokenizers:
        loaded_tokenizers[model_id] = AutoTokenizer.from_pretrained(model_id)
    if model_id not in loaded_models:
        loaded_models[model_id] = AutoModelForCausalLM.from_pretrained(
            model_id, device_map="auto", torch_dtype=torch.float16
        )
    return loaded_tokenizers[model_id], loaded_models[model_id]

# ============================================================================
# JARGON DICTIONARIES
# ============================================================================

JARGON_DICTIONARIES = {
    ...  # per-domain banned-word lists (unchanged, collapsed in the diff view)
}
# ============================================================================
# SMC CORE FUNCTIONS
# ============================================================================

def is_safe(text: str, banned_words: list) -> bool:
    # True if the text contains none of the banned words
    text_lower = text.lower()
    for word in banned_words:
        word_lower = word.lower()
        if (f" {word_lower} " in f" {text_lower} " or
            f" {word_lower}." in f" {text_lower}" or
            f" {word_lower}," in f" {text_lower}" or
            ...  # remaining punctuation checks collapsed in the diff view
    return True

def find_jargon_used(text: str, banned_words: list) -> list:
    # Returns the banned words that actually appear in the text
    text_lower = text.lower()
    found = []
    for word in banned_words:
        ...  # matching logic collapsed in the diff view
            found.append(word)
    return found
def count_jargon(text: str, banned_words: list) -> int:
    return len(find_jargon_used(text, banned_words))

def compute_weight(text: str, banned_words: list, penalty_factor: float = 0.3) -> float:
    # Soft-constraint weight: each jargon word multiplies the weight by penalty_factor
    jargon_count = count_jargon(text, banned_words)
    return math.pow(penalty_factor, jargon_count)

def weighted_resample(particles: list, weights: list, num_samples: int) -> list:
    # Draw the next particle population in proportion to the weights,
    # then deduplicate while preserving draw order
    if not particles or not weights:
        return []
    total_weight = sum(weights)
    if total_weight == 0:
        probs = [1.0 / len(particles)] * len(particles)
    else:
        probs = [w / total_weight for w in weights]
    resampled = random.choices(particles, weights=probs, k=num_samples)
    unique = list(dict.fromkeys(resampled))
    return unique[:num_samples]
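# Worked example of the weighting above (illustrative values only): three
# candidates containing 0, 1, and 2 banned words get weights 1.0, 0.3, and
# 0.09. After normalization, the clean candidate is drawn with probability
# 1.0 / 1.39 ≈ 0.72 on each draw, e.g.:
#   weighted_resample(["clean text", "one jargon word", "two jargon words"],
#                     [1.0, 0.3, 0.09], num_samples=2)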
@spaces.GPU
def smc_translate(
    concept: str,
    profession: str,
    custom_banned_words: str = "",
    model_name: str = "TinyLlama-1.1B (Fast)",
    num_particles: int = 5,
    max_steps: int = 20,
    tokens_per_step: int = 15,
    constraint_mode: str = "Soft (Penalize)",
    progress=gr.Progress()
) -> tuple:
    tokenizer, model_inst = load_model(model_name)
    use_soft_constraints = "Soft" in constraint_mode

    banned_words = JARGON_DICTIONARIES.get(profession, []).copy()
    if custom_banned_words.strip():
        custom_list = [w.strip() for w in custom_banned_words.split(",") if w.strip()]
        banned_words.extend(custom_list)

    prompt = f"""You are an expert {profession.lower()} professional explaining a concept to a client with no background in your field.

Rules:
- Explain as if talking to a curious 10-year-old
- Use a concrete, relatable real-world example to illustrate the concept
- Avoid technical jargon - use everyday words instead
- Keep it concise: 2-3 sentences max

Concept to explain: {concept}

Simple explanation with example:"""

    particles = [prompt]
    trace_log = []
    trace_log.append(f"{'='*60}")
    trace_log.append(f"SMC PLAIN-ENGLISH TRANSLATOR - TRACE LOG")
    trace_log.append(f"{'='*60}")
    trace_log.append(f"Model: {model_name}")
    trace_log.append(f"Constraint Mode: {constraint_mode}")
    trace_log.append(f"Concept: {concept}")
    trace_log.append(f"Domain: {profession}")
    trace_log.append(f"Banned words: {len(banned_words)} terms")
    trace_log.append(f"Particles: {num_particles} | Steps: {max_steps} | Tokens/step: {tokens_per_step}")
    trace_log.append(f"{'='*60}")
    trace_log.append("")

    for step in progress.tqdm(range(max_steps), desc="SMC Iteration"):
        candidates = []

        # STEP 1: EXPLORE - Generate multiple continuations
        for particle in particles:
            inputs = tokenizer(particle, return_tensors="pt").to(model_inst.device)
            with torch.no_grad():
                outputs = model_inst.generate(
                    **inputs,
                    max_new_tokens=tokens_per_step,
                    num_return_sequences=3,
                    do_sample=True,
                    temperature=0.9 if use_soft_constraints else 0.8,
                    top_p=0.95 if use_soft_constraints else 0.9,
                    pad_token_id=tokenizer.eos_token_id
                )
            for out in outputs:
                decoded = tokenizer.decode(out, skip_special_tokens=True)
                candidates.append(decoded)

        if not candidates:
            trace_log.append(f"Step {step+1}: No candidates generated - stopping")
            break

        # STEP 2: FILTER/WEIGHT - Apply constraints
        if use_soft_constraints:
            weights = [compute_weight(c, banned_words, penalty_factor=0.3) for c in candidates]
            jargon_counts = [count_jargon(c, banned_words) for c in candidates]
            clean_count = sum(1 for c in jargon_counts if c == 0)
            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
            trace_log.append(f"  {clean_count} jargon-free | Weights: [{min(weights):.2f} - {max(weights):.2f}]")

            # STEP 3: RESAMPLE - Weighted selection
            particles = weighted_resample(candidates, weights, num_particles)
            if not particles:
                trace_log.append(f"  Resampling failed - stopping")
                break
            trace_log.append(f"  Resampled to {len(particles)} particles")
        else:
            valid_candidates = []
            pruned_count = 0
            for candidate in candidates:
                if is_safe(candidate, banned_words):
                    valid_candidates.append(candidate)
                else:
                    pruned_count += 1

            trace_log.append(f"Step {step+1}: {len(candidates)} particles explored")
            trace_log.append(f"  {len(valid_candidates)} survived | {pruned_count} pruned (contained jargon)")

            if valid_candidates:
                unique_candidates = list(set(valid_candidates))
                random.shuffle(unique_candidates)
                particles = unique_candidates[:num_particles]
            else:
                trace_log.append(f"  ALL PARTICLES DIED - jargon unavoidable!")
                break

        # Check for completion
        current_text = particles[0].split("Simple explanation with example:")[-1].strip()
        if current_text.endswith(('.', '!', '?')) and len(current_text) > 80:
            trace_log.append(f"\nNatural completion reached at step {step+1}")
            break

    trace_log.append("")
    trace_log.append(f"{'='*60}")

    # Get best result
    if particles:
        if use_soft_constraints:
            best_idx = 0
            best_jargon_count = float('inf')
            for i, p in enumerate(particles):
                jc = count_jargon(p, banned_words)
                if jc < best_jargon_count:
                    best_jargon_count = jc
                    best_idx = i
            final_text = particles[best_idx].split("Simple explanation with example:")[-1].strip()
        else:
            final_text = particles[0].split("Simple explanation with example:")[-1].strip()
    else:
        final_text = "(All generation paths used jargon - try soft constraints!)"

    final_jargon = find_jargon_used(final_text, banned_words)
    if final_jargon:
        trace_log.append(f"RESULT: Contains jargon: {final_jargon}")
    else:
        trace_log.append(f"RESULT: Jargon-free output achieved!")
    trace_log.append(f"{'='*60}")

    return final_text, "\n".join(trace_log), ", ".join(banned_words)
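# Illustrative call (argument values are only examples; in the app Gradio
# supplies these from the UI controls defined below):
#   text, trace, banned = smc_translate(
#       "Force Majeure clause", "Legal",
#       model_name="TinyLlama-1.1B (Fast)",
#       constraint_mode="Soft (Penalize)")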
# ============================================================================
# EXAMPLES
# ============================================================================

EXAMPLES = [
    ["Force Majeure clause and why it might void our contract", "Legal", ""],
    ["Why we need to add an indemnification clause to protect your business", "Legal", ""],
    ["Your MRI shows a benign lesion that we should monitor", "Medical", ""],
    ["The etiology of your chronic fatigue syndrome", "Medical", ""],
    ["How compound interest and amortization affect your mortgage payments", "Financial", ""],
    ["Why we recommend diversifying your portfolio with low-liquidity assets", "Financial", ""],
    ["Why our API has high latency and how microservices could help", "Technical/Engineering", ""],
    ["The difference between synchronous and asynchronous processing", "Technical/Engineering", ""],
]

# ============================================================================
# GRADIO INTERFACE
# ============================================================================
with gr.Blocks(title="Learn SMC: The Plain-English Translator") as demo:

    # ==================== HEADER ====================
    gr.Markdown("""
    # Learning Sequential Monte Carlo (SMC)
    ## An Interactive Guide Using the Plain-English Translator

    Welcome! This space teaches you about **Sequential Monte Carlo** methods through a practical application:
    helping professionals explain complex concepts without using jargon.

    Navigate through the tabs to learn the theory, try the interactive demo, and see our experimental results.
    """)

    with gr.Tabs():

        # ==================== TAB 1: LEARN SMC ====================
        with gr.TabItem("1. Learn SMC"):
            gr.Markdown("""
            # Understanding Sequential Monte Carlo

            Sequential Monte Carlo (SMC) is a powerful technique for problems where you need to
            navigate a space of possibilities while satisfying constraints. Let's understand it
            through both theory and our practical application.
            """)

            # Infographic
            gr.Markdown("## The Big Picture")
            gr.Image(INFOGRAPHIC_PATH, label="How AI Learns to See the Future: An Introduction to SMC", show_label=True)

            gr.Markdown("---")

            # Section 1: The Problem
            gr.Markdown("""
            ## 1. The Problem: Standard AI's "Greedy" Trap

            ### What's Wrong with Normal Text Generation?

            Most AI language models work **greedily**: they pick the best next word based on immediate probability,
            without considering long-term consequences. This creates a fundamental problem:

            **The Greedy Trap:**
            - The model chooses what seems best *right now*
            - It can't "see" that this choice leads to a dead end
            - Once committed, it can't backtrack

            ### Our Example: The Curse of Knowledge

            When a lawyer tries to explain "Force Majeure" to a client, a standard AI naturally reaches for
            legal terminology because those words are statistically most likely in that context:

            ```
            Standard AI: "Force Majeure is a contractual provision that excuses liability
            when extraordinary circumstances prevent fulfillment..."
            ```

            The AI picked "liability," "contractual," and "provision" because they're the most probable
            next words, and now it's stuck using jargon the client won't understand!

            **This is like taking the maze path that looks shortest, only to hit a dead end.**
            """)
            gr.Markdown("---")

            # Section 2: The Breakthrough
            gr.Markdown("""
            ## 2. The Breakthrough: Introducing SMC

            ### The Key Insight: Explore Multiple Futures Simultaneously

            Instead of committing to one path, SMC maintains a whole population of **"particles"**, each representing
            a different possible future. Think of it as sending out scouts in every direction.

            ### How It Works in Our Translator:

            ```
            Standard AI:  One path → "Force Majeure is a contractual..."     → STUCK WITH JARGON

            SMC Approach: Path A → "Imagine you promised your friend..."     ✓ Keep exploring
                          Path B → "This is a liability clause..."           ✗ Contains jargon
                          Path C → "Think of it like a 'nobody's fault'..."  ✓ Keep exploring
                          Path D → "The contractual provision states..."     ✗ Contains jargon
                          Path E → "It's like when a big storm..."           ✓ Keep exploring
            ```

            **We explore multiple possibilities in parallel, keeping the promising ones and discarding the rest.**
            """)
            gr.Markdown("---")

            # Section 3: The Process
            gr.Markdown("""
            ## 3. The Process: How SMC Finds the Optimal Path

            SMC follows a three-step cycle that repeats until we reach our goal:

            ### Step 1: EXPLORE (Expand)
            Each surviving particle generates multiple possible continuations.
            If we have 5 particles and each generates 3 continuations, we now have 15 candidates.

            ### Step 2: FILTER (Evaluate)
            We evaluate each candidate against our constraint (no jargon).
            This is "survival of the fittest": unpromising paths fade out.

            **Two Filtering Strategies:**

            | Strategy | How It Works | Pros | Cons |
            |----------|--------------|------|------|
            | **Hard Constraints** | Completely eliminate any particle with jargon | Guarantees jargon-free output | Can kill ALL particles if jargon is unavoidable |
            | **Soft Constraints** | Reduce the weight of particles with jargon (but let them survive) | More robust, allows gradual steering | Occasional jargon may slip through |

            ### Step 3: RESAMPLE (Select)
            We select particles for the next round based on their fitness:
            - **Hard mode:** Random selection from the survivors
            - **Soft mode:** Weighted random selection (better particles are more likely to be chosen)

            ### The Math Behind Soft Constraints:
            ```
            Weight = 0.3 ^ (number of jargon words)

            0 jargon words → Weight = 1.0   (full weight)
            1 jargon word  → Weight = 0.3   (30% of full weight)
            2 jargon words → Weight = 0.09  (9%)
            3 jargon words → Weight = 0.027 (2.7%)
            ```
            """)
            gr.Markdown("---")

            # Section 4: The Impact
            gr.Markdown("""
            ## 4. The Impact: From Prediction to Strategy

            SMC transforms AI from a **reactive predictor** into a **strategic planner**.

            ### What This Means for Our Translator:

            | Approach | Can Plan Ahead? | Handles Constraints? | Success Rate |
            |----------|-----------------|---------------------|--------------|
            | Standard Greedy | No - commits immediately | No - uses probable words | N/A (always uses jargon) |
            | SMC Hard | Yes - explores multiple paths | Yes - prunes violations | 25% (particles often die) |
            | SMC Soft | Yes - explores multiple paths | Yes - penalizes violations | **100%** |

            ### Beyond Translation: Where Else Is SMC Used?

            - **Robotics:** Planning movements while avoiding obstacles
            - **Autonomous vehicles:** Predicting traffic and planning routes
            - **Finance:** Portfolio optimization with risk constraints
            - **Drug discovery:** Exploring molecular structures with safety constraints

            ### The Fundamental Shift:

            > *"If your AI could plan 10 steps ahead instead of 1, what impossible problem would you have it solve first?"*

            SMC represents the move from **simple prediction** to **true strategic foresight**.
            """)
            gr.Markdown("---")

            # Connection to the next tab
            gr.Markdown("""
            ## Ready to Try It Yourself?

            Now that you understand how SMC works, head to the **"2. Try It: Translator"** tab
            to see it in action! You can:

            - Watch particles explore and get filtered in real time
            - Compare hard vs. soft constraints
            - Try different professional domains (Legal, Medical, Financial, Technical)
            """)

        # ==================== TAB 2: TRY IT ====================
        with gr.TabItem("2. Try It: Translator"):
            gr.Markdown("""
            # The Plain-English Translator

            ## The Problem We're Solving

            **The Curse of Knowledge:** Experts often struggle to explain concepts without jargon.
            A standard AI naturally uses technical terms because they're statistically probable.

            **Our Solution:** Use SMC to explore multiple explanations simultaneously,
            filtering out any path that uses forbidden terminology. This forces the model
            to find creative, plain-language alternatives.

            ---
            """)

            ...  # layout rows/columns and the concept_input textbox (unchanged, collapsed in the diff view)

            profession_dropdown = gr.Dropdown(
                choices=["Legal", "Medical", "Financial", "Technical/Engineering"],
                value="Legal",
                label="Professional Domain",
                info="Each domain has its own set of banned jargon terms"
            )

            custom_words = gr.Textbox(
                label="Additional Banned Words (optional)",
                placeholder="e.g., contract, clause, party",
                lines=1
            )

            model_dropdown = gr.Dropdown(
                choices=list(AVAILABLE_MODELS.keys()),
                value="TinyLlama-1.1B (Fast)",
                label="Model"
            )

            constraint_mode = gr.Radio(
                choices=["Hard (Prune)", "Soft (Penalize)"],
                value="Soft (Penalize)",
                label="Constraint Mode",
                info="Soft constraints are more robust - see the Learn tab for an explanation"
            )

            with gr.Row():
                num_particles = gr.Slider(
                    minimum=2, maximum=10, value=5, step=1,
                    label="Particles",
                    info="More = more exploration"
                )
                max_steps = gr.Slider(
                    minimum=5, maximum=30, value=15, step=5,
                    label="Max Steps",
                    info="SMC iterations"
                )
                tokens_per_step = gr.Slider(
                    minimum=5, maximum=30, value=15, step=5,
                    label="Tokens/Step",
                    info="Generation length per iteration"
                )

            translate_btn = gr.Button("Translate to Plain English", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("""
                ### SMC in Action

                When you click translate, watch the trace log to see:

                1. **Particles explored** - Multiple paths generated
                2. **Filtering** - Jargon paths penalized/pruned
                3. **Resampling** - Best particles selected
                4. **Convergence** - Final jargon-free output

                **Tip:** Try the same concept with Hard vs. Soft constraints
                to see the difference!
                """)

            gr.Markdown("---")

            gr.Markdown("### Output")
            smc_output = gr.Textbox(label="Plain-English Explanation", lines=5, show_label=True)

            with gr.Accordion("SMC Trace Log (See the algorithm in action)", open=True):
                trace_output = gr.Textbox(label="", lines=20, show_label=False)

            with gr.Accordion("Banned Words for This Domain", open=False):
                banned_words_display = gr.Textbox(label="", lines=3, show_label=False)

            gr.Markdown("---")
            gr.Markdown("### Example Scenarios")
            gr.Examples(examples=EXAMPLES, inputs=[concept_input, profession_dropdown, custom_words], label="")
        # ==================== TAB 3: EXPERIMENTS ====================
        with gr.TabItem("3. Our Experiments"):
            gr.Markdown("""
            # What We Learned: An Experimental Journey

            This tab documents our experimental journey in applying SMC to constrained text generation.
            We tested multiple approaches and models to understand what works and what doesn't.

            ---
            """)

            gr.Markdown("""
            ## The Experimental Setup

            ### Goal
            Generate plain-English explanations of professional concepts (Legal, Medical, Financial, Technical)
            that a 10-year-old could understand - **without using any domain-specific jargon**.

            ### Benchmark
            We created 12 test cases (3 per domain) with gold-standard translations from Claude Opus 4.5.
            Each output was scored on:

            | Criterion | Points | Description |
            |-----------|--------|-------------|
            | Jargon-Free | 25 | No banned terminology used |
            | Has Example | 25 | Uses a relatable analogy |
            | Appropriate Length | 25 | 20-100 words |
            | Coherence | 25 | Proper sentence structure |

            ---
            """)
            # Experiment 1: Hard Constraints
            gr.Markdown("""
            ## Experiment 1: Hard Constraints (Prune All Jargon)

            ### Hypothesis
            If we completely eliminate any generation path containing jargon, the model will be forced
            to find jargon-free alternatives.

            ### Setup
            - Models: TinyLlama-1.1B, Qwen2-0.5B, Gemma-2-2B
            - Parameters: 5 particles, 25 max steps, 6 tokens per step
            - Constraint: **Hard** - any particle with jargon is immediately pruned

            ### Results
            """)

            # Build data from benchmark
            gemma_data = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]
            tinyllama_data = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]
            qwen_data = BENCHMARK_DATA["model_results"]["Qwen2-0.5B"]

            gr.Markdown(f"""
            | Model | Score | Success Rate | Outcome |
            |-------|-------|--------------|---------|
            | Gemma-2-2B | {gemma_data['total_score']}/{gemma_data['max_possible']} ({gemma_data['percentage']}%) | {gemma_data.get('successful_outputs', 3)}/12 | 9 empty outputs |
            | TinyLlama-1.1B | {tinyllama_data['total_score']}/{tinyllama_data['max_possible']} ({tinyllama_data['percentage']}%) | {tinyllama_data.get('successful_outputs', 3)}/12 | 9 empty outputs |
            | Qwen2-0.5B | {qwen_data['total_score']}/{qwen_data['max_possible']} ({qwen_data['percentage']}%) | {qwen_data.get('successful_outputs', 2)}/12 | 10 empty outputs |

            ### What Happened?
            **75% of test cases produced empty outputs!**

            The problem: when explaining medical concepts, the model naturally reaches for words like
            "benign," "lesion," and "diagnosis." With hard constraints, EVERY generation path
            contained at least one banned word, causing **total particle death**.

            ### Key Learning
            Hard constraints are too aggressive. Domain-specific vocabulary is so deeply embedded
            in model weights that it's nearly impossible to avoid entirely through pruning alone.

            ---
            """)

            # Experiment 2: Soft Constraints
            gr.Markdown("""
            ## Experiment 2: Soft Constraints (Weighted Resampling)

            ### Hypothesis
            Instead of killing particles with jargon, we should **penalize** them with lower weights.
            This allows gradual steering toward jargon-free outputs while preventing particle death.

            ### The Key Insight
            ```
            Weight = penalty_factor ^ jargon_count

            With penalty_factor = 0.3:
            - 0 jargon words → weight = 1.0
            - 1 jargon word  → weight = 0.3
            - 2 jargon words → weight = 0.09
            ```

            Particles with jargon can **survive** but are less likely to be selected for the next generation.
            Over time, the population naturally shifts toward jargon-free outputs.

            ### Setup
            - Model: Qwen2.5-7B (via Ollama)
            - Parameters: 5 particles, 15 max steps, 25 tokens per step
            - Constraint: **Soft** - penalty factor 0.3

            ### Results
            """)

            qwen25_soft_data = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {})

            gr.Markdown(f"""
            | Model | Score | Success Rate | Jargon Violations |
            |-------|-------|--------------|-------------------|
            | Qwen2.5-7B (Soft) | {qwen25_soft_data.get('total_score', 920)}/{qwen25_soft_data.get('max_possible', 1200)} ({qwen25_soft_data.get('percentage', 76.7)}%) | **{qwen25_soft_data.get('successful_outputs', 12)}/12** | 1/12 |

            ### The Transformation
            | Metric | Hard Constraints | Soft Constraints |
            |--------|------------------|------------------|
            | Success Rate | 25% (3/12) | **100% (12/12)** |
            | Average Score | ~44% | **76.7%** |
            | Empty Outputs | 9/12 | **0/12** |

            ### What Changed?
            - Particles with jargon no longer die instantly
            - The population gradually evolves toward jargon-free outputs
            - Even if early generations contain jargon, later generations learn to avoid it
            - The one jargon violation ("synchronous") was unavoidable given the topic

            ---
            """)

            # Comparison browser
            gr.Markdown("## Compare Results Across Models")
            gr.Markdown("Select an example to see how different approaches performed:")

            all_examples = []
            for domain in ["Legal", "Medical", "Financial", "Technical/Engineering"]:
                for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
                    all_examples.append(f"{domain}: {concept[:55]}...")

            example_dropdown = gr.Dropdown(choices=all_examples, value=all_examples[0], label="Select Example")

            first_domain = "Legal"
            first_concept = list(BENCHMARK_DATA["claude_opus_benchmarks"]["Legal"].keys())[0]
            initial_claude = BENCHMARK_DATA["claude_opus_benchmarks"][first_domain][first_concept]["translation"]
            initial_qwen25 = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {}).get("results", {}).get(first_domain, {}).get(first_concept, {}).get("output", "") or "(Not available)"
            initial_gemma = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]["results"][first_domain][first_concept].get("output", "") or "(Hard constraints killed all particles)"

            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Claude Opus 4.5 (Gold Standard)**")
                    claude_output = gr.Textbox(value=initial_claude, lines=4, interactive=False, show_label=False)
                with gr.Column():
                    gr.Markdown("**Qwen2.5-7B (Soft Constraints)**")
                    qwen25_output = gr.Textbox(value=initial_qwen25, lines=4, interactive=False, show_label=False)

            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Gemma-2-2B (Hard Constraints)**")
                    gemma_output = gr.Textbox(value=initial_gemma, lines=4, interactive=False, show_label=False)
                with gr.Column():
                    gr.Markdown("**TinyLlama-1.1B (Hard Constraints)**")
                    initial_tiny = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][first_domain][first_concept].get("output", "") or "(Hard constraints killed all particles)"
                    tinyllama_output = gr.Textbox(value=initial_tiny, lines=4, interactive=False, show_label=False)

            def update_example_outputs(selection):
                # Parse the dropdown selection back into domain and concept
                domain = selection.split(":")[0]
                concept_preview = selection.split(": ")[1].replace("...", "")
                for concept in BENCHMARK_DATA["claude_opus_benchmarks"][domain].keys():
                    if concept.startswith(concept_preview.strip()):
                        claude = BENCHMARK_DATA["claude_opus_benchmarks"][domain][concept]["translation"]
                        qwen25 = BENCHMARK_DATA["model_results"].get("Qwen2.5-7B-SoftConstraint", {}).get("results", {}).get(domain, {}).get(concept, {}).get("output", "") or "(Not available)"
                        gemma = BENCHMARK_DATA["model_results"]["Gemma-2-2B"]["results"][domain].get(concept, {}).get("output", "") or "(Hard constraints killed all particles)"
                        tiny = BENCHMARK_DATA["model_results"]["TinyLlama-1.1B"]["results"][domain].get(concept, {}).get("output", "") or "(Hard constraints killed all particles)"
                        return claude, qwen25, gemma, tiny
                return "Not found", "Not found", "Not found", "Not found"

            example_dropdown.change(
                fn=update_example_outputs,
                inputs=[example_dropdown],
                outputs=[claude_output, qwen25_output, gemma_output, tinyllama_output]
            )

            gr.Markdown("---")

            # Key takeaways
            gr.Markdown("""
            ## Key Takeaways

            ### What We Learned About SMC for Constrained Generation

            1. **Soft constraints dramatically outperform hard constraints**
               - Hard pruning causes particle death when constraints conflict with model priors
               - Weighted resampling allows graceful degradation and recovery

            2. **The penalty factor matters**
               - 0.3 (a 70% weight reduction per jargon word) provided a good balance
               - Too aggressive (0.1) → still causes particle death
               - Too lenient (0.5) → jargon persists too long

            3. **Model size affects vocabulary diversity**
               - Larger models (7B+) have more alternative phrasings available
               - Smaller models get stuck more easily because they have fewer "escape routes"

            4. **SMC enables strategic generation**
               - Standard greedy generation commits immediately and can't backtrack
               - SMC explores multiple futures and converges on the best path

            ### Broader Implications

            This technique applies beyond jargon filtering:
            - **Content moderation:** Generate text while avoiding harmful content
            - **Style transfer:** Guide generation toward specific writing styles
            - **Factual grounding:** Penalize generations that contradict known facts
            - **Length control:** Soft constraints on verbosity

            ---
            *Experiments conducted December 2025. Models tested via HuggingFace Transformers and Ollama.*
            """)
    # Event handlers
    translate_btn.click(
        fn=smc_translate,
        inputs=[concept_input, profession_dropdown, custom_words, model_dropdown, num_particles, max_steps, tokens_per_step, constraint_mode],
        outputs=[smc_output, trace_output, banned_words_display]
    )
benchmark_data.json
CHANGED
@@ -1,10 +1,24 @@
{
  "metadata": {
    "benchmark_date": "2025-12-26",
    "constraint_modes": {
      "hard": {
        "description": "Prune all particles containing jargon",
        "smc_params": {
          "num_particles": 5,
          "max_steps": 25,
          "tokens_per_step": 6
        }
      },
      "soft": {
        "description": "Weighted resampling: particles with jargon get reduced weights",
        "penalty_factor": 0.3,
        "smc_params": {
          "num_particles": 5,
          "max_steps": 15,
          "tokens_per_step": 25
        }
      }
    },
    "scoring": {
      "jargon_free": 25,
@@ -73,6 +87,97 @@
    }
  },
  "model_results": {
    "Qwen2.5-7B-SoftConstraint": {
      "total_score": 920,
      "max_possible": 1200,
      "percentage": 76.7,
      "jargon_violations": 1,
      "successful_outputs": 12,
      "pruned_outputs": 0,
      "constraint_mode": "soft",
      "results": {
        "Legal": {
          "Force Majeure clause and why it might void our contract": {
            "output": "Imagine you promised to help your neighbor clean their garage on Saturday. But then a huge storm comes and makes everything too wet. A Force Majeure clause is like saying 'if something really big and unexpected happens that makes it impossible to keep our promise, then neither of us gets in trouble for breaking it.'",
            "jargon_found": [],
            "score": 75,
            "status": "success"
          },
          "Why we need to add an indemnification clause to protect your business": {
            "output": "Imagine you're selling lemonade at a fair. You want to make sure that if someone gets sick from your lemonade, you won't have to pay their doctor bills or get into big trouble. So, you ask the person making your lemons to promise they'll take responsibility if anything goes wrong with the ingredients they provided.",
            "jargon_found": [],
            "score": 75,
            "status": "success"
          },
          "What happens if the other party breaches the non-compete agreement": {
            "output": "Imagine you and your best friend promise not to join another soccer team while you're both on the same one. If your friend joins another team without telling you or getting permission, they've broken their promise. In this case, you might feel sad and upset, and the grown-ups might need to help figure out how to make things fair again.",
            "jargon_found": [],
            "score": 75,
            "status": "success"
          }
        },
        "Medical": {
          "Your MRI shows a benign lesion that we should monitor": {
            "output": "Imagine your body is like a house. Sometimes, an MRI can find a small spot that's no big deal—like finding a harmless spider in one of your rooms. We just want to check on it now and then to make sure it stays small and doesn't cause any trouble.",
            "jargon_found": [],
            "score": 75,
            "status": "success"
          },
          "The etiology of your chronic fatigue syndrome": {
            "output": "Imagine your body is like a car. Sometimes, just like how a car might need maintenance or have an engine problem, your body can feel really tired all the time. We're trying to figure out what's making your 'engine' run slow—it could be from a past sickness, stress, or how your body's defense system is working.",
            "jargon_found": [],
            "score": 75,
            "status": "success"
          },
          "Why we're recommending prophylactic treatment given your comorbidities": {
            "output": "Imagine your body is like a house, and sometimes tiny germs try to get inside through the windows. Your immune system is like the house's security system. Because you have a few health conditions at once, your security system isn't as strong, so we want to give you medicine now to help keep those germs out before they cause trouble.",
            "jargon_found": [],
            "score": 75,
            "status": "success"
          }
        },
        "Financial": {
          "How compound interest and amortization affect your mortgage payments": {
            "output": "Imagine you have a piggy bank where you put in $100 every month to pay off your toy store credit. At first, most of your money goes to paying back the store for letting you borrow money. But over time, more and more of your $100 actually goes toward paying off what you originally owed.",
            "jargon_found": [],
            "score": 100,
            "status": "success"
          },
          "Why we recommend diversifying your portfolio with low-liquidity assets": {
            "output": "Imagine you have a toy box full of different toys. Some toys, like building blocks, are easy to play with anytime—you can grab them quickly. Other toys, like a special puzzle that takes time to set up, are harder to use right away but can be really fun and valuable. We suggest having both kinds so if one type isn't working well, you still have the other.",
            "jargon_found": [],
            "score": 75,
            "status": "success"
          },
          "The tax implications of depreciation on your rental property": {
            "output": "Imagine you have a toy castle that you use for your tea parties. Over time, parts of the castle get old and need fixing. The government lets you say 'my castle is worth less now because it's wearing out,' and that means you don't have to give as much of your allowance in taxes.",
            "jargon_found": [],
            "score": 75,
            "status": "success"
          }
        },
        "Technical/Engineering": {
          "Why our API has high latency and how microservices could help": {
            "output": "Imagine you're at a big birthday party, and you want to ask your friend what their favorite ice cream flavor is. If everyone has to wait in one long line to talk to the same person, it takes forever. But if you split up into smaller groups where each person handles one type of question, everyone gets their answers much faster.",
            "jargon_found": [],
            "score": 75,
            "status": "success"
          },
          "The difference between synchronous and asynchronous processing": {
            "output": "Imagine you're helping mommy or daddy make lemonade. With one way, you follow each step one after another: first, you cut the lemons, then you squeeze them, then you add sugar. With the other way, you can start squeezing while someone else adds sugar—everyone works at the same time!",
            "jargon_found": ["synchronous"],
            "score": 70,
            "status": "success"
          },
          "Why we need to refactor the legacy codebase before adding new features": {
            "output": "Imagine you have an old toy house made of wooden blocks that's been your favorite for years. It's starting to get wobbly. Instead of just adding more rooms on top (which could make it fall down), we need to fix the wobbly parts first so everything stays strong when we add new pieces.",
            "jargon_found": [],
            "score": 75,
            "status": "success"
          }
        }
      }
    },
    "Gemma-2-2B": {
      "total_score": 550,
      "max_possible": 1200,
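As a quick sanity check on the schema above: each model's reported `percentage` is just `total_score / max_possible`. A minimal sketch, assuming the file layout shown in this diff:

```python
import json

# Recompute each model's percentage from the fields added above.
with open("benchmark_data.json") as f:
    data = json.load(f)

for model, result in data["model_results"].items():
    pct = 100 * result["total_score"] / result["max_possible"]
    # e.g. Qwen2.5-7B-SoftConstraint: 920 / 1200 -> 76.7%
    print(f"{model}: {result['total_score']}/{result['max_possible']} = {pct:.1f}%")
```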