RishiRP commited on
Commit
2c209e6
·
verified ·
1 Parent(s): 5a71496

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +242 -217
app.py CHANGED
@@ -27,14 +27,15 @@ SPACE_CACHE = Path.home() / ".cache" / "huggingface"
27
  SPACE_CACHE.mkdir(parents=True, exist_ok=True)
28
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
29
 
 
30
  GEN_CONFIG = GenerationConfig(
31
  temperature=0.0,
32
  top_p=1.0,
33
  do_sample=False,
34
- max_new_tokens=96, # small for speed; adjust if needed
35
  )
36
 
37
- # Official UBS label set (strict)
38
  OFFICIAL_LABELS = [
39
  "plan_contact",
40
  "schedule_meeting",
@@ -47,98 +48,104 @@ OFFICIAL_LABELS = [
47
  ]
48
  OFFICIAL_LABELS_TEXT = "\n".join(OFFICIAL_LABELS)
49
 
50
- # Per-label keyword cues (static prompt context to improve recall)
51
- LABEL_KEYWORDS: Dict[str, List[str]] = {
52
- "plan_contact": [
53
- "call back", "get back to you", "i'll get back", "follow up",
54
- "reach out", "contact later", "check in", "touch base", "remind",
55
- "send a note", "drop you a note", "email you", "ping you"
56
- ],
57
- "schedule_meeting": [
58
- "meet", "let's meet", "meeting", "book a meeting", "set up a meeting",
59
- "schedule a call", "schedule something", "appointment", "calendar",
60
- "time slot", "slot", "next week", "tomorrow", "this afternoon",
61
- "find a time", "set a time", "book time"
62
- ],
63
- "update_contact_info_non_postal": [
64
- "phone change", "new phone", "changed phone", "email change", "new email",
65
- "update contact details", "update mobile", "alternate phone", "alternate email",
66
- "wrong email", "wrong phone", "new mobile"
67
- ],
68
- "update_contact_info_postal_address": [
69
- "moved to", "new address", "postal address", "mailing address",
70
- "change of address", "residential address", "address change"
71
- ],
72
- "update_kyc_activity": [
73
- "activity update", "economic activity", "employment status",
74
- "occupation", "job change", "changed jobs", "business activity"
75
- ],
76
- "update_kyc_origin_of_assets": [
77
- "source of funds", "origin of assets", "where money comes from",
78
- "inheritance", "salary", "business income", "asset origin",
79
- "gifted funds", "proceeds from sale"
80
- ],
81
- "update_kyc_purpose_of_businessrelation": [
82
- "purpose of relationship", "why the account", "reason for banking",
83
- "investment purpose", "relationship purpose", "purpose of the relationship"
84
- ],
85
- "update_kyc_total_assets": [
86
- "total assets", "net worth", "assets under ownership",
87
- "portfolio size", "how much you own", "aggregate assets"
88
- ],
89
  }
90
 
91
- # Regex cues to catch phrasing variants
92
- REGEX_CUES: Dict[str, List[str]] = {
93
- "schedule_meeting": [
94
- r"\b(let'?s\s+)?meet(s|ing)?\b",
95
- r"\bbook( a)? (time|slot|meeting)\b",
96
- r"\bschedule( a)? (call|meeting)\b",
97
- r"\b(next week|tomorrow|this (afternoon|evening|morning))\b",
98
- r"\bfind a time\b",
99
- ],
100
  "plan_contact": [
101
- r"\b(i'?ll|get|got)\s+back to you\b",
 
102
  r"\bfollow\s*up\b",
103
  r"\breach out\b",
104
  r"\btouch base\b",
105
- r"\bping you\b",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  ],
107
  }
108
 
109
  # =========================
110
- # Instructions (concise; concatenated to avoid string issues)
111
  # =========================
112
- SYSTEM_PROMPT = (
113
- "You are a precise banking assistant that extracts ACTIONABLE TASKS from "
114
- "client–advisor transcripts. Be conservative with hallucinations but "
115
- "prioritise RECALL: if unsure and the transcript plausibly implies an "
116
- "action, include the label and explain briefly.\n\n"
117
- "Output STRICT JSON only:\n\n"
118
- "{\n"
119
- ' "labels": ["<Label1>", "..."],\n'
120
- ' "tasks": [\n'
121
- ' {"label": "<Label1>", "explanation": "<why>", "evidence": "<quoted text/snippet>"}\n'
122
- " ]\n"
123
- "}\n\n"
124
- "Rules:\n"
125
- "- Use ONLY allowed labels supplied to you. Case-insensitive during reasoning, "
126
- " but output the canonical label text exactly.\n"
127
- "- If none truly apply, return empty lists.\n"
128
- "- Keep explanations concise; put the minimal evidence snippet that justifies the task.\n"
129
- )
130
-
131
  USER_PROMPT_TEMPLATE = (
132
- "Transcript (cleaned):\n"
133
  "```\n{transcript}\n```\n\n"
134
  "Allowed Labels (canonical; use only these):\n"
135
  "{allowed_labels_list}\n\n"
136
- "Context cues (keywords/phrases that often indicate each label):\n"
137
- "{keyword_context}\n\n"
138
- "Instructions:\n"
139
- "- Identify EVERY concrete task implied by the conversation.\n"
140
- "- Choose ONE label from Allowed Labels for each task (or none if truly inapplicable).\n"
141
- "- Return STRICT JSON only in the exact schema described by the system prompt.\n"
142
  )
143
 
144
  # =========================
@@ -171,14 +178,12 @@ def robust_json_extract(text: str) -> Dict[str, Any]:
171
  def restrict_to_allowed(pred: Dict[str, Any], allowed: List[str]) -> Dict[str, Any]:
172
  out = {"labels": [], "tasks": []}
173
  allowed_map = canonicalize_map(allowed)
174
- # labels
175
  filt_labels = []
176
  for l in pred.get("labels", []) or []:
177
  k = str(l).strip().lower()
178
  if k in allowed_map:
179
  filt_labels.append(allowed_map[k])
180
  filt_labels = normalize_labels(filt_labels)
181
- # tasks
182
  filt_tasks = []
183
  for t in pred.get("tasks", []) or []:
184
  if not isinstance(t, dict):
@@ -186,6 +191,11 @@ def restrict_to_allowed(pred: Dict[str, Any], allowed: List[str]) -> Dict[str, A
186
  k = str(t.get("label", "")).strip().lower()
187
  if k in allowed_map:
188
  new_t = dict(t); new_t["label"] = allowed_map[k]
 
 
 
 
 
189
  filt_tasks.append(new_t)
190
  merged = normalize_labels(list(set(filt_labels) | {tt["label"] for tt in filt_tasks}))
191
  out["labels"] = merged
@@ -193,7 +203,7 @@ def restrict_to_allowed(pred: Dict[str, Any], allowed: List[str]) -> Dict[str, A
193
  return out
194
 
195
  # =========================
196
- # Default pre-processing (toggleable)
197
  # =========================
198
  _DISCLAIMER_PATTERNS = [
199
  r"(?is)^\s*(?:disclaimer|legal notice|confidentiality notice).+?(?:\n{2,}|$)",
@@ -206,7 +216,7 @@ _FOOTER_PATTERNS = [
206
  ]
207
  _TIMESTAMP_SPEAKER = [
208
  r"\[\d{1,2}:\d{2}(:\d{2})?\]", # [00:01] or [00:01:02]
209
- r"^\s*(advisor|client)\s*:\s*", # Advisor: / Client:
210
  r"^\s*(speaker\s*\d+)\s*:\s*", # Speaker 1:
211
  ]
212
 
@@ -214,7 +224,6 @@ def clean_transcript(text: str) -> str:
214
  if not text:
215
  return text
216
  s = text
217
- # remove timestamps/speaker prefixes line-wise
218
  lines = []
219
  for ln in s.splitlines():
220
  ln2 = ln
@@ -222,19 +231,15 @@ def clean_transcript(text: str) -> str:
222
  ln2 = re.sub(pat, "", ln2, flags=re.IGNORECASE)
223
  lines.append(ln2)
224
  s = "\n".join(lines)
225
- # remove top disclaimers
226
  for pat in _DISCLAIMER_PATTERNS:
227
  s = re.sub(pat, "", s).strip()
228
- # remove trailing footers
229
  for pat in _FOOTER_PATTERNS:
230
  s = re.sub(pat, "", s)
231
- # collapse whitespace
232
  s = re.sub(r"[ \t]+", " ", s)
233
  s = re.sub(r"\n{3,}", "\n\n", s).strip()
234
  return s
235
 
236
  def read_text_file_any(file_input) -> str:
237
- """Works for gr.File(type='filepath') and raw strings/Path and file-like."""
238
  if not file_input:
239
  return ""
240
  if isinstance(file_input, (str, Path)):
@@ -268,7 +273,7 @@ def truncate_tokens(tokenizer, text: str, max_tokens: int) -> str:
268
  return tokenizer.decode(toks[-max_tokens:], skip_special_tokens=True)
269
 
270
  # =========================
271
- # HF model wrapper
272
  # =========================
273
  class ModelWrapper:
274
  def __init__(self, repo_id: str, hf_token: Optional[str], load_in_4bit: bool):
@@ -306,7 +311,7 @@ class ModelWrapper:
306
 
307
  @torch.inference_mode()
308
  def generate(self, system_prompt: str, user_prompt: str) -> str:
309
- # Build inputs as input_ids=... (avoid **tensor bug)
310
  if hasattr(self.tokenizer, "apply_chat_template"):
311
  messages = [
312
  {"role": "system", "content": system_prompt},
@@ -351,7 +356,7 @@ def get_model(repo_id: str, hf_token: Optional[str], load_in_4bit: bool) -> Mode
351
  return _MODEL_CACHE[key]
352
 
353
  # =========================
354
- # Official evaluation (from README)
355
  # =========================
356
  def evaluate_predictions(y_true: List[List[str]], y_pred: List[List[str]]) -> float:
357
  ALLOWED_LABELS = OFFICIAL_LABELS
@@ -395,62 +400,32 @@ def evaluate_predictions(y_true: List[List[str]], y_pred: List[List[str]]) -> fl
395
  return float(max(0.0, min(1.0, np.mean(per_sample))))
396
 
397
  # =========================
398
- # Fallback: regex + keywords if model returns empty
399
  # =========================
400
- def keyword_fallback(text: str, allowed: List[str]) -> Dict[str, Any]:
401
  low = text.lower()
402
- labels = []
403
- tasks = []
404
-
405
- # Regex first
406
  for lab in allowed:
407
- patterns = REGEX_CUES.get(lab, [])
408
- found = None
409
- for pat in patterns:
410
  m = re.search(pat, low)
411
  if m:
412
  i = m.start()
413
- start = max(0, i - 40); end = min(len(text), i + len(m.group(0)) + 40)
414
- found = text[start:end].strip()
 
 
 
 
 
 
415
  break
416
- if found:
417
- labels.append(lab)
418
- tasks.append({
419
- "label": lab,
420
- "explanation": "Regex cue matched in transcript.",
421
- "evidence": found
422
- })
423
-
424
- # Keyword contains() as backstop
425
- for lab in allowed:
426
- if lab in labels:
427
- continue
428
- hits = []
429
- for kw in LABEL_KEYWORDS.get(lab, []):
430
- k = kw.lower()
431
- i = low.find(k)
432
- if i != -1:
433
- start = max(0, i - 40); end = min(len(text), i + len(k) + 40)
434
- hits.append(text[start:end].strip())
435
- if hits:
436
- labels.append(lab)
437
- tasks.append({
438
- "label": lab,
439
- "explanation": "Keyword match in transcript.",
440
- "evidence": hits[0]
441
- })
442
-
443
  return {"labels": normalize_labels(labels), "tasks": tasks}
444
 
445
  # =========================
446
  # Inference helpers
447
  # =========================
448
- def build_keyword_context(allowed: List[str]) -> str:
449
- parts = []
450
- for lab in allowed:
451
- kws = LABEL_KEYWORDS.get(lab, [])
452
- parts.append(f"- {lab}: " + (", ".join(kws) if kws else "(no default cues)"))
453
- return "\n".join(parts)
454
 
455
  def warmup_model(model_repo: str, use_4bit: bool, hf_token: str) -> str:
456
  t0 = _now_ms()
@@ -463,12 +438,15 @@ def warmup_model(model_repo: str, use_4bit: bool, hf_token: str) -> str:
463
 
464
  def run_single(
465
  transcript_text: str,
466
- transcript_file, # filepath or file-like
467
  gt_json_text: str,
468
- gt_json_file, # filepath or file-like
469
  use_cleaning: bool,
470
- use_keyword_fallback: bool,
471
  allowed_labels_text: str,
 
 
 
472
  model_repo: str,
473
  use_4bit: bool,
474
  max_input_tokens: int,
@@ -477,7 +455,7 @@ def run_single(
477
 
478
  t0 = _now_ms()
479
 
480
- # Transcript
481
  raw_text = ""
482
  if transcript_file:
483
  raw_text = read_text_file_any(transcript_file)
@@ -487,10 +465,28 @@ def run_single(
487
 
488
  text = clean_transcript(raw_text) if use_cleaning else raw_text
489
 
490
- # Allowed labels (pre-filled defaults)
491
  user_allowed = [ln.strip() for ln in (allowed_labels_text or "").splitlines() if ln.strip()]
492
  allowed = normalize_labels(user_allowed or OFFICIAL_LABELS)
493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  # Model
495
  try:
496
  model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit)
@@ -501,12 +497,12 @@ def run_single(
501
  trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)
502
 
503
  # Build prompt
 
504
  allowed_list_str = "\n".join(f"- {l}" for l in allowed)
505
- keyword_ctx = build_keyword_context(allowed)
506
  user_prompt = USER_PROMPT_TEMPLATE.format(
507
  transcript=trunc,
508
  allowed_labels_list=allowed_list_str,
509
- keyword_context=keyword_ctx,
510
  )
511
 
512
  # Token info + prompt preview
@@ -518,7 +514,7 @@ def run_single(
518
  # Generate
519
  t1 = _now_ms()
520
  try:
521
- out = model.generate(SYSTEM_PROMPT, user_prompt)
522
  except Exception as e:
523
  return "", "", f"Generation error: {e}", "", "", "", prompt_preview_text, token_info_text, ""
524
  t2 = _now_ms()
@@ -526,33 +522,27 @@ def run_single(
526
  parsed = robust_json_extract(out)
527
  filtered = restrict_to_allowed(parsed, allowed)
528
 
529
- # Fallback if empty
530
- if use_keyword_fallback and not filtered.get("labels"):
531
- fb = keyword_fallback(trunc, allowed)
532
  if fb["labels"]:
533
- filtered = fb
 
 
 
534
 
535
  # Diagnostics
536
  diag = "\n".join([
537
  f"Device: {DEVICE} (4-bit: {'Yes' if (use_4bit and DEVICE=='cuda') else 'No'})",
538
  f"Model: {model_repo}",
539
  f"Input cleaned: {'Yes' if use_cleaning else 'No'}",
540
- f"Keyword fallback: {'Yes' if use_keyword_fallback else 'No'}",
541
- f"Tokens (input, approx): ≤ {max_input_tokens}",
542
  f"Latency: prep {t1-t0} ms, gen {t2-t1} ms, total {t2-t0} ms",
543
  f"Allowed labels: {', '.join(allowed)}",
544
  ])
545
 
546
- # Context & instructions preview shown in UI
547
- context_preview = (
548
- "### Allowed Labels\n"
549
- + "\n".join(f"- {l}" for l in allowed)
550
- + "\n\n### Keyword cues per label\n"
551
- + keyword_ctx
552
- )
553
- instructions_preview = "```\n" + SYSTEM_PROMPT + "\n```"
554
-
555
- # Summary & JSON
556
  labs = filtered.get("labels", [])
557
  tasks = filtered.get("tasks", [])
558
  summary = "Detected labels:\n" + ("\n".join(f"- {l}" for l in labs) if labs else "(none)")
@@ -565,7 +555,7 @@ def run_single(
565
  summary += "\n\nTasks: (none)"
566
  json_out = json.dumps(filtered, indent=2, ensure_ascii=False)
567
 
568
- # Optional single-file scoring if GT provided
569
  metrics = ""
570
  if gt_json_file or (gt_json_text and gt_json_text.strip()):
571
  truth_obj = None
@@ -598,6 +588,10 @@ def run_single(
598
  else:
599
  metrics = "Ground truth JSON missing or invalid; expected {'labels': [...]}."
600
 
 
 
 
 
601
  return summary, json_out, diag, out.strip(), context_preview, instructions_preview, metrics, prompt_preview_text, token_info_text
602
 
603
  # =========================
@@ -612,9 +606,12 @@ def read_zip_from_path(path: str, exdir: Path) -> List[Path]:
612
  return [p for p in exdir.rglob("*") if p.is_file()]
613
 
614
  def run_batch(
615
- zip_path, # filepath string
616
  use_cleaning: bool,
617
- use_keyword_fallback: bool,
 
 
 
618
  model_repo: str,
619
  use_4bit: bool,
620
  max_input_tokens: int,
@@ -625,6 +622,25 @@ def run_batch(
625
  if not zip_path:
626
  return ("No ZIP provided.", "", pd.DataFrame(), "")
627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
628
  work = Path("/tmp/batch")
629
  if work.exists():
630
  for p in sorted(work.rglob("*"), reverse=True):
@@ -650,14 +666,15 @@ def run_batch(
650
  if not stems:
651
  return ("No .txt transcripts found in ZIP.", "", pd.DataFrame(), "")
652
 
 
653
  try:
654
  model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit)
655
  except Exception as e:
656
  return (f"Model load failed: {e}", "", pd.DataFrame(), "")
657
 
658
  allowed = OFFICIAL_LABELS[:]
 
659
  allowed_list_str = "\n".join(f"- {l}" for l in allowed)
660
- keyword_ctx = build_keyword_context(allowed)
661
 
662
  y_true, y_pred = [], []
663
  rows = []
@@ -666,25 +683,29 @@ def run_batch(
666
  for stem in stems:
667
  raw = txts[stem].read_text(encoding="utf-8", errors="ignore")
668
  text = clean_transcript(raw) if use_cleaning else raw
 
669
  trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)
670
 
671
  user_prompt = USER_PROMPT_TEMPLATE.format(
672
  transcript=trunc,
673
  allowed_labels_list=allowed_list_str,
674
- keyword_context=keyword_ctx,
675
  )
676
 
677
  t0 = _now_ms()
678
- out = model.generate(SYSTEM_PROMPT, user_prompt)
679
  t1 = _now_ms()
680
 
681
  parsed = robust_json_extract(out)
682
  filtered = restrict_to_allowed(parsed, allowed)
683
 
684
- if use_keyword_fallback and not filtered.get("labels"):
685
- fb = keyword_fallback(trunc, allowed)
686
  if fb["labels"]:
687
- filtered = fb
 
 
 
688
 
689
  pred_labels = filtered.get("labels", [])
690
  y_pred.append(pred_labels)
@@ -721,8 +742,8 @@ def run_batch(
721
  f"Device: {DEVICE} (4-bit: {'Yes' if (use_4bit and DEVICE=='cuda') else 'No'})",
722
  f"Model: {model_repo}",
723
  f"Input cleaned: {'Yes' if use_cleaning else 'No'}",
724
- f"Keyword fallback: {'Yes' if use_keyword_fallback else 'No'}",
725
- f"Tokens (input, approx): ≤ {max_input_tokens}",
726
  f"Batch time: {_now_ms()-t_start} ms",
727
  ]
728
  if have_truth and score is not None:
@@ -739,7 +760,6 @@ def run_batch(
739
  ]
740
  diag_str = "\n".join(diag)
741
 
742
- # save CSV for download
743
  out_csv = Path("/tmp/batch_results.csv")
744
  df.to_csv(out_csv, index=False, encoding="utf-8")
745
  return ("Batch done.", diag_str, df, str(out_csv))
@@ -748,24 +768,26 @@ def run_batch(
748
  # UI
749
  # =========================
750
  MODEL_CHOICES = [
751
- "swiss-ai/Apertus-8B-Instruct-2509",
752
- "meta-llama/Meta-Llama-3-8B-Instruct",
753
- "mistralai/Mistral-7B-Instruct-v0.3",
754
  ]
755
 
 
756
  custom_css = """
757
  :root { --radius: 14px; }
758
- .gradio-container { font-family: Inter, ui-sans-serif, system-ui; }
759
- .card { border: 1px solid rgba(255,255,255,.08); border-radius: var(--radius); padding: 14px 16px; background: rgba(255,255,255,.02); box-shadow: 0 1px 10px rgba(0,0,0,.12) inset; }
760
- .header { font-weight: 700; font-size: 22px; margin-bottom: 4px; }
761
- .subtle { color: rgba(255,255,255,.65); font-size: 14px; margin-bottom: 12px; }
762
- hr.sep { border: none; border-top: 1px solid rgba(255,255,255,.08); margin: 10px 0 16px; }
763
  .gr-button { border-radius: 12px !important; }
 
764
  """
765
 
766
  with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo:
767
- gr.Markdown("<div class='header'>Talk2Task — Task Extraction (UBS Challenge)</div>")
768
- gr.Markdown("<div class='subtle'>False negatives are penalised more than false positives in the official score. This UI biases for recall, shows the exact instructions & context, and supports single or batch evaluation.</div>")
769
 
770
  with gr.Tab("Single transcript"):
771
  with gr.Row():
@@ -776,7 +798,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
776
  file_types=[".txt", ".md", ".json"],
777
  type="filepath",
778
  )
779
- text = gr.Textbox(label="Or paste transcript", lines=10)
780
  gr.Markdown("<hr class='sep'/>")
781
 
782
  gr.Markdown("<div class='header'>Ground truth JSON (optional)</div>")
@@ -788,26 +810,22 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
788
  gt_text = gr.Textbox(label="Or paste ground truth JSON", lines=6, placeholder='{\"labels\": [\"schedule_meeting\"]}')
789
  gr.Markdown("</div>") # close card
790
 
791
- gr.Markdown("<div class='card'><div class='header'>Preprocessing & heuristics</div>")
792
- use_cleaning = gr.Checkbox(
793
- label="Apply default cleaning (remove disclaimers, timestamps, speakers, footers)",
794
- value=True,
795
- )
796
- use_keyword_fallback = gr.Checkbox(
797
- label="Keyword fallback if model returns empty",
798
- value=True,
799
- )
800
  gr.Markdown("</div>")
801
 
802
  gr.Markdown("<div class='card'><div class='header'>Allowed labels</div>")
803
- labels_text = gr.Textbox(
804
- label="Allowed Labels (one per line)",
805
- value=OFFICIAL_LABELS_TEXT, # prefilled
806
- lines=8,
807
- )
808
  reset_btn = gr.Button("Reset to official labels")
809
  gr.Markdown("</div>")
810
 
 
 
 
 
 
 
811
  with gr.Column(scale=2):
812
  gr.Markdown("<div class='card'><div class='header'>Model & run</div>")
813
  repo = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
@@ -830,48 +848,48 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
830
  with gr.Row():
831
  with gr.Column():
832
  with gr.Accordion("Instructions used (system prompt)", open=False):
833
- instr_md = gr.Markdown("```\n" + SYSTEM_PROMPT + "\n```")
834
  with gr.Column():
835
- with gr.Accordion("Context used (allowed labels + keyword cues)", open=True):
836
  context_md = gr.Markdown("")
837
 
838
- # reset button behavior
839
  def _reset_labels():
840
  return OFFICIAL_LABELS_TEXT
841
  reset_btn.click(fn=_reset_labels, inputs=None, outputs=labels_text)
842
 
843
- # warm-up
844
- warm_btn.click(
845
- fn=warmup_model,
846
- inputs=[repo, use_4bit, hf_token],
847
- outputs=diag,
848
- )
 
 
 
 
 
849
 
850
- # single run
851
- def _pack_context_md(allowed: str) -> str:
852
- allowed_list = [ln.strip() for ln in (allowed or OFFICIAL_LABELS_TEXT).splitlines() if ln.strip()]
853
- ctx = build_keyword_context(allowed_list)
854
- return "### Allowed Labels\n" + "\n".join(f"- {l}" for l in allowed_list) + "\n\n### Keyword cues per label\n" + ctx
855
 
 
856
  run_btn.click(
857
  fn=run_single,
858
  inputs=[
859
- text, file, gt_text, gt_file, use_cleaning, use_keyword_fallback,
860
- labels_text, repo, use_4bit, max_tokens, hf_token
 
861
  ],
862
  outputs=[summary, json_out, diag, raw, context_md, instr_md, gr.Textbox(visible=False), prompt_preview, token_info],
863
  )
864
 
865
- # initial context preview
866
- context_md.value = _pack_context_md(OFFICIAL_LABELS_TEXT)
867
-
868
  with gr.Tab("Batch evaluation"):
869
  with gr.Row():
870
  with gr.Column(scale=3):
871
  gr.Markdown("<div class='card'><div class='header'>ZIP input</div>")
872
  zip_in = gr.File(label="ZIP with transcripts (.txt) and truths (.json)", file_types=[".zip"], type="filepath")
873
  use_cleaning_b = gr.Checkbox(label="Apply default cleaning", value=True)
874
- use_keyword_fallback_b = gr.Checkbox(label="Keyword fallback if model returns empty", value=True)
875
  gr.Markdown("</div>")
876
  with gr.Column(scale=2):
877
  gr.Markdown("<div class='card'><div class='header'>Model & run</div>")
@@ -879,6 +897,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
879
  use_4bit_b = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
880
  max_tokens_b = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=2048)
881
  hf_token_b = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
 
 
 
882
  limit_files = gr.Slider(label="Process at most N files (0 = all)", minimum=0, maximum=2000, step=10, value=0)
883
  run_batch_btn = gr.Button("Run Batch", variant="primary")
884
  gr.Markdown("</div>")
@@ -893,7 +914,11 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
893
 
894
  run_batch_btn.click(
895
  fn=run_batch,
896
- inputs=[zip_in, use_cleaning_b, use_keyword_fallback_b, repo_b, use_4bit_b, max_tokens_b, hf_token_b, limit_files],
 
 
 
 
897
  outputs=[status, diag_b, df_out, csv_out],
898
  )
899
 
 
27
  SPACE_CACHE.mkdir(parents=True, exist_ok=True)
28
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
29
 
30
+ # Fast, deterministic, compact outputs for lower latency
31
  GEN_CONFIG = GenerationConfig(
32
  temperature=0.0,
33
  top_p=1.0,
34
  do_sample=False,
35
+ max_new_tokens=128, # increase if your JSON is getting truncated
36
  )
37
 
38
+ # Official UBS labels (canonical)
39
  OFFICIAL_LABELS = [
40
  "plan_contact",
41
  "schedule_meeting",
 
48
  ]
49
  OFFICIAL_LABELS_TEXT = "\n".join(OFFICIAL_LABELS)
50
 
51
+ # =========================
52
+ # Editable defaults (shown in UI)
53
+ # =========================
54
+ DEFAULT_SYSTEM_INSTRUCTIONS = (
55
+ "You extract ACTIONABLE TASKS from client–advisor transcripts. "
56
+ "The transcript may be in German, French, Italian, or English. "
57
+ "Prioritize RECALL: if a label plausibly applies, include it. "
58
+ "Use ONLY the canonical labels provided. "
59
+ "Return STRICT JSON only with keys 'labels' and 'tasks'. "
60
+ "Each task must include 'label', a brief 'explanation', and a short 'evidence' quote from the transcript."
61
+ )
62
+
63
+ # Very short, language-agnostic semantics to keep prompt small
64
+ DEFAULT_LABEL_GLOSSARY = {
65
+ "plan_contact": "Commitment to contact later (advisor/client will reach out, follow-up promised).",
66
+ "schedule_meeting": "Scheduling or confirming a meeting/call/appointment (time/date/slot/virtual).",
67
+ "update_contact_info_non_postal": "Change or confirmation of phone/email (non-postal contact details).",
68
+ "update_contact_info_postal_address": "Change or confirmation of postal/residential/mailing address.",
69
+ "update_kyc_activity": "Change/confirmation of occupation, employment status, or economic activity.",
70
+ "update_kyc_origin_of_assets": "Discussion/confirmation of source of funds / origin of assets.",
71
+ "update_kyc_purpose_of_businessrelation": "Purpose of the banking relationship/account usage.",
72
+ "update_kyc_total_assets": "Discussion/confirmation of total assets/net worth.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  }
74
 
75
+ # Tiny multilingual fallback rules (optional) to guarantee recall if model is empty.
76
+ # Keep small to avoid false positives and keep maintenance low.
77
+ DEFAULT_FALLBACK_CUES = {
 
 
 
 
 
 
78
  "plan_contact": [
79
+ # EN
80
+ r"\b(get|got|will|we'?ll|i'?ll)\s+back to you\b",
81
  r"\bfollow\s*up\b",
82
  r"\breach out\b",
83
  r"\btouch base\b",
84
+ r"\bcontact (you|me|us)\b",
85
+ # DE
86
+ r"\bin verbindung setzen\b",
87
+ r"\brückmeldung\b",
88
+ r"\bich\s+melde\b|\bwir\s+melden\b",
89
+ r"\bnachfassen\b",
90
+ # FR
91
+ r"\bje vous recontacte\b|\bnous vous recontacterons\b",
92
+ r"\bprendre contact\b|\breprendre contact\b",
93
+ # IT
94
+ r"\bla ricontatter[oò]\b|\bci metteremo in contatto\b",
95
+ r"\btenersi in contatto\b",
96
+ ],
97
+ "schedule_meeting": [
98
+ # EN
99
+ r"\b(let'?s\s+)?meet(ing|s)?\b",
100
+ r"\bschedule( a)? (call|meeting|appointment)\b",
101
+ r"\bbook( a)? (slot|time|meeting)\b",
102
+ r"\b(next week|tomorrow|this (afternoon|morning|evening))\b",
103
+ r"\bconfirm( the)? (time|meeting|appointment)\b",
104
+ # DE
105
+ r"\btermin(e|s)?\b|\bvereinbaren\b|\bansetzen\b|\babstimmen\b|\bbesprechung(en)?\b|\bvirtuell(e|en)?\b",
106
+ r"\bnächste(n|r)? woche\b|\b(dienstag|montag|mittwoch|donnerstag|freitag)\b|\bnachmittag|vormittag|morgen\b",
107
+ # FR
108
+ r"\brendez[- ]?vous\b|\bréunion\b|\bfixer\b|\bplanifier\b|\bcalendrier\b|\bse rencontrer\b|\bse voir\b",
109
+ r"\bla semaine prochaine\b|\bdemain\b|\bcet (après-midi|apres-midi|après midi|apres midi|matin|soir)\b",
110
+ # IT
111
+ r"\bappuntamento\b|\briunione\b|\borganizzare\b|\bprogrammare\b|\bincontrarci\b|\bcalendario\b",
112
+ r"\bla prossima settimana\b|\bdomani\b|\b(questo|questa)\s*(pomeriggio|mattina|sera)\b",
113
+ ],
114
+ "update_kyc_origin_of_assets": [
115
+ # EN
116
+ r"\bsource of funds\b|\borigin of assets\b|\bproof of (funds|assets)\b",
117
+ # DE
118
+ r"\bvermögensursprung(e|s)?\b|\bherkunft der mittel\b|\bnachweis\b",
119
+ # FR
120
+ r"\borigine des fonds\b|\borigine du patrimoine\b|\bjustificatif(s)?\b",
121
+ # IT
122
+ r"\borigine dei fondi\b|\borigine del patrimonio\b|\bprova dei fondi\b|\bgiustificativo\b",
123
+ ],
124
+ "update_kyc_activity": [
125
+ # EN
126
+ r"\bemployment status\b|\boccupation\b|\bjob change\b|\bsalary history\b",
127
+ # DE
128
+ r"\bbeschäftigungsstatus\b|\bberuf\b|\bjobwechsel\b|\bgehaltshistorie\b|\btätigkeit\b",
129
+ # FR
130
+ r"\bstatut professionnel\b|\bprofession\b|\bchangement d'emploi\b|\bhistorique salarial\b|\bactivité\b",
131
+ # IT
132
+ r"\bstato occupazionale\b|\bprofessione\b|\bcambio di lavoro\b|\bstoria salariale\b|\battivit[aà]\b",
133
  ],
134
  }
135
 
136
  # =========================
137
+ # Prompt templates (minimal multilingual)
138
  # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  USER_PROMPT_TEMPLATE = (
140
+ "Transcript (may be DE/FR/IT/EN):\n"
141
  "```\n{transcript}\n```\n\n"
142
  "Allowed Labels (canonical; use only these):\n"
143
  "{allowed_labels_list}\n\n"
144
+ "Label Glossary (concise semantics):\n"
145
+ "{glossary}\n\n"
146
+ "Return STRICT JSON ONLY in this exact schema:\n"
147
+ '{\n "labels": ["<Label1>", "..."],\n'
148
+ ' "tasks": [{"label": "<Label1>", "explanation": "<why>", "evidence": "<quote>"}]\n}\n'
 
149
  )
150
 
151
  # =========================
 
178
  def restrict_to_allowed(pred: Dict[str, Any], allowed: List[str]) -> Dict[str, Any]:
179
  out = {"labels": [], "tasks": []}
180
  allowed_map = canonicalize_map(allowed)
 
181
  filt_labels = []
182
  for l in pred.get("labels", []) or []:
183
  k = str(l).strip().lower()
184
  if k in allowed_map:
185
  filt_labels.append(allowed_map[k])
186
  filt_labels = normalize_labels(filt_labels)
 
187
  filt_tasks = []
188
  for t in pred.get("tasks", []) or []:
189
  if not isinstance(t, dict):
 
191
  k = str(t.get("label", "")).strip().lower()
192
  if k in allowed_map:
193
  new_t = dict(t); new_t["label"] = allowed_map[k]
194
+ new_t = {
195
+ "label": new_t["label"],
196
+ "explanation": str(new_t.get("explanation", ""))[:300],
197
+ "evidence": str(new_t.get("evidence", ""))[:300],
198
+ }
199
  filt_tasks.append(new_t)
200
  merged = normalize_labels(list(set(filt_labels) | {tt["label"] for tt in filt_tasks}))
201
  out["labels"] = merged
 
203
  return out
204
 
205
  # =========================
206
+ # Pre-processing
207
  # =========================
208
  _DISCLAIMER_PATTERNS = [
209
  r"(?is)^\s*(?:disclaimer|legal notice|confidentiality notice).+?(?:\n{2,}|$)",
 
216
  ]
217
  _TIMESTAMP_SPEAKER = [
218
  r"\[\d{1,2}:\d{2}(:\d{2})?\]", # [00:01] or [00:01:02]
219
+ r"^\s*(advisor|client|client advisor)\s*:\s*", # Advisor:, Client:
220
  r"^\s*(speaker\s*\d+)\s*:\s*", # Speaker 1:
221
  ]
222
 
 
224
  if not text:
225
  return text
226
  s = text
 
227
  lines = []
228
  for ln in s.splitlines():
229
  ln2 = ln
 
231
  ln2 = re.sub(pat, "", ln2, flags=re.IGNORECASE)
232
  lines.append(ln2)
233
  s = "\n".join(lines)
 
234
  for pat in _DISCLAIMER_PATTERNS:
235
  s = re.sub(pat, "", s).strip()
 
236
  for pat in _FOOTER_PATTERNS:
237
  s = re.sub(pat, "", s)
 
238
  s = re.sub(r"[ \t]+", " ", s)
239
  s = re.sub(r"\n{3,}", "\n\n", s).strip()
240
  return s
241
 
242
  def read_text_file_any(file_input) -> str:
 
243
  if not file_input:
244
  return ""
245
  if isinstance(file_input, (str, Path)):
 
273
  return tokenizer.decode(toks[-max_tokens:], skip_special_tokens=True)
274
 
275
  # =========================
276
+ # HF model wrapper (main LLM)
277
  # =========================
278
  class ModelWrapper:
279
  def __init__(self, repo_id: str, hf_token: Optional[str], load_in_4bit: bool):
 
311
 
312
  @torch.inference_mode()
313
  def generate(self, system_prompt: str, user_prompt: str) -> str:
314
+ # Build inputs as input_ids=... (avoid earlier **tensor bug)
315
  if hasattr(self.tokenizer, "apply_chat_template"):
316
  messages = [
317
  {"role": "system", "content": system_prompt},
 
356
  return _MODEL_CACHE[key]
357
 
358
  # =========================
359
+ # Evaluation (official weighted score)
360
  # =========================
361
  def evaluate_predictions(y_true: List[List[str]], y_pred: List[List[str]]) -> float:
362
  ALLOWED_LABELS = OFFICIAL_LABELS
 
400
  return float(max(0.0, min(1.0, np.mean(per_sample))))
401
 
402
  # =========================
403
+ # Multilingual fallback (regex on original text)
404
  # =========================
405
def multilingual_fallback(text: str, allowed: List[str], cues: Dict[str, List[str]]) -> Dict[str, Any]:
    """Rule-based label detection used as a recall fallback for the LLM pass.

    Scans the transcript with per-label regex cues and returns the same shape
    as the model output: ``{"labels": [...], "tasks": [...]}``. Each task
    carries a short evidence window around the first cue hit for its label.

    Parameters:
        text: transcript text to scan.
        allowed: labels eligible for detection.
        cues: mapping label -> list of regex patterns (matched case-insensitively).

    Returns:
        dict with normalized ``labels`` and one ``tasks`` entry per hit label.
    """
    labels: List[str] = []
    tasks: List[Dict[str, Any]] = []
    for lab in allowed:
        for pat in cues.get(lab, []):
            # Search the ORIGINAL text case-insensitively instead of lowering
            # it first: str.lower() can change string length for some Unicode
            # characters, which would misalign the evidence offsets below.
            try:
                m = re.search(pat, text, flags=re.IGNORECASE)
            except re.error:
                # Cue lists are user-editable JSON; an invalid regex should
                # be skipped, not crash the whole extraction run.
                continue
            if m:
                i = m.start()
                # Evidence window: ~60 chars of context on each side of the hit.
                start = max(0, i - 60)
                end = min(len(text), i + len(m.group(0)) + 60)
                if lab not in labels:
                    labels.append(lab)
                tasks.append({
                    "label": lab,
                    "explanation": "Rule hit (multilingual fallback)",
                    "evidence": text[start:end].strip(),
                })
                break  # first matching cue per label is enough
    return {"labels": normalize_labels(labels), "tasks": tasks}
423
 
424
  # =========================
425
  # Inference helpers
426
  # =========================
427
def build_glossary_str(glossary: Dict[str, str], allowed: List[str]) -> str:
    """Render the label glossary as one bullet line per allowed label.

    Labels missing from the glossary still get a bullet with an empty
    definition, so the prompt always lists every allowed label.
    """
    bullets = []
    for label in allowed:
        definition = glossary.get(label, "")
        bullets.append(f"- {label}: {definition}")
    return "\n".join(bullets)
 
 
 
 
429
 
430
  def warmup_model(model_repo: str, use_4bit: bool, hf_token: str) -> str:
431
  t0 = _now_ms()
 
438
 
439
  def run_single(
440
  transcript_text: str,
441
+ transcript_file,
442
  gt_json_text: str,
443
+ gt_json_file,
444
  use_cleaning: bool,
445
+ use_fallback: bool,
446
  allowed_labels_text: str,
447
+ sys_instructions_text: str,
448
+ glossary_json_text: str,
449
+ fallback_json_text: str,
450
  model_repo: str,
451
  use_4bit: bool,
452
  max_input_tokens: int,
 
455
 
456
  t0 = _now_ms()
457
 
458
+ # Load transcript
459
  raw_text = ""
460
  if transcript_file:
461
  raw_text = read_text_file_any(transcript_file)
 
465
 
466
  text = clean_transcript(raw_text) if use_cleaning else raw_text
467
 
468
+ # Allowed labels
469
  user_allowed = [ln.strip() for ln in (allowed_labels_text or "").splitlines() if ln.strip()]
470
  allowed = normalize_labels(user_allowed or OFFICIAL_LABELS)
471
 
472
+ # Editable configs
473
+ try:
474
+ sys_instructions = (sys_instructions_text or DEFAULT_SYSTEM_INSTRUCTIONS).strip()
475
+ if not sys_instructions:
476
+ sys_instructions = DEFAULT_SYSTEM_INSTRUCTIONS
477
+ except Exception:
478
+ sys_instructions = DEFAULT_SYSTEM_INSTRUCTIONS
479
+
480
+ try:
481
+ label_glossary = json.loads(glossary_json_text) if glossary_json_text else DEFAULT_LABEL_GLOSSARY
482
+ except Exception:
483
+ label_glossary = DEFAULT_LABEL_GLOSSARY
484
+
485
+ try:
486
+ fallback_cues = json.loads(fallback_json_text) if fallback_json_text else DEFAULT_FALLBACK_CUES
487
+ except Exception:
488
+ fallback_cues = DEFAULT_FALLBACK_CUES
489
+
490
  # Model
491
  try:
492
  model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit)
 
497
  trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)
498
 
499
  # Build prompt
500
+ glossary_str = build_glossary_str(label_glossary, allowed)
501
  allowed_list_str = "\n".join(f"- {l}" for l in allowed)
 
502
  user_prompt = USER_PROMPT_TEMPLATE.format(
503
  transcript=trunc,
504
  allowed_labels_list=allowed_list_str,
505
+ glossary=glossary_str,
506
  )
507
 
508
  # Token info + prompt preview
 
514
  # Generate
515
  t1 = _now_ms()
516
  try:
517
+ out = model.generate(sys_instructions, user_prompt)
518
  except Exception as e:
519
  return "", "", f"Generation error: {e}", "", "", "", prompt_preview_text, token_info_text, ""
520
  t2 = _now_ms()
 
522
  parsed = robust_json_extract(out)
523
  filtered = restrict_to_allowed(parsed, allowed)
524
 
525
+ # Fallback (multilingual rules) on original text; merge for recall if enabled
526
+ if use_fallback:
527
+ fb = multilingual_fallback(trunc, allowed, fallback_cues)
528
  if fb["labels"]:
529
+ merged_labels = sorted(list(set(filtered.get("labels", [])) | set(fb["labels"])))
530
+ existing = {tt.get("label") for tt in filtered.get("tasks", [])}
531
+ merged_tasks = filtered.get("tasks", []) + [t for t in fb["tasks"] if t["label"] not in existing]
532
+ filtered = {"labels": merged_labels, "tasks": merged_tasks}
533
 
534
  # Diagnostics
535
  diag = "\n".join([
536
  f"Device: {DEVICE} (4-bit: {'Yes' if (use_4bit and DEVICE=='cuda') else 'No'})",
537
  f"Model: {model_repo}",
538
  f"Input cleaned: {'Yes' if use_cleaning else 'No'}",
539
+ f"Fallback rules: {'Yes' if use_fallback else 'No'}",
540
+ f"Tokens (input limit): ≤ {max_input_tokens}",
541
  f"Latency: prep {t1-t0} ms, gen {t2-t1} ms, total {t2-t0} ms",
542
  f"Allowed labels: {', '.join(allowed)}",
543
  ])
544
 
545
+ # Summaries
 
 
 
 
 
 
 
 
 
546
  labs = filtered.get("labels", [])
547
  tasks = filtered.get("tasks", [])
548
  summary = "Detected labels:\n" + ("\n".join(f"- {l}" for l in labs) if labs else "(none)")
 
555
  summary += "\n\nTasks: (none)"
556
  json_out = json.dumps(filtered, indent=2, ensure_ascii=False)
557
 
558
+ # Single-file scoring if GT provided
559
  metrics = ""
560
  if gt_json_file or (gt_json_text and gt_json_text.strip()):
561
  truth_obj = None
 
588
  else:
589
  metrics = "Ground truth JSON missing or invalid; expected {'labels': [...]}."
590
 
591
+ # For UI: show effective context (glossary) and instructions
592
+ context_preview = "### Label Glossary (used)\n" + "\n".join(f"- {k}: {v}" for k, v in label_glossary.items() if k in allowed)
593
+ instructions_preview = "```\n" + sys_instructions + "\n```"
594
+
595
  return summary, json_out, diag, out.strip(), context_preview, instructions_preview, metrics, prompt_preview_text, token_info_text
596
 
597
  # =========================
 
606
  return [p for p in exdir.rglob("*") if p.is_file()]
607
 
608
  def run_batch(
609
+ zip_path,
610
  use_cleaning: bool,
611
+ use_fallback: bool,
612
+ sys_instructions_text: str,
613
+ glossary_json_text: str,
614
+ fallback_json_text: str,
615
  model_repo: str,
616
  use_4bit: bool,
617
  max_input_tokens: int,
 
622
  if not zip_path:
623
  return ("No ZIP provided.", "", pd.DataFrame(), "")
624
 
625
+ # Editable configs
626
+ try:
627
+ sys_instructions = (sys_instructions_text or DEFAULT_SYSTEM_INSTRUCTIONS).strip()
628
+ if not sys_instructions:
629
+ sys_instructions = DEFAULT_SYSTEM_INSTRUCTIONS
630
+ except Exception:
631
+ sys_instructions = DEFAULT_SYSTEM_INSTRUCTIONS
632
+
633
+ try:
634
+ label_glossary = json.loads(glossary_json_text) if glossary_json_text else DEFAULT_LABEL_GLOSSARY
635
+ except Exception:
636
+ label_glossary = DEFAULT_LABEL_GLOSSARY
637
+
638
+ try:
639
+ fallback_cues = json.loads(fallback_json_text) if fallback_json_text else DEFAULT_FALLBACK_CUES
640
+ except Exception:
641
+ fallback_cues = DEFAULT_FALLBACK_CUES
642
+
643
+ # Prepare workspace
644
  work = Path("/tmp/batch")
645
  if work.exists():
646
  for p in sorted(work.rglob("*"), reverse=True):
 
666
  if not stems:
667
  return ("No .txt transcripts found in ZIP.", "", pd.DataFrame(), "")
668
 
669
+ # Model
670
  try:
671
  model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit)
672
  except Exception as e:
673
  return (f"Model load failed: {e}", "", pd.DataFrame(), "")
674
 
675
  allowed = OFFICIAL_LABELS[:]
676
+ glossary_str = build_glossary_str(label_glossary, allowed)
677
  allowed_list_str = "\n".join(f"- {l}" for l in allowed)
 
678
 
679
  y_true, y_pred = [], []
680
  rows = []
 
683
  for stem in stems:
684
  raw = txts[stem].read_text(encoding="utf-8", errors="ignore")
685
  text = clean_transcript(raw) if use_cleaning else raw
686
+
687
  trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)
688
 
689
  user_prompt = USER_PROMPT_TEMPLATE.format(
690
  transcript=trunc,
691
  allowed_labels_list=allowed_list_str,
692
+ glossary=glossary_str,
693
  )
694
 
695
  t0 = _now_ms()
696
+ out = model.generate(sys_instructions, user_prompt)
697
  t1 = _now_ms()
698
 
699
  parsed = robust_json_extract(out)
700
  filtered = restrict_to_allowed(parsed, allowed)
701
 
702
+ if use_fallback:
703
+ fb = multilingual_fallback(trunc, allowed, fallback_cues)
704
  if fb["labels"]:
705
+ merged_labels = sorted(list(set(filtered.get("labels", [])) | set(fb["labels"])))
706
+ existing = {tt.get("label") for tt in filtered.get("tasks", [])}
707
+ merged_tasks = filtered.get("tasks", []) + [t for t in fb["tasks"] if t["label"] not in existing]
708
+ filtered = {"labels": merged_labels, "tasks": merged_tasks}
709
 
710
  pred_labels = filtered.get("labels", [])
711
  y_pred.append(pred_labels)
 
742
  f"Device: {DEVICE} (4-bit: {'Yes' if (use_4bit and DEVICE=='cuda') else 'No'})",
743
  f"Model: {model_repo}",
744
  f"Input cleaned: {'Yes' if use_cleaning else 'No'}",
745
+ f"Fallback rules: {'Yes' if use_fallback else 'No'}",
746
+ f"Tokens (input limit): ≤ {max_input_tokens}",
747
  f"Batch time: {_now_ms()-t_start} ms",
748
  ]
749
  if have_truth and score is not None:
 
760
  ]
761
  diag_str = "\n".join(diag)
762
 
 
763
  out_csv = Path("/tmp/batch_results.csv")
764
  df.to_csv(out_csv, index=False, encoding="utf-8")
765
  return ("Batch done.", diag_str, df, str(out_csv))
 
768
  # UI
769
  # =========================
770
# Candidate HF model repos selectable in the UI dropdowns; index 0 is the
# default for both the single-transcript and batch tabs.
MODEL_CHOICES = [
    "swiss-ai/Apertus-8B-Instruct-2509",    # multilingual
    "meta-llama/Meta-Llama-3-8B-Instruct",  # strong generalist
    "mistralai/Mistral-7B-Instruct-v0.3",   # light/fast
]
775
 
776
# Light, modern UI (white background, neutral accents).
# Injected via gr.Blocks(css=custom_css); selectors cover Gradio's container
# plus the custom .card/.header/.subtle classes emitted in Markdown HTML.
custom_css = """
:root { --radius: 14px; }
.gradio-container { font-family: Inter, ui-sans-serif, system-ui; background: #ffffff; color: #111827; }
.card { border: 1px solid #e5e7eb; border-radius: var(--radius); padding: 14px 16px; background: #ffffff; box-shadow: 0 1px 2px rgba(0,0,0,.03); }
.header { font-weight: 700; font-size: 22px; margin-bottom: 4px; color: #0f172a; }
.subtle { color: #475569; font-size: 14px; margin-bottom: 12px; }
hr.sep { border: none; border-top: 1px solid #e5e7eb; margin: 10px 0 16px; }
.gr-button { border-radius: 12px !important; }
a, .prose a { color: #0ea5e9; }
"""
787
 
788
  with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo:
789
+ gr.Markdown("<div class='header'>Talk2Task — Multilingual Task Extraction (UBS Challenge)</div>")
790
+ gr.Markdown("<div class='subtle'>Single-pass multilingual extraction (DE/FR/IT/EN) with compact prompts. Optional rule fallback ensures recall. Batch evaluation & scoring included.</div>")
791
 
792
  with gr.Tab("Single transcript"):
793
  with gr.Row():
 
798
  file_types=[".txt", ".md", ".json"],
799
  type="filepath",
800
  )
801
+ text = gr.Textbox(label="Or paste transcript", lines=10, placeholder="Paste transcript in DE/FR/IT/EN…")
802
  gr.Markdown("<hr class='sep'/>")
803
 
804
  gr.Markdown("<div class='header'>Ground truth JSON (optional)</div>")
 
810
  gt_text = gr.Textbox(label="Or paste ground truth JSON", lines=6, placeholder='{\"labels\": [\"schedule_meeting\"]}')
811
  gr.Markdown("</div>") # close card
812
 
813
+ gr.Markdown("<div class='card'><div class='header'>Processing options</div>")
814
+ use_cleaning = gr.Checkbox(label="Apply default cleaning (remove disclaimers, timestamps, speakers, footers)", value=True)
815
+ use_fallback = gr.Checkbox(label="Enable multilingual fallback rule layer", value=True)
 
 
 
 
 
 
816
  gr.Markdown("</div>")
817
 
818
  gr.Markdown("<div class='card'><div class='header'>Allowed labels</div>")
819
+ labels_text = gr.Textbox(label="Allowed Labels (one per line)", value=OFFICIAL_LABELS_TEXT, lines=8)
 
 
 
 
820
  reset_btn = gr.Button("Reset to official labels")
821
  gr.Markdown("</div>")
822
 
823
+ gr.Markdown("<div class='card'><div class='header'>Editable instructions & context</div>")
824
+ sys_instr_tb = gr.Textbox(label="System Instructions (editable)", value=DEFAULT_SYSTEM_INSTRUCTIONS, lines=5)
825
+ glossary_tb = gr.Code(label="Label Glossary (JSON; editable)", value=json.dumps(DEFAULT_LABEL_GLOSSARY, indent=2), language="json")
826
+ fallback_tb = gr.Code(label="Fallback Cues (Multilingual, JSON; editable)", value=json.dumps(DEFAULT_FALLBACK_CUES, indent=2), language="json")
827
+ gr.Markdown("</div>")
828
+
829
  with gr.Column(scale=2):
830
  gr.Markdown("<div class='card'><div class='header'>Model & run</div>")
831
  repo = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
 
848
  with gr.Row():
849
  with gr.Column():
850
  with gr.Accordion("Instructions used (system prompt)", open=False):
851
+ instr_md = gr.Markdown("```\n" + DEFAULT_SYSTEM_INSTRUCTIONS + "\n```")
852
  with gr.Column():
853
+ with gr.Accordion("Context used (glossary)", open=True):
854
  context_md = gr.Markdown("")
855
 
856
+ # Reset labels to official
857
  def _reset_labels():
858
  return OFFICIAL_LABELS_TEXT
859
  reset_btn.click(fn=_reset_labels, inputs=None, outputs=labels_text)
860
 
861
+ # Warm-up
862
+ warm_btn.click(fn=warmup_model, inputs=[repo, use_4bit, hf_token], outputs=diag)
863
+
864
+ # For initial context preview
865
+ def _pack_context_md(glossary_json, allowed_text):
866
+ try:
867
+ glossary = json.loads(glossary_json) if glossary_json else DEFAULT_LABEL_GLOSSARY
868
+ except Exception:
869
+ glossary = DEFAULT_LABEL_GLOSSARY
870
+ allowed_list = [ln.strip() for ln in (allowed_text or OFFICIAL_LABELS_TEXT).splitlines() if ln.strip()]
871
+ return "### Label Glossary (used)\n" + "\n".join(f"- {k}: {glossary.get(k,'')}" for k in allowed_list)
872
 
873
+ context_md.value = _pack_context_md(json.dumps(DEFAULT_LABEL_GLOSSARY), OFFICIAL_LABELS_TEXT)
 
 
 
 
874
 
875
+ # Single run
876
  run_btn.click(
877
  fn=run_single,
878
  inputs=[
879
+ text, file, gt_text, gt_file, use_cleaning, use_fallback,
880
+ labels_text, sys_instr_tb, glossary_tb, fallback_tb,
881
+ repo, use_4bit, max_tokens, hf_token
882
  ],
883
  outputs=[summary, json_out, diag, raw, context_md, instr_md, gr.Textbox(visible=False), prompt_preview, token_info],
884
  )
885
 
 
 
 
886
  with gr.Tab("Batch evaluation"):
887
  with gr.Row():
888
  with gr.Column(scale=3):
889
  gr.Markdown("<div class='card'><div class='header'>ZIP input</div>")
890
  zip_in = gr.File(label="ZIP with transcripts (.txt) and truths (.json)", file_types=[".zip"], type="filepath")
891
  use_cleaning_b = gr.Checkbox(label="Apply default cleaning", value=True)
892
+ use_fallback_b = gr.Checkbox(label="Enable multilingual fallback rule layer", value=True)
893
  gr.Markdown("</div>")
894
  with gr.Column(scale=2):
895
  gr.Markdown("<div class='card'><div class='header'>Model & run</div>")
 
897
  use_4bit_b = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
898
  max_tokens_b = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=2048)
899
  hf_token_b = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
900
+ sys_instr_tb_b = gr.Textbox(label="System Instructions (editable for batch)", value=DEFAULT_SYSTEM_INSTRUCTIONS, lines=4)
901
+ glossary_tb_b = gr.Code(label="Label Glossary (JSON; editable for batch)", value=json.dumps(DEFAULT_LABEL_GLOSSARY, indent=2), language="json")
902
+ fallback_tb_b = gr.Code(label="Fallback Cues (Multilingual, JSON; editable for batch)", value=json.dumps(DEFAULT_FALLBACK_CUES, indent=2), language="json")
903
  limit_files = gr.Slider(label="Process at most N files (0 = all)", minimum=0, maximum=2000, step=10, value=0)
904
  run_batch_btn = gr.Button("Run Batch", variant="primary")
905
  gr.Markdown("</div>")
 
914
 
915
  run_batch_btn.click(
916
  fn=run_batch,
917
+ inputs=[
918
+ zip_in, use_cleaning_b, use_fallback_b,
919
+ sys_instr_tb_b, glossary_tb_b, fallback_tb_b,
920
+ repo_b, use_4bit_b, max_tokens_b, hf_token_b, limit_files
921
+ ],
922
  outputs=[status, diag_b, df_out, csv_out],
923
  )
924