RishiRP commited on
Commit
41b65ed
·
verified ·
1 Parent(s): dfeaa23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +295 -925
app.py CHANGED
@@ -1,978 +1,348 @@
1
- # app.py
2
  import os
3
- import re
4
- import io
5
  import json
6
- import time
7
- import zipfile
8
- from pathlib import Path
9
- from typing import List, Dict, Any, Tuple, Optional
10
-
11
- import numpy as np
12
- import pandas as pd
13
  import gradio as gr
14
-
15
  import torch
16
- from transformers import (
17
- AutoTokenizer,
18
- AutoModelForCausalLM,
19
- BitsAndBytesConfig,
20
- GenerationConfig,
21
- )
22
-
23
- # =========================
24
- # Global config
25
- # =========================
26
- SPACE_CACHE = Path.home() / ".cache" / "huggingface"
27
- SPACE_CACHE.mkdir(parents=True, exist_ok=True)
28
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
29
-
30
- # Fast, deterministic, compact outputs for lower latency
31
- GEN_CONFIG = GenerationConfig(
32
- temperature=0.0,
33
- top_p=1.0,
34
- do_sample=False,
35
- max_new_tokens=128, # increase if your JSON is getting truncated
36
- )
37
-
38
- # Official UBS labels (canonical)
39
- OFFICIAL_LABELS = [
40
- "plan_contact",
41
- "schedule_meeting",
42
- "update_contact_info_non_postal",
43
- "update_contact_info_postal_address",
44
- "update_kyc_activity",
45
- "update_kyc_origin_of_assets",
46
- "update_kyc_purpose_of_businessrelation",
47
- "update_kyc_total_assets",
48
- ]
49
- OFFICIAL_LABELS_TEXT = "\n".join(OFFICIAL_LABELS)
50
-
51
- # =========================
52
- # Editable defaults (shown in UI)
53
- # =========================
54
- DEFAULT_SYSTEM_INSTRUCTIONS = (
55
- "You extract ACTIONABLE TASKS from client–advisor transcripts. "
56
- "The transcript may be in German, French, Italian, or English. "
57
- "Prioritize RECALL: if a label plausibly applies, include it. "
58
- "Use ONLY the canonical labels provided. "
59
- "Return STRICT JSON only with keys 'labels' and 'tasks'. "
60
- "Each task must include 'label', a brief 'explanation', and a short 'evidence' quote from the transcript."
61
- )
62
-
63
- # Very short, language-agnostic semantics to keep prompt small
64
- DEFAULT_LABEL_GLOSSARY = {
65
- "plan_contact": "Commitment to contact later (advisor/client will reach out, follow-up promised).",
66
- "schedule_meeting": "Scheduling or confirming a meeting/call/appointment (time/date/slot/virtual).",
67
- "update_contact_info_non_postal": "Change or confirmation of phone/email (non-postal contact details).",
68
- "update_contact_info_postal_address": "Change or confirmation of postal/residential/mailing address.",
69
- "update_kyc_activity": "Change/confirmation of occupation, employment status, or economic activity.",
70
- "update_kyc_origin_of_assets": "Discussion/confirmation of source of funds / origin of assets.",
71
- "update_kyc_purpose_of_businessrelation": "Purpose of the banking relationship/account usage.",
72
- "update_kyc_total_assets": "Discussion/confirmation of total assets/net worth.",
73
- }
74
-
75
- # Tiny multilingual fallback rules (optional) to guarantee recall if model is empty.
76
- DEFAULT_FALLBACK_CUES = {
77
- "plan_contact": [
78
- # EN
79
- r"\b(get|got|will|we'?ll|i'?ll)\s+back to you\b",
80
- r"\bfollow\s*up\b",
81
- r"\breach out\b",
82
- r"\btouch base\b",
83
- r"\bcontact (you|me|us)\b",
84
- # DE
85
- r"\bin verbindung setzen\b",
86
- r"\brückmeldung\b",
87
- r"\bich\s+melde\b|\bwir\s+melden\b",
88
- r"\bnachfassen\b",
89
- # FR
90
- r"\bje vous recontacte\b|\bnous vous recontacterons\b",
91
- r"\bprendre contact\b|\breprendre contact\b",
92
- # IT
93
- r"\bla ricontatter[oò]\b|\bci metteremo in contatto\b",
94
- r"\btenersi in contatto\b",
95
- ],
96
- "schedule_meeting": [
97
- # EN
98
- r"\b(let'?s\s+)?meet(ing|s)?\b",
99
- r"\bschedule( a)? (call|meeting|appointment)\b",
100
- r"\bbook( a)? (slot|time|meeting)\b",
101
- r"\b(next week|tomorrow|this (afternoon|morning|evening))\b",
102
- r"\bconfirm( the)? (time|meeting|appointment)\b",
103
- # DE
104
- r"\btermin(e|s)?\b|\bvereinbaren\b|\bansetzen\b|\babstimmen\b|\bbesprechung(en)?\b|\bvirtuell(e|en)?\b",
105
- r"\bnächste(n|r)? woche\b|\b(dienstag|montag|mittwoch|donnerstag|freitag)\b|\bnachmittag|vormittag|morgen\b",
106
- # FR
107
- r"\brendez[- ]?vous\b|\bréunion\b|\bfixer\b|\bplanifier\b|\bcalendrier\b|\bse rencontrer\b|\bse voir\b",
108
- r"\bla semaine prochaine\b|\bdemain\b|\bcet (après-midi|apres-midi|après midi|apres midi|matin|soir)\b",
109
- # IT
110
- r"\bappuntamento\b|\briunione\b|\borganizzare\b|\bprogrammare\b|\bincontrarci\b|\bcalendario\b",
111
- r"\bla prossima settimana\b|\bdomani\b|\b(questo|questa)\s*(pomeriggio|mattina|sera)\b",
112
- ],
113
- "update_kyc_origin_of_assets": [
114
- # EN
115
- r"\bsource of funds\b|\borigin of assets\b|\bproof of (funds|assets)\b",
116
- # DE
117
- r"\bvermögensursprung(e|s)?\b|\bherkunft der mittel\b|\bnachweis\b",
118
- # FR
119
- r"\borigine des fonds\b|\borigine du patrimoine\b|\bjustificatif(s)?\b",
120
- # IT
121
- r"\borigine dei fondi\b|\borigine del patrimonio\b|\bprova dei fondi\b|\bgiustificativo\b",
122
- ],
123
- "update_kyc_activity": [
124
- # EN
125
- r"\bemployment status\b|\boccupation\b|\bjob change\b|\bsalary history\b",
126
- # DE
127
- r"\bbeschäftigungsstatus\b|\bberuf\b|\bjobwechsel\b|\bgehaltshistorie\b|\btätigkeit\b",
128
- # FR
129
- r"\bstatut professionnel\b|\bprofession\b|\bchangement d'emploi\b|\bhistorique salarial\b|\bactivité\b",
130
- # IT
131
- r"\bstato occupazionale\b|\bprofessione\b|\bcambio di lavoro\b|\bstoria salariale\b|\battivit[aà]\b",
132
- ],
133
- }
134
 
135
  # =========================
136
- # Prompt templates (minimal multilingual)
137
  # =========================
138
- USER_PROMPT_TEMPLATE = (
139
- "Transcript (may be DE/FR/IT/EN):\n"
140
- "```\n{transcript}\n```\n\n"
141
- "Allowed Labels (canonical; use only these):\n"
142
- "{allowed_labels_list}\n\n"
143
- "Label Glossary (concise semantics):\n"
144
- "{glossary}\n\n"
145
- "Return STRICT JSON ONLY in this exact schema:\n"
146
- '{\n "labels": ["<Label1>", "..."],\n'
147
- ' "tasks": [{"label": "<Label1>", "explanation": "<why>", "evidence": "<quote>"}]\n}\n'
148
- )
149
 
150
- # =========================
151
- # Utilities
152
- # =========================
153
- def _now_ms() -> int:
154
- return int(time.time() * 1000)
155
-
156
- def normalize_labels(labels: List[str]) -> List[str]:
157
- return list(dict.fromkeys([l.strip() for l in labels if isinstance(l, str) and l.strip()]))
158
-
159
- def canonicalize_map(allowed: List[str]) -> Dict[str, str]:
160
- return {lab.lower(): lab for lab in allowed}
161
-
162
- def robust_json_extract(text: str) -> Dict[str, Any]:
163
- if not text:
164
- return {"labels": [], "tasks": []}
165
- start, end = text.find("{"), text.rfind("}")
166
- candidate = text[start:end+1] if (start != -1 and end != -1 and end > start) else text
167
  try:
168
- return json.loads(candidate)
 
169
  except Exception:
170
- candidate = re.sub(r",\s*}", "}", candidate)
171
- candidate = re.sub(r",\s*]", "]", candidate)
172
- try:
173
- return json.loads(candidate)
174
- except Exception:
175
- return {"labels": [], "tasks": []}
176
-
177
- def restrict_to_allowed(pred: Dict[str, Any], allowed: List[str]) -> Dict[str, Any]:
178
- out = {"labels": [], "tasks": []}
179
- allowed_map = canonicalize_map(allowed)
180
- filt_labels = []
181
- for l in pred.get("labels", []) or []:
182
- k = str(l).strip().lower()
183
- if k in allowed_map:
184
- filt_labels.append(allowed_map[k])
185
- filt_labels = normalize_labels(filt_labels)
186
- filt_tasks = []
187
- for t in pred.get("tasks", []) or []:
188
- if not isinstance(t, dict):
189
- continue
190
- k = str(t.get("label", "")).strip().lower()
191
- if k in allowed_map:
192
- new_t = dict(t); new_t["label"] = allowed_map[k]
193
- new_t = {
194
- "label": new_t["label"],
195
- "explanation": str(new_t.get("explanation", ""))[:300],
196
- "evidence": str(new_t.get("evidence", ""))[:300],
197
- }
198
- filt_tasks.append(new_t)
199
- merged = normalize_labels(list(set(filt_labels) | {tt["label"] for tt in filt_tasks}))
200
- out["labels"] = merged
201
- out["tasks"] = filt_tasks
202
- return out
203
 
204
  # =========================
205
- # Pre-processing
206
  # =========================
207
- _DISCLAIMER_PATTERNS = [
208
- r"(?is)^\s*(?:disclaimer|legal notice|confidentiality notice).+?(?:\n{2,}|$)",
209
- r"(?is)^\s*the information contained.+?(?:\n{2,}|$)",
210
- r"(?is)^\s*this message \(including any attachments\).+?(?:\n{2,}|$)",
211
- ]
212
- _FOOTER_PATTERNS = [
213
- r"(?is)\n+kind regards[^\n]*\n.*$", r"(?is)\n+best regards[^\n]*\n.*$",
214
- r"(?is)\n+sent from my.*$", r"(?is)\n+ubs ag.*$",
215
- ]
216
- _TIMESTAMP_SPEAKER = [
217
- r"\[\d{1,2}:\d{2}(:\d{2})?\]", # [00:01] or [00:01:02]
218
- r"^\s*(advisor|client|client advisor)\s*:\s*", # Advisor:, Client:
219
- r"^\s*(speaker\s*\d+)\s*:\s*", # Speaker 1:
220
- ]
221
-
222
- def clean_transcript(text: str) -> str:
223
- if not text:
224
- return text
225
- s = text
226
- lines = []
227
- for ln in s.splitlines():
228
- ln2 = ln
229
- for pat in _TIMESTAMP_SPEAKER:
230
- ln2 = re.sub(pat, "", ln2, flags=re.IGNORECASE)
231
- lines.append(ln2)
232
- s = "\n".join(lines)
233
- for pat in _DISCLAIMER_PATTERNS:
234
- s = re.sub(pat, "", s).strip()
235
- for pat in _FOOTER_PATTERNS:
236
- s = re.sub(pat, "", s)
237
- s = re.sub(r"[ \t]+", " ", s)
238
- s = re.sub(r"\n{3,}", "\n\n", s).strip()
239
- return s
240
-
241
- def read_text_file_any(file_input) -> str:
242
- if not file_input:
243
- return ""
244
- if isinstance(file_input, (str, Path)):
245
- try:
246
- return Path(file_input).read_text(encoding="utf-8", errors="ignore")
247
- except Exception:
248
- return ""
249
- try:
250
- data = file_input.read()
251
- return data.decode("utf-8", errors="ignore")
252
- except Exception:
253
- return ""
254
-
255
- def read_json_file_any(file_input) -> Optional[dict]:
256
- if not file_input:
257
  return None
258
- if isinstance(file_input, (str, Path)):
259
- try:
260
- return json.loads(Path(file_input).read_text(encoding="utf-8", errors="ignore"))
261
- except Exception:
262
- return None
263
  try:
264
- return json.loads(file_input.read().decode("utf-8", errors="ignore"))
 
265
  except Exception:
266
  return None
267
 
268
- def truncate_tokens(tokenizer, text: str, max_tokens: int) -> str:
269
- toks = tokenizer(text, add_special_tokens=False)["input_ids"]
270
- if len(toks) <= max_tokens:
271
- return text
272
- return tokenizer.decode(toks[-max_tokens:], skip_special_tokens=True)
273
-
274
- # =========================
275
- # HF model wrapper (main LLM) – robust against meta tensor errors
276
- # =========================
277
- class ModelWrapper:
278
- def __init__(self, repo_id: str, hf_token: Optional[str], load_in_4bit: bool, use_sdpa: bool):
279
- self.repo_id = repo_id
280
- self.hf_token = hf_token
281
- self.load_in_4bit = load_in_4bit
282
- self.use_sdpa = use_sdpa
283
- self.tokenizer = None
284
- self.model = None
285
- self.load_path = "uninitialized"
286
-
287
- def load(self):
288
- # Build a BitsAndBytes config if needed
289
- qcfg = None
290
- if self.load_in_4bit and DEVICE == "cuda":
291
- qcfg = BitsAndBytesConfig(
292
- load_in_4bit=True,
293
- bnb_4bit_quant_type="nf4",
294
- bnb_4bit_compute_dtype=torch.float16,
295
- bnb_4bit_use_double_quant=True,
296
- )
297
-
298
- # Try a safe load first (no low_cpu_mem_usage, device_map="auto")
299
- errors = []
300
- for attempt in [
301
- # (desc, kwargs)
302
- ("auto_device_no_lowcpu" + ("_sdpa" if self.use_sdpa else ""),
303
- dict(
304
- torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
305
- device_map="auto" if DEVICE == "cuda" else None,
306
- low_cpu_mem_usage=False, # avoid meta init
307
- quantization_config=qcfg,
308
- trust_remote_code=True,
309
- cache_dir=str(SPACE_CACHE),
310
- attn_implementation=("sdpa" if (self.use_sdpa and DEVICE == "cuda") else None),
311
- )),
312
- ("auto_device_no_sdpa",
313
- dict(
314
- torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
315
- device_map="auto" if DEVICE == "cuda" else None,
316
- low_cpu_mem_usage=False,
317
- quantization_config=qcfg,
318
- trust_remote_code=True,
319
- cache_dir=str(SPACE_CACHE),
320
- # no attn_implementation key => let HF choose
321
- )),
322
- ("cpu_then_to_cuda" if DEVICE == "cuda" else "cpu_only",
323
- dict(
324
- torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
325
- device_map=None, # load on CPU
326
- low_cpu_mem_usage=False,
327
- quantization_config=None if DEVICE != "cuda" else qcfg, # if 4bit, keep qcfg
328
- trust_remote_code=True,
329
- cache_dir=str(SPACE_CACHE),
330
- )),
331
- ]:
332
- desc, kwargs = attempt
333
- try:
334
- tok = AutoTokenizer.from_pretrained(
335
- self.repo_id, token=self.hf_token,
336
- cache_dir=str(SPACE_CACHE), trust_remote_code=True, use_fast=True,
337
- )
338
- if tok.pad_token is None and tok.eos_token:
339
- tok.pad_token = tok.eos_token
340
-
341
- mdl = AutoModelForCausalLM.from_pretrained(
342
- self.repo_id, token=self.hf_token, **kwargs
343
- )
344
-
345
- # If we loaded on CPU and have CUDA, move model (non-meta) to CUDA
346
- if desc.startswith("cpu_then_to_cuda") and DEVICE == "cuda":
347
- mdl = mdl.to(torch.device("cuda"))
348
-
349
- self.tokenizer = tok
350
- self.model = mdl
351
- self.load_path = desc
352
- return
353
-
354
- except Exception as e:
355
- errors.append(f"{desc}: {e}")
356
-
357
- raise RuntimeError("All load attempts failed:\n" + "\n".join(errors))
358
-
359
- @torch.inference_mode()
360
- def generate(self, system_prompt: str, user_prompt: str) -> str:
361
- # Build inputs as input_ids=... (avoid **tensor kwargs mixing)
362
- if hasattr(self.tokenizer, "apply_chat_template"):
363
- messages = [
364
- {"role": "system", "content": system_prompt},
365
- {"role": "user", "content": user_prompt},
366
- ]
367
- input_ids = self.tokenizer.apply_chat_template(
368
- messages,
369
- tokenize=True,
370
- add_generation_prompt=True,
371
- return_tensors="pt",
372
- )
373
- input_ids = input_ids.to(self.model.device)
374
- gen_kwargs = dict(
375
- input_ids=input_ids,
376
- generation_config=GEN_CONFIG,
377
- eos_token_id=self.tokenizer.eos_token_id,
378
- pad_token_id=self.tokenizer.pad_token_id,
379
- )
380
- else:
381
- enc = self.tokenizer(
382
- f"<s>[SYSTEM]\n{system_prompt}\n[/SYSTEM]\n[USER]\n{user_prompt}\n[/USER]\n",
383
- return_tensors="pt"
384
- ).to(self.model.device)
385
- gen_kwargs = dict(
386
- **enc,
387
- generation_config=GEN_CONFIG,
388
- eos_token_id=self.tokenizer.eos_token_id,
389
- pad_token_id=self.tokenizer.pad_token_id,
390
- )
391
-
392
- with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):
393
- out_ids = self.model.generate(**gen_kwargs)
394
- return self.tokenizer.decode(out_ids[0], skip_special_tokens=True)
395
-
396
- _MODEL_CACHE: Dict[str, ModelWrapper] = {}
397
- def get_model(repo_id: str, hf_token: Optional[str], load_in_4bit: bool, use_sdpa: bool) -> ModelWrapper:
398
- key = f"{repo_id}::{'4bit' if (load_in_4bit and DEVICE=='cuda') else 'full'}::{'sdpa' if use_sdpa else 'nosdpa'}"
399
- if key not in _MODEL_CACHE:
400
- m = ModelWrapper(repo_id, hf_token, load_in_4bit, use_sdpa)
401
- m.load()
402
- _MODEL_CACHE[key] = m
403
- return _MODEL_CACHE[key]
404
-
405
- # =========================
406
- # Evaluation (official weighted score)
407
- # =========================
408
- def evaluate_predictions(y_true: List[List[str]], y_pred: List[List[str]]) -> float:
409
- ALLOWED_LABELS = OFFICIAL_LABELS
410
- LABEL_TO_IDX = {label: idx for idx, label in enumerate(ALLOWED_LABELS)}
411
-
412
- def _process_sample_labels(sample_labels: List[str], sample_name: str) -> List[str]:
413
- if not isinstance(sample_labels, list):
414
- raise ValueError(f"{sample_name} must be a list of strings, got {type(sample_labels)}")
415
- seen, uniq = set(), []
416
- for label in sample_labels:
417
- if not isinstance(label, str):
418
- raise ValueError(f"{sample_name} contains non-string: {label} (type: {type(label)})")
419
- if label in seen:
420
- raise ValueError(f"{sample_name} contains duplicate label: '{label}'")
421
- if label not in ALLOWED_LABELS:
422
- raise ValueError(f"{sample_name} contains invalid label: '{label}'. Allowed: {ALLOWED_LABELS}")
423
- seen.add(label); uniq.append(label)
424
- return uniq
425
-
426
- if len(y_true) != len(y_pred):
427
- raise ValueError(f"y_true and y_pred must have same length. Got {len(y_true)} vs {len(y_pred)}")
428
-
429
- n_samples = len(y_true)
430
- n_labels = len(OFFICIAL_LABELS)
431
- y_true_binary = np.zeros((n_samples, n_labels), dtype=int)
432
- y_pred_binary = np.zeros((n_samples, n_labels), dtype=int)
433
-
434
- for i, sample_labels in enumerate(y_true):
435
- for label in _process_sample_labels(sample_labels, f"y_true[{i}]"):
436
- y_true_binary[i, LABEL_TO_IDX[label]] = 1
437
-
438
- for i, sample_labels in enumerate(y_pred):
439
- for label in _process_sample_labels(sample_labels, f"y_pred[{i}]"):
440
- y_pred_binary[i, LABEL_TO_IDX[label]] = 1
441
-
442
- fn = np.sum((y_true_binary == 1) & (y_pred_binary == 0), axis=1) # penalty 2x
443
- fp = np.sum((y_true_binary == 0) & (y_pred_binary == 1), axis=1) # penalty 1x
444
- weighted = 2.0 * fn + 1.0 * fp
445
- max_err = 2.0 * np.sum(y_true_binary, axis=1) + 1.0 * (n_labels - np.sum(y_true_binary, axis=1))
446
- per_sample = np.where(max_err > 0, 1.0 - (weighted / max_err), 1.0)
447
- return float(max(0.0, min(1.0, np.mean(per_sample))))
448
 
449
- # =========================
450
- # Multilingual fallback (regex on original text)
451
- # =========================
452
- def multilingual_fallback(text: str, allowed: List[str], cues: Dict[str, List[str]]) -> Dict[str, Any]:
453
- low = text.lower()
454
- labels, tasks = [], []
455
- for lab in allowed:
456
- for pat in cues.get(lab, []):
457
- m = re.search(pat, low)
458
- if m:
459
- i = m.start()
460
- start = max(0, i - 60); end = min(len(text), i + len(m.group(0)) + 60)
461
- if lab not in labels:
462
- labels.append(lab)
463
- tasks.append({
464
- "label": lab,
465
- "explanation": "Rule hit (multilingual fallback)",
466
- "evidence": text[start:end].strip()
467
- })
468
- break
469
- return {"labels": normalize_labels(labels), "tasks": tasks}
470
 
471
  # =========================
472
- # Inference helpers
473
  # =========================
474
- def build_glossary_str(glossary: Dict[str, str], allowed: List[str]) -> str:
475
- return "\n".join([f"- {lab}: {glossary.get(lab, '')}" for lab in allowed])
476
-
477
- def warmup_model(model_repo: str, use_4bit: bool, use_sdpa: bool, hf_token: str) -> str:
478
- t0 = _now_ms()
479
- try:
480
- model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit, use_sdpa)
481
- _ = model.generate("Return JSON only.", '{"labels": [], "tasks": []}')
482
- return f"Warm-up complete in {_now_ms() - t0} ms. Load path: {model.load_path}"
483
- except Exception as e:
484
- return f"Warm-up failed: {e}"
485
-
486
- def run_single(
487
- transcript_text: str,
488
- transcript_file,
489
- gt_json_text: str,
490
- gt_json_file,
491
- use_cleaning: bool,
492
- use_fallback: bool,
493
- allowed_labels_text: str,
494
- sys_instructions_text: str,
495
- glossary_json_text: str,
496
- fallback_json_text: str,
497
- model_repo: str,
498
- use_4bit: bool,
499
- use_sdpa: bool,
500
- max_input_tokens: int,
501
- hf_token: str,
502
- ) -> Tuple[str, str, str, str, str, str, str, str, str]:
503
-
504
- t0 = _now_ms()
505
-
506
- # Load transcript
507
- raw_text = ""
508
- if transcript_file:
509
- raw_text = read_text_file_any(transcript_file)
510
- raw_text = (raw_text or transcript_text or "").strip()
511
- if not raw_text:
512
- return "", "", "No transcript provided.", "", "", "", "", "", ""
513
-
514
- text = clean_transcript(raw_text) if use_cleaning else raw_text
515
-
516
- # Allowed labels
517
- user_allowed = [ln.strip() for ln in (allowed_labels_text or "").splitlines() if ln.strip()]
518
- allowed = normalize_labels(user_allowed or OFFICIAL_LABELS)
519
-
520
- # Editable configs
521
- try:
522
- sys_instructions = (sys_instructions_text or DEFAULT_SYSTEM_INSTRUCTIONS).strip()
523
- if not sys_instructions:
524
- sys_instructions = DEFAULT_SYSTEM_INSTRUCTIONS
525
- except Exception:
526
- sys_instructions = DEFAULT_SYSTEM_INSTRUCTIONS
527
-
528
- try:
529
- label_glossary = json.loads(glossary_json_text) if glossary_json_text else DEFAULT_LABEL_GLOSSARY
530
- except Exception:
531
- label_glossary = DEFAULT_LABEL_GLOSSARY
532
-
533
- try:
534
- fallback_cues = json.loads(fallback_json_text) if fallback_json_text else DEFAULT_FALLBACK_CUES
535
- except Exception:
536
- fallback_cues = DEFAULT_FALLBACK_CUES
537
 
538
- # Model
539
  try:
540
- model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit, use_sdpa)
541
  except Exception as e:
542
- return "", "", f"Model load failed: {e}", "", "", "", "", "", ""
543
-
544
- # Truncate
545
- trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)
546
-
547
- # Build prompt
548
- glossary_str = build_glossary_str(label_glossary, allowed)
549
- allowed_list_str = "\n".join(f"- {l}" for l in allowed)
550
- user_prompt = USER_PROMPT_TEMPLATE.format(
551
- transcript=trunc,
552
- allowed_labels_list=allowed_list_str,
553
- glossary=glossary_str,
 
 
 
554
  )
555
 
556
- # Token info + prompt preview
557
- transcript_tokens = len(model.tokenizer(trunc, add_special_tokens=False)["input_ids"])
558
- prompt_tokens = len(model.tokenizer(user_prompt, add_special_tokens=False)["input_ids"])
559
- token_info_text = f"Transcript tokens: {transcript_tokens} | Prompt tokens: {prompt_tokens} | Load path: {model.load_path}"
560
- prompt_preview_text = "```\n" + user_prompt[:4000] + ("\n... (truncated)" if len(user_prompt) > 4000 else "") + "\n```"
561
-
562
- # Generate
563
- t1 = _now_ms()
564
  try:
565
- out = model.generate(sys_instructions, user_prompt)
 
 
 
 
 
 
 
 
 
 
566
  except Exception as e:
567
- return "", "", f"Generation error: {e}", "", "", "", prompt_preview_text, token_info_text, ""
568
- t2 = _now_ms()
569
-
570
- parsed = robust_json_extract(out)
571
- filtered = restrict_to_allowed(parsed, allowed)
572
-
573
- # Fallback (multilingual rules) on original text; merge for recall if enabled
574
- if use_fallback:
575
- fb = multilingual_fallback(trunc, allowed, fallback_cues)
576
- if fb["labels"]:
577
- merged_labels = sorted(list(set(filtered.get("labels", [])) | set(fb["labels"])))
578
- existing = {tt.get("label") for tt in filtered.get("tasks", [])}
579
- merged_tasks = filtered.get("tasks", []) + [t for t in fb["tasks"] if t["label"] not in existing]
580
- filtered = {"labels": merged_labels, "tasks": merged_tasks}
581
-
582
- # Diagnostics
583
- diag = "\n".join([
584
- f"Device: {DEVICE} (4-bit: {'Yes' if (use_4bit and DEVICE=='cuda') else 'No'})",
585
- f"Model: {model_repo}",
586
- f"Input cleaned: {'Yes' if use_cleaning else 'No'}",
587
- f"Fallback rules: {'Yes' if use_fallback else 'No'}",
588
- f"SDPA attention: {'Yes' if use_sdpa else 'No'}",
589
- f"Tokens (input limit): ≤ {max_input_tokens}",
590
- f"Latency: prep {t1-t0} ms, gen {t2-t1} ms, total {t2-t0} ms",
591
- f"Allowed labels: {', '.join(allowed)}",
592
- ])
593
-
594
- # Summaries
595
- labs = filtered.get("labels", [])
596
- tasks = filtered.get("tasks", [])
597
- summary = "Detected labels:\n" + ("\n".join(f"- {l}" for l in labs) if labs else "(none)")
598
- if tasks:
599
- summary += "\n\nTasks:\n" + "\n".join(
600
- f"• [{t['label']}] {t.get('explanation','')} | ev: {t.get('evidence','')[:140]}{'…' if len(t.get('evidence',''))>140 else ''}"
601
- for t in tasks
602
- )
603
- else:
604
- summary += "\n\nTasks: (none)"
605
- json_out = json.dumps(filtered, indent=2, ensure_ascii=False)
606
-
607
- # Single-file scoring if GT provided
608
- metrics = ""
609
- if gt_json_file or (gt_json_text and gt_json_text.strip()):
610
- truth_obj = None
611
- if gt_json_file:
612
- truth_obj = read_json_file_any(gt_json_file)
613
- if (not truth_obj) and gt_json_text:
614
- try:
615
- truth_obj = json.loads(gt_json_text)
616
- except Exception:
617
- pass
618
- if isinstance(truth_obj, dict) and isinstance(truth_obj.get("labels"), list):
619
- true_labels = [x for x in truth_obj["labels"] if x in OFFICIAL_LABELS]
620
- pred_labels = labs
621
- try:
622
- score = evaluate_predictions([true_labels], [pred_labels])
623
- tp = len(set(true_labels) & set(pred_labels))
624
- fp = len(set(pred_labels) - set(true_labels))
625
- fn = len(set(true_labels) - set(pred_labels))
626
- recall = tp / (tp + fn) if (tp + fn) else 1.0
627
- precision = tp / (tp + fp) if (tp + fp) else 1.0
628
- f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) else 1.0
629
- metrics = (
630
- f"Weighted score: {score:.3f}\n"
631
- f"Recall: {recall:.3f} | Precision: {precision:.3f} | F1: {f1:.3f}\n"
632
- f"TP={tp} FP={fp} FN={fn}\n"
633
- f"Truth: {', '.join(true_labels)}"
634
- )
635
- except Exception as e:
636
- metrics = f"Scoring error: {e}"
637
- else:
638
- metrics = "Ground truth JSON missing or invalid; expected {'labels': [...]}."
639
 
640
- # For UI: show effective context (glossary) and instructions
641
- context_preview = "### Label Glossary (used)\n" + "\n".join(f"- {k}: {v}" for k, v in label_glossary.items() if k in allowed)
642
- instructions_preview = "```\n" + sys_instructions + "\n```"
 
 
643
 
644
- return summary, json_out, diag, out.strip(), context_preview, instructions_preview, metrics, prompt_preview_text, token_info_text
 
 
 
645
 
646
  # =========================
647
- # Batch mode (ZIP with transcripts + truths)
648
  # =========================
649
- def read_zip_from_path(path: str, exdir: Path) -> List[Path]:
650
- exdir.mkdir(parents=True, exist_ok=True)
651
- with open(path, "rb") as f:
652
- data = f.read()
653
- with zipfile.ZipFile(io.BytesIO(data)) as zf:
654
- zf.extractall(exdir)
655
- return [p for p in exdir.rglob("*") if p.is_file()]
656
-
657
- def run_batch(
658
- zip_path,
659
- use_cleaning: bool,
660
- use_fallback: bool,
661
- sys_instructions_text: str,
662
- glossary_json_text: str,
663
- fallback_json_text: str,
664
- model_repo: str,
665
- use_4bit: bool,
666
- use_sdpa: bool,
667
- max_input_tokens: int,
668
- hf_token: str,
669
- limit_files: int,
670
- ) -> Tuple[str, str, pd.DataFrame, str]:
671
-
672
- if not zip_path:
673
- return ("No ZIP provided.", "", pd.DataFrame(), "")
674
-
675
- # Editable configs
676
- try:
677
- sys_instructions = (sys_instructions_text or DEFAULT_SYSTEM_INSTRUCTIONS).strip()
678
- if not sys_instructions:
679
- sys_instructions = DEFAULT_SYSTEM_INSTRUCTIONS
680
- except Exception:
681
- sys_instructions = DEFAULT_SYSTEM_INSTRUCTIONS
682
-
683
- try:
684
- label_glossary = json.loads(glossary_json_text) if glossary_json_text else DEFAULT_LABEL_GLOSSARY
685
- except Exception:
686
- label_glossary = DEFAULT_LABEL_GLOSSARY
687
-
688
- try:
689
- fallback_cues = json.loads(fallback_json_text) if fallback_json_text else DEFAULT_FALLBACK_CUES
690
- except Exception:
691
- fallback_cues = DEFAULT_FALLBACK_CUES
692
-
693
- # Prepare workspace
694
- work = Path("/tmp/batch")
695
- if work.exists():
696
- for p in sorted(work.rglob("*"), reverse=True):
697
- try: p.unlink()
698
- except Exception: pass
699
- try: work.rmdir()
700
- except Exception: pass
701
- work.mkdir(parents=True, exist_ok=True)
702
-
703
- files = read_zip_from_path(zip_path, work)
704
-
705
- txts: Dict[str, Path] = {}
706
- gts: Dict[str, Path] = {}
707
- for p in files:
708
- if p.suffix.lower() == ".txt":
709
- txts[p.stem] = p
710
- elif p.suffix.lower() == ".json":
711
- gts[p.stem] = p
712
-
713
- stems = sorted(txts.keys())
714
- if limit_files > 0:
715
- stems = stems[:limit_files]
716
- if not stems:
717
- return ("No .txt transcripts found in ZIP.", "", pd.DataFrame(), "")
718
-
719
- # Model
720
- try:
721
- model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit, use_sdpa)
722
- except Exception as e:
723
- return (f"Model load failed: {e}", "", pd.DataFrame(), "")
724
-
725
- allowed = OFFICIAL_LABELS[:]
726
- glossary_str = build_glossary_str(label_glossary, allowed)
727
- allowed_list_str = "\n".join(f"- {l}" for l in allowed)
728
-
729
- y_true, y_pred = [], []
730
- rows = []
731
- t_start = _now_ms()
732
-
733
- for stem in stems:
734
- raw = txts[stem].read_text(encoding="utf-8", errors="ignore")
735
- text = clean_transcript(raw) if use_cleaning else raw
736
 
737
- trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)
 
 
 
738
 
739
- user_prompt = USER_PROMPT_TEMPLATE.format(
740
- transcript=trunc,
741
- allowed_labels_list=allowed_list_str,
742
- glossary=glossary_str,
743
- )
744
 
745
- t0 = _now_ms()
746
- out = model.generate(sys_instructions, user_prompt)
747
- t1 = _now_ms()
748
-
749
- parsed = robust_json_extract(out)
750
- filtered = restrict_to_allowed(parsed, allowed)
751
-
752
- if use_fallback:
753
- fb = multilingual_fallback(trunc, allowed, fallback_cues)
754
- if fb["labels"]:
755
- merged_labels = sorted(list(set(filtered.get("labels", [])) | set(fb["labels"])))
756
- existing = {tt.get("label") for tt in filtered.get("tasks", [])}
757
- merged_tasks = filtered.get("tasks", []) + [t for t in fb["tasks"] if t["label"] not in existing]
758
- filtered = {"labels": merged_labels, "tasks": merged_tasks}
759
-
760
- pred_labels = filtered.get("labels", [])
761
- y_pred.append(pred_labels)
762
-
763
- gt_labels = []
764
- if stem in gts:
765
- try:
766
- gt_obj = json.loads(gts[stem].read_text(encoding="utf-8", errors="ignore"))
767
- if isinstance(gt_obj, dict) and isinstance(gt_obj.get("labels"), list):
768
- gt_labels = [x for x in gt_obj["labels"] if x in OFFICIAL_LABELS]
769
- except Exception:
770
- pass
771
- y_true.append(gt_labels)
772
-
773
- gt_set, pr_set = set(gt_labels), set(pred_labels)
774
- tp = sorted(gt_set & pr_set)
775
- fp = sorted(pr_set - gt_set)
776
- fn = sorted(gt_set - pr_set)
777
-
778
- rows.append({
779
- "file": stem,
780
- "true_labels": ", ".join(gt_labels),
781
- "pred_labels": ", ".join(pred_labels),
782
- "TP": len(tp), "FP": len(fp), "FN": len(fn),
783
- "gen_ms": t1 - t0
784
- })
785
-
786
- have_truth = any(len(v) > 0 for v in y_true)
787
- score = evaluate_predictions(y_true, y_pred) if have_truth else None
788
-
789
- df = pd.DataFrame(rows).sort_values(["FN", "FP", "file"])
790
- diag = [
791
- f"Processed files: {len(stems)}",
792
- f"Device: {DEVICE} (4-bit: {'Yes' if (use_4bit and DEVICE=='cuda') else 'No'})",
793
- f"Model: {model_repo}",
794
- f"Fallback rules: {'Yes' if use_fallback else 'No'}",
795
- f"SDPA attention: {'Yes' if use_sdpa else 'No'}",
796
- f"Tokens (input limit): ≤ {max_input_tokens}",
797
- f"Batch time: {_now_ms()-t_start} ms",
798
- ]
799
- if have_truth and score is not None:
800
- total_tp = int(df["TP"].sum())
801
- total_fp = int(df["FP"].sum())
802
- total_fn = int(df["FN"].sum())
803
- recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) else 1.0
804
- precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) else 1.0
805
- f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) else 1.0
806
- diag += [
807
- f"Official weighted score (0–1): {score:.3f}",
808
- f"Recall: {recall:.3f} | Precision: {precision:.3f} | F1: {f1:.3f}",
809
- f"Total TP={total_tp} FP={total_fp} FN={total_fn}",
810
- ]
811
- diag_str = "\n".join(diag)
812
 
813
- out_csv = Path("/tmp/batch_results.csv")
814
- df.to_csv(out_csv, index=False, encoding="utf-8")
815
- return ("Batch done.", diag_str, df, str(out_csv))
 
 
 
 
 
 
816
 
817
- # =========================
818
- # UI
819
- # =========================
820
- MODEL_CHOICES = [
821
- "swiss-ai/Apertus-8B-Instruct-2509", # multilingual
822
- "meta-llama/Meta-Llama-3-8B-Instruct", # strong generalist
823
- "mistralai/Mistral-7B-Instruct-v0.3", # light/fast
824
- ]
825
-
826
- # Light, modern UI (white background, neutral accents)
827
- custom_css = """
828
- :root { --radius: 14px; }
829
- .gradio-container { font-family: Inter, ui-sans-serif, system-ui; background: #ffffff; color: #111827; }
830
- .card { border: 1px solid #e5e7eb; border-radius: var(--radius); padding: 14px 16px; background: #ffffff; box-shadow: 0 1px 2px rgba(0,0,0,.03); }
831
- .header { font-weight: 700; font-size: 22px; margin-bottom: 4px; color: #0f172a; }
832
- .subtle { color: #475569; font-size: 14px; margin-bottom: 12px; }
833
- hr.sep { border: none; border-top: 1px solid #e5e7eb; margin: 10px 0 16px; }
834
- .gr-button { border-radius: 12px !important; }
835
- a, .prose a { color: #0ea5e9; }
836
  """
837
 
838
- with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo:
839
- gr.Markdown("<div class='header'>Talk2Task Multilingual Task Extraction (UBS Challenge)</div>")
840
- gr.Markdown("<div class='subtle'>Single-pass multilingual extraction (DE/FR/IT/EN) with compact prompts. Optional rule fallback ensures recall. Batch evaluation & scoring included.</div>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
841
 
842
- with gr.Tab("Single transcript"):
843
  with gr.Row():
844
- with gr.Column(scale=3):
845
- gr.Markdown("<div class='card'><div class='header'>Transcript</div>")
846
- file = gr.File(
847
- label="Drag & drop transcript (.txt / .md / .json)",
848
- file_types=[".txt", ".md", ".json"],
849
- type="filepath",
 
 
850
  )
851
- text = gr.Textbox(label="Or paste transcript", lines=10, placeholder="Paste transcript in DE/FR/IT/EN…")
852
- gr.Markdown("<hr class='sep'/>")
853
-
854
- gr.Markdown("<div class='header'>Ground truth JSON (optional)</div>")
855
- gt_file = gr.File(
856
- label="Upload ground truth JSON (expects {'labels': [...]})",
857
- file_types=[".json"],
858
- type="filepath",
859
  )
860
- gt_text = gr.Textbox(label="Or paste ground truth JSON", lines=6, placeholder='{\"labels\": [\"schedule_meeting\"]}')
861
- gr.Markdown("</div>") # close card
862
-
863
- gr.Markdown("<div class='card'><div class='header'>Processing options</div>")
864
- use_cleaning = gr.Checkbox(label="Apply default cleaning (remove disclaimers, timestamps, speakers, footers)", value=True)
865
- use_fallback = gr.Checkbox(label="Enable multilingual fallback rule layer", value=True)
866
- gr.Markdown("</div>")
867
-
868
- gr.Markdown("<div class='card'><div class='header'>Allowed labels</div>")
869
- labels_text = gr.Textbox(label="Allowed Labels (one per line)", value=OFFICIAL_LABELS_TEXT, lines=8)
870
- reset_btn = gr.Button("Reset to official labels")
871
- gr.Markdown("</div>")
872
-
873
- gr.Markdown("<div class='card'><div class='header'>Editable instructions & context</div>")
874
- sys_instr_tb = gr.Textbox(label="System Instructions (editable)", value=DEFAULT_SYSTEM_INSTRUCTIONS, lines=5)
875
- glossary_tb = gr.Code(label="Label Glossary (JSON; editable)", value=json.dumps(DEFAULT_LABEL_GLOSSARY, indent=2), language="json")
876
- fallback_tb = gr.Code(label="Fallback Cues (Multilingual, JSON; editable)", value=json.dumps(DEFAULT_FALLBACK_CUES, indent=2), language="json")
877
- gr.Markdown("</div>")
878
-
879
- with gr.Column(scale=2):
880
- gr.Markdown("<div class='card'><div class='header'>Model & run</div>")
881
- repo = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
882
- use_4bit = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
883
- use_sdpa = gr.Checkbox(label="Use SDPA attention (faster on many GPUs)", value=True)
884
- max_tokens = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=2048)
885
- hf_token = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
886
- warm_btn = gr.Button("Warm up model (load & compile kernels)")
887
- run_btn = gr.Button("Run Extraction", variant="primary")
888
- gr.Markdown("</div>")
889
-
890
- gr.Markdown("<div class='card'><div class='header'>Outputs</div>")
891
- summary = gr.Textbox(label="Summary", lines=12)
892
- json_out = gr.Code(label="Strict JSON Output", language="json")
893
- diag = gr.Textbox(label="Diagnostics", lines=10)
894
- raw = gr.Textbox(label="Raw Model Output", lines=8)
895
- prompt_preview = gr.Code(label="Prompt preview (user prompt sent)", language="markdown")
896
- token_info = gr.Textbox(label="Token counts (transcript / prompt / load path)", lines=2)
897
- gr.Markdown("</div>")
898
 
899
- with gr.Row():
900
- with gr.Column():
901
- with gr.Accordion("Instructions used (system prompt)", open=False):
902
- instr_md = gr.Markdown("```\n" + DEFAULT_SYSTEM_INSTRUCTIONS + "\n```")
903
- with gr.Column():
904
- with gr.Accordion("Context used (glossary)", open=True):
905
- context_md = gr.Markdown("")
906
-
907
- # Reset labels to official
908
- def _reset_labels():
909
- return OFFICIAL_LABELS_TEXT
910
- reset_btn.click(fn=_reset_labels, inputs=None, outputs=labels_text)
911
-
912
- # Warm-up
913
- warm_btn.click(fn=warmup_model, inputs=[repo, use_4bit, use_sdpa, hf_token], outputs=diag)
914
-
915
- # For initial context preview
916
- def _pack_context_md(glossary_json, allowed_text):
917
- try:
918
- glossary = json.loads(glossary_json) if glossary_json else DEFAULT_LABEL_GLOSSARY
919
- except Exception:
920
- glossary = DEFAULT_LABEL_GLOSSARY
921
- allowed_list = [ln.strip() for ln in (allowed_text or OFFICIAL_LABELS_TEXT).splitlines() if ln.strip()]
922
- return "### Label Glossary (used)\n" + "\n".join(f"- {k}: {glossary.get(k,'')}" for k in allowed_list)
923
-
924
- context_md.value = _pack_context_md(json.dumps(DEFAULT_LABEL_GLOSSARY), OFFICIAL_LABELS_TEXT)
925
-
926
- # Single run
927
  run_btn.click(
928
- fn=run_single,
929
- inputs=[
930
- text, file, gt_text, gt_file, use_cleaning, use_fallback,
931
- labels_text, sys_instr_tb, glossary_tb, fallback_tb,
932
- repo, use_4bit, use_sdpa, max_tokens, hf_token
933
- ],
934
- outputs=[summary, json_out, diag, raw, context_md, instr_md, gr.Textbox(visible=False), prompt_preview, token_info],
935
  )
936
 
937
- with gr.Tab("Batch evaluation"):
938
- with gr.Row():
939
- with gr.Column(scale=3):
940
- gr.Markdown("<div class='card'><div class='header'>ZIP input</div>")
941
- zip_in = gr.File(label="ZIP with transcripts (.txt) and truths (.json)", file_types=[".zip"], type="filepath")
942
- use_cleaning_b = gr.Checkbox(label="Apply default cleaning", value=True)
943
- use_fallback_b = gr.Checkbox(label="Enable multilingual fallback rule layer", value=True)
944
- gr.Markdown("</div>")
945
- with gr.Column(scale=2):
946
- gr.Markdown("<div class='card'><div class='header'>Model & run</div>")
947
- repo_b = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
948
- use_4bit_b = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
949
- use_sdpa_b = gr.Checkbox(label="Use SDPA attention (faster on many GPUs)", value=True)
950
- max_tokens_b = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=2048)
951
- hf_token_b = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
952
- sys_instr_tb_b = gr.Textbox(label="System Instructions (editable for batch)", value=DEFAULT_SYSTEM_INSTRUCTIONS, lines=4)
953
- glossary_tb_b = gr.Code(label="Label Glossary (JSON; editable for batch)", value=json.dumps(DEFAULT_LABEL_GLOSSARY, indent=2), language="json")
954
- fallback_tb_b = gr.Code(label="Fallback Cues (Multilingual, JSON; editable for batch)", value=json.dumps(DEFAULT_FALLBACK_CUES, indent=2), language="json")
955
- limit_files = gr.Slider(label="Process at most N files (0 = all)", minimum=0, maximum=2000, step=10, value=0)
956
- run_batch_btn = gr.Button("Run Batch", variant="primary")
957
- gr.Markdown("</div>")
958
 
959
- with gr.Row():
960
- gr.Markdown("<div class='card'><div class='header'>Batch outputs</div>")
961
- status = gr.Textbox(label="Status", lines=1)
962
- diag_b = gr.Textbox(label="Batch diagnostics & metrics", lines=12)
963
- df_out = gr.Dataframe(label="Per-file results (TP/FP/FN, latency)", interactive=False)
964
- csv_out = gr.File(label="Download CSV", interactive=False)
965
- gr.Markdown("</div>")
966
-
967
- run_batch_btn.click(
968
- fn=run_batch,
969
- inputs=[
970
- zip_in, use_cleaning_b, use_fallback_b,
971
- sys_instr_tb_b, glossary_tb_b, fallback_tb_b,
972
- repo_b, use_4bit_b, use_sdpa_b, max_tokens_b, hf_token_b, limit_files
973
- ],
974
- outputs=[status, diag_b, df_out, csv_out],
975
- )
976
 
977
  if __name__ == "__main__":
 
978
  demo.launch()
 
 
1
  import os
 
 
2
  import json
 
 
 
 
 
 
 
3
  import gradio as gr
 
4
  import torch
5
+ from typing import Optional, Tuple, Dict, Any
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
# =========================
# Runtime / Model Defaults
# =========================
# Small, ungated default to avoid permission/download issues.
# You can switch at runtime via the dropdown or set MODEL_ID env var.
# NOTE(review): resolved once at import time — changing MODEL_ID later in the
# same process has no effect on this constant.
DEFAULT_MODEL_ID = os.environ.get("MODEL_ID", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 
 
 
 
 
 
 
 
14
 
15
+ def _has_bnb_and_cuda() -> bool:
16
+ if not torch.cuda.is_available():
17
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  try:
19
+ import bitsandbytes as _bnb # noqa: F401
20
+ return True
21
  except Exception:
22
+ return False
23
+
24
+ USE_BNB = _has_bnb_and_cuda()
25
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
# =========================
# Model Load (safe + flexible)
# =========================
# Process-wide single-slot cache: one tokenizer/model pair at a time.
_tokenizer: Optional[AutoTokenizer] = None
_model: Optional[AutoModelForCausalLM] = None
_current_model_id: Optional[str] = None

def load_model(model_id: str) -> Tuple[AutoTokenizer, AutoModelForCausalLM]:
    """
    Load (or reuse) a model/tokenizer. Uses bitsandbytes 4-bit only if
    CUDA is available AND bnb is installed. Otherwise plain CPU/GPU.
    """
    global _tokenizer, _model, _current_model_id

    # Cache hit: same repo id and both objects still resident.
    cache_hit = (
        _current_model_id == model_id
        and _tokenizer is not None
        and _model is not None
    )
    if cache_hit:
        return _tokenizer, _model

    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, trust_remote_code=True)

    if USE_BNB:
        # Deferred import: bitsandbytes config only exists when bnb is usable.
        from transformers import BitsAndBytesConfig
        quant_cfg = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            quantization_config=quant_cfg,
            device_map="auto",
            trust_remote_code=True,
        )
    else:
        # fp16 on GPU, fp32 on CPU (fp16 CPU inference is poorly supported).
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        ).to(DEVICE)

    _tokenizer, _model, _current_model_id = tokenizer, model, model_id
    return tokenizer, model
66
+
67
+ # ======================================
68
+ # Helpers: Ingest TXT/JSON from Tabs box
69
+ # ======================================
70
+ def read_file(file_obj: Optional[gr.File]) -> Optional[str]:
71
+ if not file_obj:
 
 
 
 
 
 
 
 
72
  return None
 
 
 
 
 
73
  try:
74
+ with open(file_obj.name, "r", encoding="utf-8", errors="ignore") as f:
75
+ return f.read()
76
  except Exception:
77
  return None
78
 
79
def normalize_txt_input(paste_txt: str, upload_file: Any) -> str:
    """Resolve the TXT tab pair: pasted text wins, else uploaded file, else "".

    Fixes the original annotation (``gr.File`` is the component class, not the
    value the handler receives) and only touches the filesystem when the paste
    box is effectively empty.
    """
    if paste_txt and paste_txt.strip():
        return paste_txt
    return read_file(upload_file) or ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
def normalize_json_input(paste_json: str, upload_file: Any) -> str:
    """Resolve the JSON tab pair: stripped pasted JSON wins, else file text, else "".

    Note the asymmetry with ``normalize_txt_input`` (kept from the original):
    pasted JSON is stripped, pasted TXT is returned verbatim. Annotation fixed
    (``gr.File`` is the component class, not the handler's value type); the
    file is only read when the paste box is empty.
    """
    candidate = paste_json.strip() if paste_json else ""
    if candidate:
        return candidate
    return read_file(upload_file) or ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
# =========================
# Core Extraction (placeholder)
# =========================
def run_extraction(
    model_choice: str,
    params_checked: list,
    instructions_text: str,
    context_text: str,
    txt_paste: str,
    txt_upload: Any,
    json_paste: str,
    json_upload: Any,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> Tuple[str, str, str, str, str]:
    """
    Wire your real extraction here.

    Returns:
        tasks_out, entities_out, cleaned_out, summary_out, diagnostics

    Every failure path still returns a 5-tuple so the Gradio outputs stay
    populated; errors are surfaced through the diagnostics pane.
    """
    diagnostics_lines = []

    # Resolve inputs from single-box Tab controls
    input_txt = normalize_txt_input(txt_paste, txt_upload)
    input_json_raw = normalize_json_input(json_paste, json_upload)

    diagnostics_lines.append(f"Model: {model_choice}")
    diagnostics_lines.append(f"Params: {params_checked}")
    diagnostics_lines.append(f"Instructions length: {len(instructions_text)} chars")
    diagnostics_lines.append(f"Context length: {len(context_text)} chars")
    diagnostics_lines.append(f"TXT length: {len(input_txt)} chars")

    # Try parse JSON (optional); a parse failure is reported, not fatal.
    parsed_json: Dict[str, Any] = {}
    if input_json_raw:
        try:
            parsed_json = json.loads(input_json_raw)
            diagnostics_lines.append("JSON: parsed successfully")
        except Exception as e:
            diagnostics_lines.append(f"JSON parse error: {e}")

    # Load selected model (safe)
    try:
        tokenizer, model = load_model(model_choice)
    except Exception as e:
        # If model fails to load, still return diagnostics
        diag = "\n".join(diagnostics_lines + [f"Model load failed: {e}"])
        return "", "", "", "", diag

    # ---------- Dummy generation (replace with your real prompts) ----------
    # Build a prompt from inputs (very basic); inputs are truncated so a huge
    # paste cannot blow past the model's context window.
    user_prompt = (
        "You are an assistant that extracts tasks and entities.\n"
        f"Instructions: {instructions_text}\n"
        f"Context: {context_text}\n"
        "----\n"
        f"TEXT:\n{input_txt[:4000]}\n"
        "----\n"
        f"JSON:\n{json.dumps(parsed_json)[:2000]}\n"
        "Extract:\n- Tasks list\n- Entities list\n- Cleaned text (sanitized)\n- 1-2 line summary\n"
    )

    try:
        inputs = tokenizer(user_prompt, return_tensors="pt").to(DEVICE)
        # transformers rejects do_sample=True with temperature == 0, and the
        # UI slider allows 0.0 — fall back to greedy decoding in that case.
        gen_kwargs: Dict[str, Any] = {
            "max_new_tokens": int(max_new_tokens),
            "pad_token_id": tokenizer.eos_token_id,
        }
        if temperature > 0:
            gen_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
        else:
            gen_kwargs["do_sample"] = False
        with torch.no_grad():
            # BUG FIX: the original called the module-global `_model` here
            # instead of the locally returned `model`.
            outputs = model.generate(**inputs, **gen_kwargs)
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        diag = "\n".join(diagnostics_lines + [f"Inference failed: {e}"])
        return "", "", "", "", diag

    # Very naive post-split (replace with your own structured parsing)
    tasks_out = "- Task 1\n- Task 2\n(Replace with your parser)"
    entities_out = "- Entity A\n- Entity B\n(Replace with your parser)"
    cleaned_out = "Cleaned text here… (Replace with your cleaning pipeline)"
    summary_out = "Short summary here… (Replace with your summarizer)"

    diagnostics_lines.append("Generation completed successfully.")
    # Surface the raw generation size so the decoded text is not silently discarded.
    diagnostics_lines.append(f"Raw generation: {len(full_text)} chars.")
    diagnostics = "\n".join(diagnostics_lines)

    return tasks_out, entities_out, cleaned_out, summary_out, diagnostics
181
 
182
  # =========================
183
+ # UI (Gradio Blocks)
184
  # =========================
185
+ THEME_CSS = """
186
+ /* Global colors: white background, black text */
187
+ :root {
188
+ --body-background-fill: #ffffff !important;
189
+ --body-text-color: #111111 !important;
190
+ --link-text-color: #0b63ce !important; /* blue */
191
+ --shadow-spread: 0px;
192
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
+ /* Ensure all text is readable (black-ish) */
195
+ .gradio-container, .prose, .prose * {
196
+ color: #111111 !important;
197
+ }
198
 
199
+ /* Accent elements in blue (no purple) */
200
+ label, .tabitem .label-wrap, .wrap .label-wrap {
201
+ color: #0b63ce !important;
202
+ }
 
203
 
204
+ /* Cards / Boxes */
205
+ .gr-box, .gr-panel, .gr-group, .gr-accordion {
206
+ border: 1px solid #e5e7eb !important; /* light gray border */
207
+ border-radius: 14px !important;
208
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
+ /* Red run button */
211
+ button#run-btn {
212
+ background: #e11900 !important;
213
+ color: #ffffff !important;
214
+ border: 1px solid #b50f00 !important;
215
+ }
216
+ button#run-btn:hover {
217
+ filter: brightness(0.95);
218
+ }
219
 
220
+ /* Inputs layout polish */
221
+ .input-card {
222
+ padding: 10px;
223
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  """
225
 
226
def build_interface() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire the run button to run_extraction.

    Layout: model/params header, side-by-side TXT/JSON input cards (each with
    Paste + Drag & Drop tabs), run button, collapsible instructions/context,
    symmetrical result panes, diagnostics. The order of `run_inputs` /
    `run_outputs` must match run_extraction's signature and return tuple.
    """
    with gr.Blocks(title="Talk2Task Demo", css=THEME_CSS) as demo:
        # 1) MODEL SELECTION (full width) + checklist embedded
        with gr.Group():
            gr.Markdown("### Model & Parameters", elem_id="model-header")
            with gr.Row(equal_height=True):
                # NOTE(review): the non-default choices below are gated/large
                # repos — selecting them may fail without HF auth; confirm.
                model_choice = gr.Dropdown(
                    label="Model",
                    choices=[
                        DEFAULT_MODEL_ID,
                        "mistralai/Mistral-7B-Instruct-v0.2",
                        "meta-llama/Llama-3.1-8B-Instruct",  # if accessible
                    ],
                    value=DEFAULT_MODEL_ID,
                    scale=3
                )
                # NOTE(review): these options are passed to run_extraction but
                # not yet acted upon there (placeholder pipeline).
                params_checked = gr.CheckboxGroup(
                    label="Options",
                    choices=[
                        "Default cleaning",
                        "Remove PII",
                        "Allow 4-bit (if available)",
                        "Detect language",
                    ],
                    value=["Default cleaning"],
                    scale=2
                )
            with gr.Row():
                # generation controls (kept compact)
                temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
                top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
                max_new_tokens = gr.Slider(32, 1024, value=200, step=8, label="Max new tokens")

        # 2) SINGLE "BOX" PER TYPE — via Tabs (Paste OR Drag & Drop) — side-by-side
        gr.Markdown("### Input", elem_id="input-header")
        with gr.Row(equal_height=True):
            with gr.Group(elem_classes=["input-card"]):
                gr.Markdown("**TXT Input** (Paste or Drag & Drop)", elem_id="txt-box-title")
                with gr.Tabs():
                    with gr.TabItem("Paste"):
                        txt_paste = gr.TextArea(
                            label="Paste TXT",
                            placeholder="Paste raw transcript or text here...",
                            lines=12,
                        )
                    with gr.TabItem("Drag & Drop"):
                        txt_upload = gr.File(
                            label="Upload .txt file",
                            file_types=[".txt"],
                        )

            with gr.Group(elem_classes=["input-card"]):
                gr.Markdown("**JSON Input** (Paste or Drag & Drop)", elem_id="json-box-title")
                with gr.Tabs():
                    with gr.TabItem("Paste"):
                        # NOTE(review): the `lines` kwarg on gr.Code is not
                        # accepted by every Gradio version — verify against
                        # the pinned gradio release.
                        json_paste = gr.Code(
                            label="Paste JSON",
                            language="json",
                            value="{\n \"example\": true\n}",
                            lines=12,
                        )
                    with gr.TabItem("Drag & Drop"):
                        json_upload = gr.File(
                            label="Upload .json file",
                            file_types=[".json"],
                        )

        # 3) RUN BUTTON (red via CSS on elem_id), then collapsible Instructions & Context
        run_btn = gr.Button("Run Extraction", elem_id="run-btn", variant="primary")

        with gr.Row():
            with gr.Accordion("Instructions (editable)", open=False):
                instructions_text = gr.TextArea(
                    label="Instructions",
                    value=(
                        "Extract tasks, entities, and a short summary. "
                        "Apply default cleaning unless unchecked."
                    ),
                    lines=5,
                )
            with gr.Accordion("Context (editable)", open=False):
                context_text = gr.TextArea(
                    label="Context",
                    value=(
                        "Use banking/consulting context if relevant. "
                        "Prefer concise actionable phrasing."
                    ),
                    lines=5,
                )

        # 4) OUTPUT LAYOUT — symmetrical boxes
        gr.Markdown("### Results", elem_id="results-header")
        with gr.Row(equal_height=True):
            tasks_out = gr.TextArea(label="Tasks", lines=10)
            entities_out = gr.TextArea(label="Entities", lines=10)
        with gr.Row(equal_height=True):
            cleaned_out = gr.TextArea(label="Cleaned Text", lines=10)
            summary_out = gr.TextArea(label="Summary", lines=10)

        gr.Markdown("### Diagnostics", elem_id="diagnostics-header")
        diagnostics = gr.TextArea(label="Diagnostics / Logs", lines=10)

        # Wire up button — ordering here is positional and must stay in sync
        # with run_extraction's parameters and return values.
        run_inputs = [
            model_choice, params_checked, instructions_text, context_text,
            txt_paste, txt_upload, json_paste, json_upload,
            max_new_tokens, temperature, top_p
        ]
        run_outputs = [tasks_out, entities_out, cleaned_out, summary_out, diagnostics]

        run_btn.click(
            fn=run_extraction,
            inputs=run_inputs,
            outputs=run_outputs
        )

    return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
 
344
# Built at import time so hosting platforms (e.g. HF Spaces) can serve `demo`
# without invoking this file as a script.
demo = build_interface()

if __name__ == "__main__":
    # Let Gradio/Spaces choose host & port; this keeps local runs easy too.
    demo.launch()