Spaces:

RishiRP
/

Talk2TaskDemo1

Sleeping

App Files Community

RishiRP committed on Sep 25, 2025

Commit

5a71496

verified ·

1 Parent(s): ad57ae1

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -37

app.py CHANGED Viewed

@@ -28,10 +28,10 @@ SPACE_CACHE.mkdir(parents=True, exist_ok=True)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 GEN_CONFIG = GenerationConfig(
-    temperature=0.1,
     top_p=1.0,
     do_sample=False,
-    max_new_tokens=128,
 )
 # Official UBS label set (strict)
@@ -48,7 +48,7 @@ OFFICIAL_LABELS = [
 OFFICIAL_LABELS_TEXT = "\n".join(OFFICIAL_LABELS)
 # Per-label keyword cues (static prompt context to improve recall)
-LABEL_KEYWORDS = {
     "plan_contact": [
         "call back", "get back to you", "i'll get back", "follow up",
         "reach out", "contact later", "check in", "touch base", "remind",
@@ -87,10 +87,9 @@ LABEL_KEYWORDS = {
         "portfolio size", "how much you own", "aggregate assets"
     ],
 }
-# =========================
-# Regex cues
-# =========================
-REGEX_CUES = {
     "schedule_meeting": [
         r"\b(let'?s\s+)?meet(s|ing)?\b",
         r"\bbook( a)? (time|slot|meeting)\b",
@@ -99,17 +98,16 @@ REGEX_CUES = {
         r"\bfind a time\b",
     ],
     "plan_contact": [
-        r"\b(get|got|i'?ll)\s+back to you\b",
         r"\bfollow\s*up\b",
         r"\breach out\b",
         r"\btouch base\b",
         r"\bping you\b",
     ],
-    # Add more regexes for other labels if useful
 }
 # =========================
-# Instructions (string-safe; concatenated)
 # =========================
 SYSTEM_PROMPT = (
     "You are a precise banking assistant that extracts ACTIONABLE TASKS from "
@@ -308,7 +306,7 @@ class ModelWrapper:
     @torch.inference_mode()
     def generate(self, system_prompt: str, user_prompt: str) -> str:
-        # Build inputs as input_ids=... (avoid **tensor bug from earlier)
         if hasattr(self.tokenizer, "apply_chat_template"):
             messages = [
                 {"role": "system", "content": system_prompt},
@@ -397,19 +395,41 @@ def evaluate_predictions(y_true: List[List[str]], y_pred: List[List[str]]) -> fl
     return float(max(0.0, min(1.0, np.mean(per_sample))))
 # =========================
-# Fallback: keyword heuristics if model returns empty
 # =========================
 def keyword_fallback(text: str, allowed: List[str]) -> Dict[str, Any]:
     low = text.lower()
     labels = []
     tasks = []
     for lab in allowed:
         hits = []
         for kw in LABEL_KEYWORDS.get(lab, []):
             k = kw.lower()
-            if k in low:
-                # capture small evidence window
-                i = low.find(k)
                 start = max(0, i - 40); end = min(len(text), i + len(k) + 40)
                 hits.append(text[start:end].strip())
         if hits:
@@ -419,6 +439,7 @@ def keyword_fallback(text: str, allowed: List[str]) -> Dict[str, Any]:
                 "explanation": "Keyword match in transcript.",
                 "evidence": hits[0]
             })
     return {"labels": normalize_labels(labels), "tasks": tasks}
 # =========================
@@ -431,6 +452,15 @@ def build_keyword_context(allowed: List[str]) -> str:
         parts.append(f"- {lab}: " + (", ".join(kws) if kws else "(no default cues)"))
     return "\n".join(parts)
 def run_single(
     transcript_text: str,
     transcript_file,            # filepath or file-like
@@ -443,7 +473,7 @@ def run_single(
     use_4bit: bool,
     max_input_tokens: int,
     hf_token: str,
-) -> Tuple[str, str, str, str, str, str, str]:
     t0 = _now_ms()
@@ -453,7 +483,7 @@ def run_single(
         raw_text = read_text_file_any(transcript_file)
     raw_text = (raw_text or transcript_text or "").strip()
     if not raw_text:
-        return "", "", "No transcript provided.", "", "", "", ""
     text = clean_transcript(raw_text) if use_cleaning else raw_text
@@ -465,7 +495,7 @@ def run_single(
     try:
         model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit)
     except Exception as e:
-        return "", "", f"Model load failed: {e}", "", "", "", ""
     # Truncate
     trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)
@@ -479,12 +509,18 @@ def run_single(
         keyword_context=keyword_ctx,
     )
     # Generate
     t1 = _now_ms()
     try:
         out = model.generate(SYSTEM_PROMPT, user_prompt)
     except Exception as e:
-        return "", "", f"Generation error: {e}", "", "", "", ""
     t2 = _now_ms()
     parsed = robust_json_extract(out)
@@ -531,7 +567,6 @@ def run_single(
     # Optional single-file scoring if GT provided
     metrics = ""
-    true_labels = None
     if gt_json_file or (gt_json_text and gt_json_text.strip()):
         truth_obj = None
         if gt_json_file:
@@ -563,7 +598,7 @@ def run_single(
         else:
             metrics = "Ground truth JSON missing or invalid; expected {'labels': [...]}."
-    return summary, json_out, diag, out.strip(), context_preview, instructions_preview, metrics
 # =========================
 # Batch mode (ZIP with transcripts + truths)
@@ -725,7 +760,6 @@ custom_css = """
 .header { font-weight: 700; font-size: 22px; margin-bottom: 4px; }
 .subtle { color: rgba(255,255,255,.65); font-size: 14px; margin-bottom: 12px; }
 hr.sep { border: none; border-top: 1px solid rgba(255,255,255,.08); margin: 10px 0 16px; }
-.accordion-title { font-weight: 600; }
 .gr-button { border-radius: 12px !important; }
 """
@@ -736,7 +770,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
     with gr.Tab("Single transcript"):
         with gr.Row():
             with gr.Column(scale=3):
-                gr.Markdown("<div class='card'><div class='header'>Transcript</div>", elem_id="card1")
                 file = gr.File(
                     label="Drag & drop transcript (.txt / .md / .json)",
                     file_types=[".txt", ".md", ".json"],
@@ -745,7 +779,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
                 text = gr.Textbox(label="Or paste transcript", lines=10)
                 gr.Markdown("<hr class='sep'/>")
-                gr.Markdown("<div class='header'>Ground truth JSON (optional)</div>", elem_id="card1b")
                 gt_file = gr.File(
                     label="Upload ground truth JSON (expects {'labels': [...]})",
                     file_types=[".json"],
@@ -754,7 +788,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
                 gt_text = gr.Textbox(label="Or paste ground truth JSON", lines=6, placeholder='{\"labels\": [\"schedule_meeting\"]}')
                 gr.Markdown("</div>")  # close card
-                gr.Markdown("<div class='card'><div class='header'>Preprocessing & heuristics</div>", elem_id="card2")
                 use_cleaning = gr.Checkbox(
                     label="Apply default cleaning (remove disclaimers, timestamps, speakers, footers)",
                     value=True,
@@ -765,7 +799,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
                 )
                 gr.Markdown("</div>")
-                gr.Markdown("<div class='card'><div class='header'>Allowed labels</div>", elem_id="card3")
                 labels_text = gr.Textbox(
                     label="Allowed Labels (one per line)",
                     value=OFFICIAL_LABELS_TEXT,  # prefilled
@@ -775,25 +809,28 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
                 gr.Markdown("</div>")
             with gr.Column(scale=2):
-                gr.Markdown("<div class='card'><div class='header'>Model & run</div>", elem_id="card4")
                 repo = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
                 use_4bit = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
-                max_tokens = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=4096)
                 hf_token = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
                 run_btn = gr.Button("Run Extraction", variant="primary")
                 gr.Markdown("</div>")
-                gr.Markdown("<div class='card'><div class='header'>Outputs</div>", elem_id="card5")
                 summary = gr.Textbox(label="Summary", lines=12)
                 json_out = gr.Code(label="Strict JSON Output", language="json")
                 diag = gr.Textbox(label="Diagnostics", lines=8)
                 raw = gr.Textbox(label="Raw Model Output", lines=8)
                 gr.Markdown("</div>")
         with gr.Row():
             with gr.Column():
                 with gr.Accordion("Instructions used (system prompt)", open=False):
-                    instr_md = gr.Markdown("")
             with gr.Column():
                 with gr.Accordion("Context used (allowed labels + keyword cues)", open=True):
                     context_md = gr.Markdown("")
@@ -803,6 +840,13 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
             return OFFICIAL_LABELS_TEXT
         reset_btn.click(fn=_reset_labels, inputs=None, outputs=labels_text)
         # single run
         def _pack_context_md(allowed: str) -> str:
             allowed_list = [ln.strip() for ln in (allowed or OFFICIAL_LABELS_TEXT).splitlines() if ln.strip()]
@@ -815,33 +859,32 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo
                 text, file, gt_text, gt_file, use_cleaning, use_keyword_fallback,
                 labels_text, repo, use_4bit, max_tokens, hf_token
             ],
-            outputs=[summary, json_out, diag, raw, context_md, instr_md, gr.Textbox(visible=False)],
         )
-        # also keep instructions visible at initial load
-        instr_md.value = "```\n" + SYSTEM_PROMPT + "\n```"
         context_md.value = _pack_context_md(OFFICIAL_LABELS_TEXT)
     with gr.Tab("Batch evaluation"):
         with gr.Row():
             with gr.Column(scale=3):
-                gr.Markdown("<div class='card'><div class='header'>ZIP input</div>", elem_id="card6")
                 zip_in = gr.File(label="ZIP with transcripts (.txt) and truths (.json)", file_types=[".zip"], type="filepath")
                 use_cleaning_b = gr.Checkbox(label="Apply default cleaning", value=True)
                 use_keyword_fallback_b = gr.Checkbox(label="Keyword fallback if model returns empty", value=True)
                 gr.Markdown("</div>")
             with gr.Column(scale=2):
-                gr.Markdown("<div class='card'><div class='header'>Model & run</div>", elem_id="card7")
                 repo_b = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
                 use_4bit_b = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
-                max_tokens_b = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=4096)
                 hf_token_b = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
                 limit_files = gr.Slider(label="Process at most N files (0 = all)", minimum=0, maximum=2000, step=10, value=0)
                 run_batch_btn = gr.Button("Run Batch", variant="primary")
                 gr.Markdown("</div>")
         with gr.Row():
-            gr.Markdown("<div class='card'><div class='header'>Batch outputs</div>", elem_id="card8")
             status = gr.Textbox(label="Status", lines=1)
             diag_b = gr.Textbox(label="Batch diagnostics & metrics", lines=12)
             df_out = gr.Dataframe(label="Per-file results (TP/FP/FN, latency)", interactive=False)

 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 GEN_CONFIG = GenerationConfig(
+    temperature=0.0,
     top_p=1.0,
     do_sample=False,
+    max_new_tokens=96,  # small for speed; adjust if needed
 )
 # Official UBS label set (strict)
 OFFICIAL_LABELS_TEXT = "\n".join(OFFICIAL_LABELS)
 # Per-label keyword cues (static prompt context to improve recall)
+LABEL_KEYWORDS: Dict[str, List[str]] = {
     "plan_contact": [
         "call back", "get back to you", "i'll get back", "follow up",
         "reach out", "contact later", "check in", "touch base", "remind",
         "portfolio size", "how much you own", "aggregate assets"
     ],
 }
+# Regex cues to catch phrasing variants
+REGEX_CUES: Dict[str, List[str]] = {
     "schedule_meeting": [
         r"\b(let'?s\s+)?meet(s|ing)?\b",
         r"\bbook( a)? (time|slot|meeting)\b",
         r"\bfind a time\b",
     ],
     "plan_contact": [
+        r"\b(i'?ll|get|got)\s+back to you\b",
         r"\bfollow\s*up\b",
         r"\breach out\b",
         r"\btouch base\b",
         r"\bping you\b",
     ],
 }
 # =========================
+# Instructions (concise; concatenated to avoid string issues)
 # =========================
 SYSTEM_PROMPT = (
     "You are a precise banking assistant that extracts ACTIONABLE TASKS from "
     @torch.inference_mode()
     def generate(self, system_prompt: str, user_prompt: str) -> str:
+        # Build inputs as input_ids=... (avoid **tensor bug)
         if hasattr(self.tokenizer, "apply_chat_template"):
             messages = [
                 {"role": "system", "content": system_prompt},
     return float(max(0.0, min(1.0, np.mean(per_sample))))
 # =========================
+# Fallback: regex + keywords if model returns empty
 # =========================
 def keyword_fallback(text: str, allowed: List[str]) -> Dict[str, Any]:
     low = text.lower()
     labels = []
     tasks = []
+    # Regex first
+    for lab in allowed:
+        patterns = REGEX_CUES.get(lab, [])
+        found = None
+        for pat in patterns:
+            m = re.search(pat, low)
+            if m:
+                i = m.start()
+                start = max(0, i - 40); end = min(len(text), i + len(m.group(0)) + 40)
+                found = text[start:end].strip()
+                break
+        if found:
+            labels.append(lab)
+            tasks.append({
+                "label": lab,
+                "explanation": "Regex cue matched in transcript.",
+                "evidence": found
+            })
+    # Keyword contains() as backstop
     for lab in allowed:
+        if lab in labels:
+            continue
         hits = []
         for kw in LABEL_KEYWORDS.get(lab, []):
             k = kw.lower()
+            i = low.find(k)
+            if i != -1:
                 start = max(0, i - 40); end = min(len(text), i + len(k) + 40)
                 hits.append(text[start:end].strip())
         if hits:
                 "explanation": "Keyword match in transcript.",
                 "evidence": hits[0]
             })
     return {"labels": normalize_labels(labels), "tasks": tasks}
 # =========================
         parts.append(f"- {lab}: " + (", ".join(kws) if kws else "(no default cues)"))
     return "\n".join(parts)
+def warmup_model(model_repo: str, use_4bit: bool, hf_token: str) -> str:
+    t0 = _now_ms()
+    try:
+        model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit)
+        _ = model.generate("Return JSON only.", '{"labels": [], "tasks": []}')
+        return f"Warm-up complete in {_now_ms() - t0} ms."
+    except Exception as e:
+        return f"Warm-up failed: {e}"
 def run_single(
     transcript_text: str,
     transcript_file,            # filepath or file-like
     use_4bit: bool,
     max_input_tokens: int,
     hf_token: str,
+) -> Tuple[str, str, str, str, str, str, str, str, str]:
     t0 = _now_ms()
         raw_text = read_text_file_any(transcript_file)
     raw_text = (raw_text or transcript_text or "").strip()
     if not raw_text:
+        return "", "", "No transcript provided.", "", "", "", "", "", ""
     text = clean_transcript(raw_text) if use_cleaning else raw_text
     try:
         model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit)
     except Exception as e:
+        return "", "", f"Model load failed: {e}", "", "", "", "", "", ""
     # Truncate
     trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)
         keyword_context=keyword_ctx,
     )
+    # Token info + prompt preview
+    transcript_tokens = len(model.tokenizer(trunc, add_special_tokens=False)["input_ids"])
+    prompt_tokens = len(model.tokenizer(user_prompt, add_special_tokens=False)["input_ids"])
+    token_info_text = f"Transcript tokens: {transcript_tokens} | Prompt tokens: {prompt_tokens}"
+    prompt_preview_text = "```\n" + user_prompt[:4000] + ("\n... (truncated)" if len(user_prompt) > 4000 else "") + "\n```"
     # Generate
     t1 = _now_ms()
     try:
         out = model.generate(SYSTEM_PROMPT, user_prompt)
     except Exception as e:
+        return "", "", f"Generation error: {e}", "", "", "", prompt_preview_text, token_info_text, ""
     t2 = _now_ms()
     parsed = robust_json_extract(out)
     # Optional single-file scoring if GT provided
     metrics = ""
     if gt_json_file or (gt_json_text and gt_json_text.strip()):
         truth_obj = None
         if gt_json_file:
         else:
             metrics = "Ground truth JSON missing or invalid; expected {'labels': [...]}."
+    return summary, json_out, diag, out.strip(), context_preview, instructions_preview, metrics, prompt_preview_text, token_info_text
 # =========================
 # Batch mode (ZIP with transcripts + truths)
 .header { font-weight: 700; font-size: 22px; margin-bottom: 4px; }
 .subtle { color: rgba(255,255,255,.65); font-size: 14px; margin-bottom: 12px; }
 hr.sep { border: none; border-top: 1px solid rgba(255,255,255,.08); margin: 10px 0 16px; }
 .gr-button { border-radius: 12px !important; }
 """
     with gr.Tab("Single transcript"):
         with gr.Row():
             with gr.Column(scale=3):
+                gr.Markdown("<div class='card'><div class='header'>Transcript</div>")
                 file = gr.File(
                     label="Drag & drop transcript (.txt / .md / .json)",
                     file_types=[".txt", ".md", ".json"],
                 text = gr.Textbox(label="Or paste transcript", lines=10)
                 gr.Markdown("<hr class='sep'/>")
+                gr.Markdown("<div class='header'>Ground truth JSON (optional)</div>")
                 gt_file = gr.File(
                     label="Upload ground truth JSON (expects {'labels': [...]})",
                     file_types=[".json"],
                 gt_text = gr.Textbox(label="Or paste ground truth JSON", lines=6, placeholder='{\"labels\": [\"schedule_meeting\"]}')
                 gr.Markdown("</div>")  # close card
+                gr.Markdown("<div class='card'><div class='header'>Preprocessing & heuristics</div>")
                 use_cleaning = gr.Checkbox(
                     label="Apply default cleaning (remove disclaimers, timestamps, speakers, footers)",
                     value=True,
                 )
                 gr.Markdown("</div>")
+                gr.Markdown("<div class='card'><div class='header'>Allowed labels</div>")
                 labels_text = gr.Textbox(
                     label="Allowed Labels (one per line)",
                     value=OFFICIAL_LABELS_TEXT,  # prefilled
                 gr.Markdown("</div>")
             with gr.Column(scale=2):
+                gr.Markdown("<div class='card'><div class='header'>Model & run</div>")
                 repo = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
                 use_4bit = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
+                max_tokens = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=2048)
                 hf_token = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
+                warm_btn = gr.Button("Warm up model (load & compile kernels)")
                 run_btn = gr.Button("Run Extraction", variant="primary")
                 gr.Markdown("</div>")
+                gr.Markdown("<div class='card'><div class='header'>Outputs</div>")
                 summary = gr.Textbox(label="Summary", lines=12)
                 json_out = gr.Code(label="Strict JSON Output", language="json")
                 diag = gr.Textbox(label="Diagnostics", lines=8)
                 raw = gr.Textbox(label="Raw Model Output", lines=8)
+                prompt_preview = gr.Code(label="Prompt preview (user prompt sent)", language="markdown")
+                token_info = gr.Textbox(label="Token counts (transcript / prompt)", lines=2)
                 gr.Markdown("</div>")
         with gr.Row():
             with gr.Column():
                 with gr.Accordion("Instructions used (system prompt)", open=False):
+                    instr_md = gr.Markdown("```\n" + SYSTEM_PROMPT + "\n```")
             with gr.Column():
                 with gr.Accordion("Context used (allowed labels + keyword cues)", open=True):
                     context_md = gr.Markdown("")
             return OFFICIAL_LABELS_TEXT
         reset_btn.click(fn=_reset_labels, inputs=None, outputs=labels_text)
+        # warm-up
+        warm_btn.click(
+            fn=warmup_model,
+            inputs=[repo, use_4bit, hf_token],
+            outputs=diag,
+        )
         # single run
         def _pack_context_md(allowed: str) -> str:
             allowed_list = [ln.strip() for ln in (allowed or OFFICIAL_LABELS_TEXT).splitlines() if ln.strip()]
                 text, file, gt_text, gt_file, use_cleaning, use_keyword_fallback,
                 labels_text, repo, use_4bit, max_tokens, hf_token
             ],
+            outputs=[summary, json_out, diag, raw, context_md, instr_md, gr.Textbox(visible=False), prompt_preview, token_info],
         )
+        # initial context preview
         context_md.value = _pack_context_md(OFFICIAL_LABELS_TEXT)
     with gr.Tab("Batch evaluation"):
         with gr.Row():
             with gr.Column(scale=3):
+                gr.Markdown("<div class='card'><div class='header'>ZIP input</div>")
                 zip_in = gr.File(label="ZIP with transcripts (.txt) and truths (.json)", file_types=[".zip"], type="filepath")
                 use_cleaning_b = gr.Checkbox(label="Apply default cleaning", value=True)
                 use_keyword_fallback_b = gr.Checkbox(label="Keyword fallback if model returns empty", value=True)
                 gr.Markdown("</div>")
             with gr.Column(scale=2):
+                gr.Markdown("<div class='card'><div class='header'>Model & run</div>")
                 repo_b = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
                 use_4bit_b = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
+                max_tokens_b = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=2048)
                 hf_token_b = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
                 limit_files = gr.Slider(label="Process at most N files (0 = all)", minimum=0, maximum=2000, step=10, value=0)
                 run_batch_btn = gr.Button("Run Batch", variant="primary")
                 gr.Markdown("</div>")
         with gr.Row():
+            gr.Markdown("<div class='card'><div class='header'>Batch outputs</div>")
             status = gr.Textbox(label="Status", lines=1)
             diag_b = gr.Textbox(label="Batch diagnostics & metrics", lines=12)
             df_out = gr.Dataframe(label="Per-file results (TP/FP/FN, latency)", interactive=False)