Spaces:

bhsinghgrid
/

devflow

Sleeping

App Files Files Community

bhsinghgrid committed on Mar 24

Commit

5fd6ec8

verified ·

1 Parent(s): 5953b4e

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +215 -6

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ import os
 import subprocess
 import sys
 import shutil
 from datetime import datetime
 from pathlib import Path
@@ -12,13 +14,22 @@ import torch
 from huggingface_hub import hf_hub_download
 from config import CONFIG
-from inference import _resolve_device, load_model, run_inference, _decode_clean, _decode_with_cleanup
 from model.tokenizer import SanskritSourceTokenizer, SanskritTargetTokenizer
 RESULTS_DIR = "generated_results"
 DEFAULT_ANALYSIS_OUT = "analysis_outputs/T4"
 os.makedirs(RESULTS_DIR, exist_ok=True)
 HF_DEFAULT_MODEL_REPO = os.environ.get("HF_DEFAULT_MODEL_REPO", "bhsinghgrid/DevaFlow")
 HF_DEFAULT_MODEL_FILE = os.environ.get("HF_DEFAULT_MODEL_FILE", "best_model.pt")
@@ -387,18 +398,198 @@ def _live_input_summary(model_bundle, input_text: str) -> str:
     )
 def run_single_task(model_bundle, task, output_dir, input_text, task4_phase):
     if not model_bundle:
         raise gr.Error("Load a model first.")
     code, log, used_bundled = _run_analysis_cmd(task, model_bundle["ckpt_path"], output_dir, input_text, task4_phase)
     if code != 0:
         _bundle_task_outputs(model_bundle, output_dir)
-        log = f"{log}\n\n--- Live input summary ---\n{_live_input_summary(model_bundle, input_text)}"
         status = f"Task {task} fallback mode: bundled reports + live input analysis."
     else:
         if used_bundled:
             _bundle_task_outputs(model_bundle, output_dir)
-            status = f"Task {task} loaded from bundled analysis outputs."
         else:
             status = f"Task {task} completed (exit={code})."
     return status, log
@@ -485,6 +676,7 @@ CUSTOM_CSS = """
 with gr.Blocks(title="Sanskrit Diffusion Client Demo", css=CUSTOM_CSS) as demo:
     model_state = gr.State(None)
     gr.Markdown(
         """
@@ -532,7 +724,8 @@ with gr.Blocks(title="Sanskrit Diffusion Client Demo", css=CUSTOM_CSS) as demo:
                         value="analyze",
                         label="Task 4 Phase",
                     )
-                    run_all_btn = gr.Button("Run All 5 Tasks", variant="primary")
             with gr.Row():
                 task_choice = gr.Dropdown(
@@ -651,9 +844,25 @@ with gr.Blocks(title="Sanskrit Diffusion Client Demo", css=CUSTOM_CSS) as demo:
         outputs=[task_run_status, task_run_log],
     )
     run_all_btn.click(
-        fn=run_all_tasks,
         inputs=[model_state, analysis_output_dir, analysis_input, task4_phase],
-        outputs=[task_run_status, task_run_log],
     )
     refresh_outputs_btn.click(
         fn=refresh_task_outputs,

 import subprocess
 import sys
 import shutil
+import threading
+import uuid
 from datetime import datetime
 from pathlib import Path
 from huggingface_hub import hf_hub_download
 from config import CONFIG
+from inference import (
+    _resolve_device,
+    load_model,
+    run_inference,
+    _decode_clean,
+    _decode_with_cleanup,
+    _iast_to_deva,
+    _compute_cer,
+)
 from model.tokenizer import SanskritSourceTokenizer, SanskritTargetTokenizer
 RESULTS_DIR = "generated_results"
 DEFAULT_ANALYSIS_OUT = "analysis_outputs/T4"
 os.makedirs(RESULTS_DIR, exist_ok=True)
+_BG_JOBS = {}
 HF_DEFAULT_MODEL_REPO = os.environ.get("HF_DEFAULT_MODEL_REPO", "bhsinghgrid/DevaFlow")
 HF_DEFAULT_MODEL_FILE = os.environ.get("HF_DEFAULT_MODEL_FILE", "best_model.pt")
     )
+def _mini_tfidf_scores(text: str) -> dict:
+    tokens = [t for t in text.split() if t.strip()]
+    if not tokens:
+        return {}
+    corpus = [
+        "dharmo rakṣati rakṣitaḥ",
+        "satyameva jayate",
+        "ahiṃsā paramo dharmaḥ",
+        "vasudhaiva kuṭumbakam",
+        "yatra nāryastu pūjyante",
+        text,
+    ]
+    docs = [set([t for t in d.split() if t.strip()]) for d in corpus]
+    n = len(docs)
+    scores = {}
+    for tok in tokens:
+        df = sum(1 for d in docs if tok in d)
+        idf = (1.0 + (n + 1) / (1 + df))
+        scores[tok] = round(float(idf), 4)
+    return scores
def _run_single_prediction(model_bundle, text: str, cfg_override: dict | None = None) -> str:
    """Run one inference pass on *text* and return the cleaned decoded output.

    Args:
        model_bundle: Mapping that provides ``"cfg"``, ``"src_tok"``,
            ``"tgt_tok"``, ``"device"`` and ``"model"``.
        text: Source text; surrounding whitespace is stripped before encoding.
        cfg_override: Optional entries written into ``cfg["inference"]`` for
            this call only.

    Returns:
        The string produced by ``_decode_with_cleanup`` for the first output row.
    """
    # Work on a private copy so per-call overrides never touch the bundle's config.
    run_cfg = copy.deepcopy(model_bundle["cfg"])
    if cfg_override:
        for option, value in cfg_override.items():
            run_cfg["inference"][option] = value

    source_tokenizer = model_bundle["src_tok"]
    target_tokenizer = model_bundle["tgt_tok"]
    target_device = torch.device(model_bundle["device"])

    stripped = text.strip()
    # Truncate the encoded ids to the configured maximum sequence length.
    token_ids = source_tokenizer.encode(stripped)[:run_cfg["model"]["max_seq_len"]]
    batch = torch.tensor([token_ids], dtype=torch.long, device=target_device)

    generated = run_inference(model_bundle["model"], batch, run_cfg)
    first_row = generated[0].tolist()
    return _decode_with_cleanup(target_tokenizer, first_row, stripped, run_cfg["inference"])
+def _live_task_analysis(model_bundle, task: str, input_text: str) -> str:
+    text = input_text.strip()
+    if not text:
+        return "Live analysis skipped: empty input."
+    pred = _run_single_prediction(model_bundle, text)
+    toks = [t for t in pred.split() if t]
+    uniq = len(set(toks)) / max(1, len(toks))
+    if str(task) == "1":
+        t0 = datetime.now()
+        _ = _run_single_prediction(model_bundle, text, {"num_steps": 16})
+        t1 = datetime.now()
+        _ = _run_single_prediction(model_bundle, text, {"num_steps": 64})
+        t2 = datetime.now()
+        fast_ms = (t1 - t0).total_seconds() * 1000
+        full_ms = (t2 - t1).total_seconds() * 1000
+        return (
+            f"[Live Task1]\n"
+            f"Input: {text}\nPrediction: {pred}\n"
+            f"Token-length={len(toks)}  unique-ratio={uniq:.3f}\n"
+            f"Latency proxy: 16-step={fast_ms:.1f}ms, 64-step={full_ms:.1f}ms"
+        )
+    if str(task) == "2":
+        tfidf = _mini_tfidf_scores(text)
+        top = sorted(tfidf.items(), key=lambda kv: kv[1], reverse=True)[:5]
+        return (
+            f"[Live Task2]\n"
+            f"Input: {text}\nPrediction: {pred}\n"
+            f"Token-length={len(toks)}  unique-ratio={uniq:.3f}\n"
+            f"TF-IDF(top): {top}"
+        )
+    if str(task) == "3":
+        tfidf = _mini_tfidf_scores(text)
+        tf_mean = sum(tfidf.values()) / max(1, len(tfidf))
+        return (
+            f"[Live Task3]\n"
+            f"Input: {text}\nPrediction: {pred}\n"
+            f"Token-length={len(toks)}  unique-ratio={uniq:.3f}\n"
+            f"Concept proxy: mean TF-IDF={tf_mean:.3f}"
+        )
+    if str(task) == "5":
+        ref = _iast_to_deva(text)
+        scales = [0.0, 0.5, 1.0, 1.5, 2.0]
+        rows = []
+        for s in scales:
+            cfg_map = {
+                "repetition_penalty": 1.1 + 0.15 * s,
+                "diversity_penalty": min(1.0, 0.10 * s),
+            }
+            out = _run_single_prediction(model_bundle, text, cfg_map)
+            cer = _compute_cer(out, ref)
+            rows.append((s, round(cer, 4), out[:48]))
+        return "[Live Task5]\n" + "\n".join([f"λ={r[0]:.1f} CER={r[1]:.4f} out={r[2]}" for r in rows])
+    return _live_input_summary(model_bundle, text)
def _bg_worker(job_id: str, model_bundle, output_dir: str, input_text: str, task4_phase: str):
    """Run analysis tasks 1-5 in order and publish progress to ``_BG_JOBS[job_id]``.

    Used as the thread target for a background "run all" job. After each task
    the job record's ``log``, ``failures``, ``progress`` and ``updated`` fields
    are refreshed so a poller can display intermediate results.

    If any step raises, the job is marked ``state="failed"`` with ``done=True``
    and the traceback is appended to the log. Without this, an exception would
    end the thread silently and leave the job looking like it is still running.

    Args:
        job_id: Key of an existing entry in ``_BG_JOBS``.
        model_bundle: Loaded model bundle; ``"ckpt_path"`` is read from it.
        output_dir: Directory passed to the analysis command and bundling step.
        input_text: User input forwarded to each task.
        task4_phase: Phase selector forwarded to the analysis command.
    """
    import traceback  # local import; only used on the failure path

    def _now() -> str:
        return datetime.now().isoformat()

    job = _BG_JOBS[job_id]
    tasks = ["1", "2", "3", "4", "5"]
    failures = 0
    logs = []
    job.update({"state": "running", "progress": 0, "failures": 0, "updated": _now()})

    try:
        for idx, task in enumerate(tasks, start=1):
            job.update(
                {
                    "state": f"running task {task}",
                    "progress": int((idx - 1) * 100 / len(tasks)),
                    "updated": _now(),
                }
            )
            code, log, used_bundled = _run_analysis_cmd(
                task, model_bundle["ckpt_path"], output_dir, input_text, task4_phase
            )
            logs.append(f"\n\n{'='*22} TASK {task} {'='*22}\n{log}")
            if code != 0:
                failures += 1
                logs.append(f"\n[Live fallback]\n{_live_task_analysis(model_bundle, task, input_text)}\n")
            elif used_bundled:
                logs.append(f"\n[Live bundled summary]\n{_live_task_analysis(model_bundle, task, input_text)}\n")
            job.update(
                {
                    "log": "".join(logs),
                    "failures": failures,
                    "progress": int(idx * 100 / len(tasks)),
                    "updated": _now(),
                }
            )
        if failures:
            _bundle_task_outputs(model_bundle, output_dir)
    except Exception:
        # Thread boundary: nothing above us can handle this, so record it on
        # the job and keep whatever log was gathered so far.
        logs.append(f"\n\n[Background run aborted]\n{traceback.format_exc()}")
        job.update(
            {
                "state": "failed",
                "done": True,
                "log": "".join(logs),
                "failures": failures,
                "updated": _now(),
            }
        )
        return

    job.update(
        {
            "state": "done",
            "done": True,
            "progress": 100,
            "log": "".join(logs),
            "failures": failures,
            "updated": _now(),
        }
    )
def start_run_all_background(model_bundle, output_dir, input_text, task4_phase):
    """Register a new background job and start its worker thread.

    Args:
        model_bundle: Loaded model bundle; a falsy value raises ``gr.Error``.
        output_dir: Directory for task outputs; created if missing.
        input_text: User input forwarded to the worker.
        task4_phase: Phase selector forwarded to the worker.

    Returns:
        ``(status message, log message, job_id)``.

    Raises:
        gr.Error: If no model has been loaded.
    """
    if not model_bundle:
        raise gr.Error("Load a model first.")

    os.makedirs(output_dir, exist_ok=True)

    # Short random identifier: first 10 hex characters of a UUID4.
    job_id = uuid.uuid4().hex[:10]

    # The record must exist before the thread starts, because the worker
    # updates it immediately.
    record = {
        "state": "queued",
        "progress": 0,
        "log": "",
        "failures": 0,
        "done": False,
        "output_dir": output_dir,
        "created": datetime.now().isoformat(),
        "updated": datetime.now().isoformat(),
    }
    _BG_JOBS[job_id] = record

    worker_args = (job_id, model_bundle, output_dir, input_text, task4_phase)
    worker = threading.Thread(target=_bg_worker, args=worker_args, daemon=True)
    worker.start()

    status_message = f"Background run started. Job ID: {job_id}"
    log_message = f"Job {job_id} queued..."
    return status_message, log_message, job_id
def poll_run_all_background(job_id, output_dir):
    """Report the current state of a background job plus refreshed task outputs.

    Args:
        job_id: Identifier returned when the job was started; may be empty.
        output_dir: Directory passed to ``refresh_task_outputs``.

    Returns:
        A tuple ``(status, log, *refresh_task_outputs(output_dir))``. When the
        job is unknown, status and log both carry the same notice.
    """
    job = _BG_JOBS.get(job_id) if job_id else None
    if job is None:
        notice = "No active background job. Start Run All 5 Tasks first."
        return (notice, notice, *refresh_task_outputs(output_dir))

    summary = (
        f"Job {job_id} | state={job['state']} | progress={job['progress']}% | "
        f"failures={job['failures']} | updated={job['updated']}"
    )
    panels = refresh_task_outputs(output_dir)
    return (summary, job.get("log", ""), *panels)
 def run_single_task(model_bundle, task, output_dir, input_text, task4_phase):
     """Run one analysis task and return ``(status, log)`` for the UI.

     When the analysis command fails, or reports that bundled outputs were
     used, the bundled reports are copied into place and a live task analysis
     is appended to the log.

     Raises:
         gr.Error: If no model has been loaded.
     """
     if not model_bundle:
         raise gr.Error("Load a model first.")

     exit_code, log, used_bundled = _run_analysis_cmd(
         task, model_bundle["ckpt_path"], output_dir, input_text, task4_phase
     )

     # Clean run with freshly generated outputs: nothing more to add.
     if exit_code == 0 and not used_bundled:
         return f"Task {task} completed (exit={exit_code}).", log

     # Failure and bundled-success share the same enrichment steps.
     _bundle_task_outputs(model_bundle, output_dir)
     live_report = _live_task_analysis(model_bundle, task, input_text)
     log = f"{log}\n\n--- Live task analysis ---\n{live_report}"

     if exit_code != 0:
         status = f"Task {task} fallback mode: bundled reports + live input analysis."
     else:
         status = f"Task {task} loaded from bundled analysis outputs + live analysis."
     return status, log
 with gr.Blocks(title="Sanskrit Diffusion Client Demo", css=CUSTOM_CSS) as demo:
     model_state = gr.State(None)
+    bg_job_state = gr.State("")
     gr.Markdown(
         """
                         value="analyze",
                         label="Task 4 Phase",
                     )
+                    run_all_btn = gr.Button("Run All 5 Tasks (Background)", variant="primary")
+                    track_bg_btn = gr.Button("Track Background Run")
             with gr.Row():
                 task_choice = gr.Dropdown(
         outputs=[task_run_status, task_run_log],
     )
     run_all_btn.click(
+        fn=start_run_all_background,
         inputs=[model_state, analysis_output_dir, analysis_input, task4_phase],
+        outputs=[task_run_status, task_run_log, bg_job_state],
+    )
+    track_bg_btn.click(
+        fn=poll_run_all_background,
+        inputs=[bg_job_state, analysis_output_dir],
+        outputs=[
+            task_run_status,
+            task_run_log,
+            task1_box,
+            task2_box,
+            task2_drift_img,
+            task2_attn_img,
+            task3_box,
+            task3_img,
+            task5_box,
+            task4_img,
+        ],
     )
     refresh_outputs_btn.click(
         fn=refresh_task_outputs,