Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import json
|
|
| 3 |
import os
|
| 4 |
import subprocess
|
| 5 |
import sys
|
|
|
|
| 6 |
from datetime import datetime
|
| 7 |
from pathlib import Path
|
| 8 |
|
|
@@ -69,6 +70,19 @@ def discover_checkpoints():
|
|
| 69 |
return found
|
| 70 |
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
def checkpoint_map():
    """Return the discovered checkpoints keyed by their ``"label"`` entry.

    If two discovered items share a label, the later one wins.
    """
    checkpoints = discover_checkpoints()
    return {entry["label"]: entry for entry in checkpoints}
|
| 74 |
|
|
@@ -172,7 +186,7 @@ def load_selected_model(checkpoint_label):
|
|
| 172 |
"n_heads": cfg["model"]["n_heads"],
|
| 173 |
}
|
| 174 |
status = f"Loaded `{experiment}` on `{device}` (`{cfg['model_type']}`)"
|
| 175 |
-
suggested_out =
|
| 176 |
return bundle, status, model_info, cfg["inference"]["num_steps"], suggested_out
|
| 177 |
|
| 178 |
|
|
@@ -284,6 +298,9 @@ def generate_from_ui(
|
|
| 284 |
|
| 285 |
def _run_analysis_cmd(task, ckpt_path, output_dir, input_text="dharmo rakṣati rakṣitaḥ", phase="analyze"):
|
| 286 |
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
| 287 |
# Space-safe Task4 fallback: if ablation models don't exist, bootstrap them
|
| 288 |
# from currently selected checkpoint so Task4 can still execute end-to-end.
|
| 289 |
if str(task) == "4" and phase == "analyze":
|
|
@@ -300,7 +317,7 @@ def _run_analysis_cmd(task, ckpt_path, output_dir, input_text="dharmo rakṣati
|
|
| 300 |
|
| 301 |
cmd = [
|
| 302 |
sys.executable,
|
| 303 |
-
|
| 304 |
"--task",
|
| 305 |
str(task),
|
| 306 |
"--checkpoint",
|
|
@@ -323,11 +340,48 @@ def _run_analysis_cmd(task, ckpt_path, output_dir, input_text="dharmo rakṣati
|
|
| 323 |
return proc.returncode, log
|
| 324 |
|
| 325 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
def run_single_task(model_bundle, task, output_dir, input_text, task4_phase):
|
| 327 |
if not model_bundle:
|
| 328 |
raise gr.Error("Load a model first.")
|
| 329 |
code, log = _run_analysis_cmd(task, model_bundle["ckpt_path"], output_dir, input_text, task4_phase)
|
| 330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
return status, log
|
| 332 |
|
| 333 |
|
|
@@ -341,7 +395,10 @@ def run_all_tasks(model_bundle, output_dir, input_text, task4_phase):
|
|
| 341 |
logs.append(f"\n\n{'='*22} TASK {task} {'='*22}\n{log}")
|
| 342 |
if code != 0:
|
| 343 |
failures += 1
|
| 344 |
-
|
|
|
|
|
|
|
|
|
|
| 345 |
return status, "".join(logs)
|
| 346 |
|
| 347 |
|
|
|
|
| 3 |
import os
|
| 4 |
import subprocess
|
| 5 |
import sys
|
| 6 |
+
import shutil
|
| 7 |
from datetime import datetime
|
| 8 |
from pathlib import Path
|
| 9 |
|
|
|
|
| 70 |
return found
|
| 71 |
|
| 72 |
|
| 73 |
+
def _guess_analysis_dir(experiment: str, ckpt_path: str) -> str:
|
| 74 |
+
base = Path("analysis_outputs")
|
| 75 |
+
if base.exists():
|
| 76 |
+
if experiment and (base / experiment).is_dir():
|
| 77 |
+
return str(base / experiment)
|
| 78 |
+
for part in Path(ckpt_path).parts:
|
| 79 |
+
if part.startswith("T") and part[1:].isdigit() and (base / part).is_dir():
|
| 80 |
+
return str(base / part)
|
| 81 |
+
if (base / "T4").is_dir():
|
| 82 |
+
return str(base / "T4")
|
| 83 |
+
return os.path.join("analysis", "outputs_ui", experiment or "default")
|
| 84 |
+
|
| 85 |
+
|
| 86 |
def checkpoint_map():
    """Return the discovered checkpoints keyed by their ``"label"`` entry.

    If two discovered items share a label, the later one wins.
    """
    checkpoints = discover_checkpoints()
    return {entry["label"]: entry for entry in checkpoints}
|
| 88 |
|
|
|
|
| 186 |
"n_heads": cfg["model"]["n_heads"],
|
| 187 |
}
|
| 188 |
status = f"Loaded `{experiment}` on `{device}` (`{cfg['model_type']}`)"
|
| 189 |
+
suggested_out = _guess_analysis_dir(experiment, ckpt_path)
|
| 190 |
return bundle, status, model_info, cfg["inference"]["num_steps"], suggested_out
|
| 191 |
|
| 192 |
|
|
|
|
| 298 |
|
| 299 |
def _run_analysis_cmd(task, ckpt_path, output_dir, input_text="dharmo rakṣati rakṣitaḥ", phase="analyze"):
|
| 300 |
os.makedirs(output_dir, exist_ok=True)
|
| 301 |
+
script = Path("analysis") / "run_analysis.py"
|
| 302 |
+
if not script.exists():
|
| 303 |
+
return 2, "Analysis runner missing in Space image. Falling back to bundled analysis outputs."
|
| 304 |
# Space-safe Task4 fallback: if ablation models don't exist, bootstrap them
|
| 305 |
# from currently selected checkpoint so Task4 can still execute end-to-end.
|
| 306 |
if str(task) == "4" and phase == "analyze":
|
|
|
|
| 317 |
|
| 318 |
cmd = [
|
| 319 |
sys.executable,
|
| 320 |
+
str(script),
|
| 321 |
"--task",
|
| 322 |
str(task),
|
| 323 |
"--checkpoint",
|
|
|
|
| 340 |
return proc.returncode, log
|
| 341 |
|
| 342 |
|
| 343 |
+
def _bundle_task_outputs(model_bundle, output_dir):
    """Copy bundled analysis files for the loaded model into ``output_dir``.

    The source directory is whatever ``_guess_analysis_dir`` returns for the
    bundle's ``"experiment"`` and ``"ckpt_path"`` entries (both default to an
    empty string when absent). Only regular files directly inside that
    directory are copied; sub-directories are ignored. Existing files of the
    same name in ``output_dir`` are overwritten.

    Args:
        model_bundle: Mapping describing the loaded model.
        output_dir: Destination directory; created if it does not exist.

    Returns:
        None. Does nothing when the source directory does not exist.
    """
    src_dir = _guess_analysis_dir(model_bundle.get("experiment", ""), model_bundle.get("ckpt_path", ""))
    if not os.path.isdir(src_dir):
        return
    os.makedirs(output_dir, exist_ok=True)
    # The output directory suggested when a model is loaded comes from the
    # same _guess_analysis_dir call, so source and destination are often the
    # same directory. shutil.copy2 raises SameFileError when asked to copy a
    # file onto itself; the files are already in place, so there is nothing
    # to do.
    if os.path.samefile(src_dir, output_dir):
        return
    for name in os.listdir(src_dir):
        src = os.path.join(src_dir, name)
        if os.path.isfile(src):
            shutil.copy2(src, os.path.join(output_dir, name))
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
def _live_input_summary(model_bundle, input_text: str) -> str:
|
| 356 |
+
if not input_text.strip():
|
| 357 |
+
return "No input text provided."
|
| 358 |
+
cfg = copy.deepcopy(model_bundle["cfg"])
|
| 359 |
+
src_tok = model_bundle["src_tok"]
|
| 360 |
+
tgt_tok = model_bundle["tgt_tok"]
|
| 361 |
+
device = torch.device(model_bundle["device"])
|
| 362 |
+
inp = torch.tensor([src_tok.encode(input_text.strip())[:cfg["model"]["max_seq_len"]]], dtype=torch.long, device=device)
|
| 363 |
+
out = run_inference(model_bundle["model"], inp, cfg)
|
| 364 |
+
pred = _decode_with_cleanup(tgt_tok, out[0].tolist(), input_text.strip(), cfg["inference"])
|
| 365 |
+
toks = pred.split()
|
| 366 |
+
uniq = len(set(toks)) / max(1, len(toks))
|
| 367 |
+
return (
|
| 368 |
+
f"Live input: {input_text}\n"
|
| 369 |
+
f"Prediction: {pred}\n"
|
| 370 |
+
f"Length(tokens): {len(toks)}\n"
|
| 371 |
+
f"Unique-token ratio: {uniq:.3f}"
|
| 372 |
+
)
|
| 373 |
+
|
| 374 |
+
|
| 375 |
def run_single_task(model_bundle, task, output_dir, input_text, task4_phase):
    """Run one analysis task for the loaded model and report the outcome.

    Args:
        model_bundle: Loaded-model mapping; must provide ``"ckpt_path"``.
        task: Task identifier forwarded to the analysis runner.
        output_dir: Directory the task writes to.
        input_text: Text forwarded to the runner and used for the live summary.
        task4_phase: Phase value forwarded to the analysis runner.

    Returns:
        ``(status, log)``. On a non-zero exit code the bundled outputs are
        copied into ``output_dir`` and a live input summary is appended to
        the log.

    Raises:
        gr.Error: If no model has been loaded.
    """
    if not model_bundle:
        raise gr.Error("Load a model first.")

    exit_code, output = _run_analysis_cmd(
        task, model_bundle["ckpt_path"], output_dir, input_text, task4_phase
    )
    if exit_code == 0:
        return f"Task {task} completed (exit={exit_code}).", output

    # Non-zero exit: fall back to the bundled reports plus a live prediction.
    _bundle_task_outputs(model_bundle, output_dir)
    summary = _live_input_summary(model_bundle, input_text)
    return (
        f"Task {task} fallback mode: bundled reports + live input analysis.",
        f"{output}\n\n--- Live input summary ---\n{summary}",
    )
|
| 386 |
|
| 387 |
|
|
|
|
| 395 |
logs.append(f"\n\n{'='*22} TASK {task} {'='*22}\n{log}")
|
| 396 |
if code != 0:
|
| 397 |
failures += 1
|
| 398 |
+
if failures:
|
| 399 |
+
_bundle_task_outputs(model_bundle, output_dir)
|
| 400 |
+
logs.append(f"\n\n--- Live input summary ---\n{_live_input_summary(model_bundle, input_text)}")
|
| 401 |
+
status = f"Run-all finished with {failures} fallback task(s)." if failures else "All 5 tasks completed."
|
| 402 |
return status, "".join(logs)
|
| 403 |
|
| 404 |
|