bhsinghgrid committed on
Commit
3a0ae5e
·
verified ·
1 Parent(s): c0eacc0

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +61 -4
app.py CHANGED
@@ -3,6 +3,7 @@ import json
3
  import os
4
  import subprocess
5
  import sys
 
6
  from datetime import datetime
7
  from pathlib import Path
8
 
@@ -69,6 +70,19 @@ def discover_checkpoints():
69
  return found
70
 
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  def checkpoint_map():
73
  return {item["label"]: item for item in discover_checkpoints()}
74
 
@@ -172,7 +186,7 @@ def load_selected_model(checkpoint_label):
172
  "n_heads": cfg["model"]["n_heads"],
173
  }
174
  status = f"Loaded `{experiment}` on `{device}` (`{cfg['model_type']}`)"
175
- suggested_out = os.path.join("analysis", "outputs_ui", experiment)
176
  return bundle, status, model_info, cfg["inference"]["num_steps"], suggested_out
177
 
178
 
@@ -284,6 +298,9 @@ def generate_from_ui(
284
 
285
  def _run_analysis_cmd(task, ckpt_path, output_dir, input_text="dharmo rakṣati rakṣitaḥ", phase="analyze"):
286
  os.makedirs(output_dir, exist_ok=True)
 
 
 
287
  # Space-safe Task4 fallback: if ablation models don't exist, bootstrap them
288
  # from currently selected checkpoint so Task4 can still execute end-to-end.
289
  if str(task) == "4" and phase == "analyze":
@@ -300,7 +317,7 @@ def _run_analysis_cmd(task, ckpt_path, output_dir, input_text="dharmo rakṣati
300
 
301
  cmd = [
302
  sys.executable,
303
- "analysis/run_analysis.py",
304
  "--task",
305
  str(task),
306
  "--checkpoint",
@@ -323,11 +340,48 @@ def _run_analysis_cmd(task, ckpt_path, output_dir, input_text="dharmo rakṣati
323
  return proc.returncode, log
324
 
325
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  def run_single_task(model_bundle, task, output_dir, input_text, task4_phase):
327
  if not model_bundle:
328
  raise gr.Error("Load a model first.")
329
  code, log = _run_analysis_cmd(task, model_bundle["ckpt_path"], output_dir, input_text, task4_phase)
330
- status = f"Task {task} {'completed' if code == 0 else 'failed'} (exit={code})."
 
 
 
 
 
331
  return status, log
332
 
333
 
@@ -341,7 +395,10 @@ def run_all_tasks(model_bundle, output_dir, input_text, task4_phase):
341
  logs.append(f"\n\n{'='*22} TASK {task} {'='*22}\n{log}")
342
  if code != 0:
343
  failures += 1
344
- status = f"Run-all finished with {failures} failed task(s)." if failures else "All 5 tasks completed."
 
 
 
345
  return status, "".join(logs)
346
 
347
 
 
3
  import os
4
  import subprocess
5
  import sys
6
+ import shutil
7
  from datetime import datetime
8
  from pathlib import Path
9
 
 
70
  return found
71
 
72
 
73
+ def _guess_analysis_dir(experiment: str, ckpt_path: str) -> str:
74
+ base = Path("analysis_outputs")
75
+ if base.exists():
76
+ if experiment and (base / experiment).is_dir():
77
+ return str(base / experiment)
78
+ for part in Path(ckpt_path).parts:
79
+ if part.startswith("T") and part[1:].isdigit() and (base / part).is_dir():
80
+ return str(base / part)
81
+ if (base / "T4").is_dir():
82
+ return str(base / "T4")
83
+ return os.path.join("analysis", "outputs_ui", experiment or "default")
84
+
85
+
def checkpoint_map():
    """Return the discovered checkpoints keyed by each entry's ``"label"``.

    Entries come from ``discover_checkpoints()``; when two entries share a
    label, the later one wins.
    """
    by_label = {}
    for entry in discover_checkpoints():
        by_label[entry["label"]] = entry
    return by_label
88
 
 
186
  "n_heads": cfg["model"]["n_heads"],
187
  }
188
  status = f"Loaded `{experiment}` on `{device}` (`{cfg['model_type']}`)"
189
+ suggested_out = _guess_analysis_dir(experiment, ckpt_path)
190
  return bundle, status, model_info, cfg["inference"]["num_steps"], suggested_out
191
 
192
 
 
298
 
299
  def _run_analysis_cmd(task, ckpt_path, output_dir, input_text="dharmo rakṣati rakṣitaḥ", phase="analyze"):
300
  os.makedirs(output_dir, exist_ok=True)
301
+ script = Path("analysis") / "run_analysis.py"
302
+ if not script.exists():
303
+ return 2, "Analysis runner missing in Space image. Falling back to bundled analysis outputs."
304
  # Space-safe Task4 fallback: if ablation models don't exist, bootstrap them
305
  # from currently selected checkpoint so Task4 can still execute end-to-end.
306
  if str(task) == "4" and phase == "analyze":
 
317
 
318
  cmd = [
319
  sys.executable,
320
+ str(script),
321
  "--task",
322
  str(task),
323
  "--checkpoint",
 
340
  return proc.returncode, log
341
 
342
 
def _bundle_task_outputs(model_bundle, output_dir):
    """Copy bundled analysis files for the loaded model into ``output_dir``.

    The source directory is chosen by ``_guess_analysis_dir`` from the
    bundle's ``"experiment"`` and ``"ckpt_path"`` entries. Only regular files
    directly inside that directory are copied; sub-directories are ignored.
    Nothing happens when the source directory does not exist.

    Args:
        model_bundle: Mapping describing the loaded model; the optional keys
            ``"experiment"`` and ``"ckpt_path"`` are read.
        output_dir: Destination directory; created if missing.
    """
    src_dir = _guess_analysis_dir(
        model_bundle.get("experiment", ""),
        model_bundle.get("ckpt_path", ""),
    )
    if not os.path.isdir(src_dir):
        return
    os.makedirs(output_dir, exist_ok=True)
    # load_selected_model suggests this same guessed directory as the output
    # directory, so source and destination are frequently one folder. Copying
    # a file onto itself makes shutil.copy2 raise SameFileError, and there is
    # nothing to copy in that case anyway.
    if os.path.samefile(src_dir, output_dir):
        return
    for name in os.listdir(src_dir):
        src = os.path.join(src_dir, name)
        if os.path.isfile(src):
            shutil.copy2(src, os.path.join(output_dir, name))
353
+
354
+
355
+ def _live_input_summary(model_bundle, input_text: str) -> str:
356
+ if not input_text.strip():
357
+ return "No input text provided."
358
+ cfg = copy.deepcopy(model_bundle["cfg"])
359
+ src_tok = model_bundle["src_tok"]
360
+ tgt_tok = model_bundle["tgt_tok"]
361
+ device = torch.device(model_bundle["device"])
362
+ inp = torch.tensor([src_tok.encode(input_text.strip())[:cfg["model"]["max_seq_len"]]], dtype=torch.long, device=device)
363
+ out = run_inference(model_bundle["model"], inp, cfg)
364
+ pred = _decode_with_cleanup(tgt_tok, out[0].tolist(), input_text.strip(), cfg["inference"])
365
+ toks = pred.split()
366
+ uniq = len(set(toks)) / max(1, len(toks))
367
+ return (
368
+ f"Live input: {input_text}\n"
369
+ f"Prediction: {pred}\n"
370
+ f"Length(tokens): {len(toks)}\n"
371
+ f"Unique-token ratio: {uniq:.3f}"
372
+ )
373
+
374
+
def run_single_task(model_bundle, task, output_dir, input_text, task4_phase):
    """Run one analysis task and return ``(status, log)`` for the UI.

    When the analysis command exits with a non-zero code, bundled outputs are
    copied into ``output_dir`` and a live summary of ``input_text`` is
    appended to the log.

    Raises:
        gr.Error: If no model bundle has been loaded.
    """
    if not model_bundle:
        raise gr.Error("Load a model first.")

    code, log = _run_analysis_cmd(
        task, model_bundle["ckpt_path"], output_dir, input_text, task4_phase
    )
    if code == 0:
        return f"Task {task} completed (exit={code}).", log

    # Fallback path: copy bundled reports first, then add the live summary.
    _bundle_task_outputs(model_bundle, output_dir)
    summary = _live_input_summary(model_bundle, input_text)
    return (
        f"Task {task} fallback mode: bundled reports + live input analysis.",
        f"{log}\n\n--- Live input summary ---\n{summary}",
    )
386
 
387
 
 
395
  logs.append(f"\n\n{'='*22} TASK {task} {'='*22}\n{log}")
396
  if code != 0:
397
  failures += 1
398
+ if failures:
399
+ _bundle_task_outputs(model_bundle, output_dir)
400
+ logs.append(f"\n\n--- Live input summary ---\n{_live_input_summary(model_bundle, input_text)}")
401
+ status = f"Run-all finished with {failures} fallback task(s)." if failures else "All 5 tasks completed."
402
  return status, "".join(logs)
403
 
404