Spaces:

Stemson-AI
/

denoise-judging

Sleeping

App Files Files Community

cgeorgiaw (HF Staff) committed 27 days ago

Commit

4657393

verified ·

1 Parent(s): 78c0906

Switch to Stemson-AI/denoise_judging dataset (n2v vs digital_twin, 200 triplets)

Browse files

Files changed (1) hide show

app.py +27 -11

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ Required Space secret: HF_TOKEN (write access to RESULTS_REPO).
 from __future__ import annotations
 import io
 import json
 import os
@@ -21,8 +22,11 @@ from pathlib import Path
 import gradio as gr
 from huggingface_hub import HfApi, list_repo_files, snapshot_download
-TRIPLETS_REPO = "Stemson-AI/denoise-judging-triplets"
 RESULTS_REPO = "Stemson-AI/denoise-judgments"
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
@@ -48,14 +52,26 @@ def load_triplets() -> tuple[Path, list[dict]]:
     local_root = Path(__file__).resolve().parent / "triplets_local"
     if local_root.exists():
         shutil.rmtree(local_root)
-    # follow_symlinks=True (default) copies file contents from the blobs sibling.
     shutil.copytree(snapshot, local_root)
     rows: list[dict] = []
-    with open(local_root / "metadata.jsonl") as f:
-        for line in f:
-            line = line.strip()
-            if line:
-                rows.append(json.loads(line))
     return local_root, rows
@@ -119,7 +135,7 @@ def _paths_for_current(session: dict) -> tuple[str, str, str] | None:
 def _assign_sides(session: dict) -> None:
-    methods = ["cimp_gan", "n2v"]
     random.shuffle(methods)
     session["left_method"], session["right_method"] = methods
@@ -128,7 +144,7 @@ def _progress(session: dict) -> str:
     total = session["n_total"]
     done = session["n_done_now"]
     if total == 0:
-        return "All 60 triplets are already judged for this email — thank you!"
     return f"Triplet {min(done + 1, total)} / {total} this session  •  {session['n_already']} already done before"
@@ -287,8 +303,8 @@ with gr.Blocks(title="Denoising A/B Judging", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# Denoising A/B Judging")
     gr.Markdown(
-        "For each triplet you'll see a **raw** image and two denoised options "
-        "(**A** and **B**). Pick the one you think is the better denoising. "
         "Click any image to zoom in."
     )

 from __future__ import annotations
+import csv
 import io
 import json
 import os
 import gradio as gr
 from huggingface_hub import HfApi, list_repo_files, snapshot_download
+TRIPLETS_REPO = "Stemson-AI/denoise_judging"
 RESULTS_REPO = "Stemson-AI/denoise-judgments"
+# Methods compared as the blind A/B options. Each method's image lives at
+# `judging_dataset/<tag>/<method>.png` (alongside `raw.png` for context).
+METHODS = ["n2v", "digital_twin"]
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
     local_root = Path(__file__).resolve().parent / "triplets_local"
     if local_root.exists():
         shutil.rmtree(local_root)
     shutil.copytree(snapshot, local_root)
+    base = local_root / "judging_dataset"
     rows: list[dict] = []
+    with open(base / "manifest.csv", newline="") as f:
+        for r in csv.DictReader(f):
+            tag = r["tag"]
+            row = {
+                "triplet_id": tag,
+                "raw": f"judging_dataset/{tag}/raw.png",
+                # carry the manifest fields so they're available downstream if needed.
+                "manifest": r,
+            }
+            for m in METHODS:
+                row[m] = f"judging_dataset/{tag}/{m}.png"
+            # sanity: skip any row whose images are missing
+            if all((local_root / row[k]).exists() for k in ("raw", *METHODS)):
+                rows.append(row)
+            else:
+                print(f"skipping {tag}: missing image files")
     return local_root, rows
 def _assign_sides(session: dict) -> None:
+    methods = list(METHODS)
     random.shuffle(methods)
     session["left_method"], session["right_method"] = methods
     total = session["n_total"]
     done = session["n_done_now"]
     if total == 0:
+        return "All triplets are already judged for this email — thank you!"
     return f"Triplet {min(done + 1, total)} / {total} this session  •  {session['n_already']} already done before"
     gr.Markdown("# Denoising A/B Judging")
     gr.Markdown(
+        "For each triplet you'll see a **raw** image and two options "
+        "(**A** and **B**). Pick the one you think is the better image. "
         "Click any image to zoom in."
     )