Spaces:
Sleeping
Sleeping
Switch to Stemson-AI/denoise_judging dataset (n2v vs digital_twin, 200 triplets)
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ Required Space secret: HF_TOKEN (write access to RESULTS_REPO).
|
|
| 8 |
|
| 9 |
from __future__ import annotations
|
| 10 |
|
|
|
|
| 11 |
import io
|
| 12 |
import json
|
| 13 |
import os
|
|
@@ -21,8 +22,11 @@ from pathlib import Path
|
|
| 21 |
import gradio as gr
|
| 22 |
from huggingface_hub import HfApi, list_repo_files, snapshot_download
|
| 23 |
|
| 24 |
-
TRIPLETS_REPO = "Stemson-AI/
|
| 25 |
RESULTS_REPO = "Stemson-AI/denoise-judgments"
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 28 |
if not HF_TOKEN:
|
|
@@ -48,14 +52,26 @@ def load_triplets() -> tuple[Path, list[dict]]:
|
|
| 48 |
local_root = Path(__file__).resolve().parent / "triplets_local"
|
| 49 |
if local_root.exists():
|
| 50 |
shutil.rmtree(local_root)
|
| 51 |
-
# follow_symlinks=True (default) copies file contents from the blobs sibling.
|
| 52 |
shutil.copytree(snapshot, local_root)
|
|
|
|
|
|
|
| 53 |
rows: list[dict] = []
|
| 54 |
-
with open(
|
| 55 |
-
for
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
return local_root, rows
|
| 60 |
|
| 61 |
|
|
@@ -119,7 +135,7 @@ def _paths_for_current(session: dict) -> tuple[str, str, str] | None:
|
|
| 119 |
|
| 120 |
|
| 121 |
def _assign_sides(session: dict) -> None:
|
| 122 |
-
methods =
|
| 123 |
random.shuffle(methods)
|
| 124 |
session["left_method"], session["right_method"] = methods
|
| 125 |
|
|
@@ -128,7 +144,7 @@ def _progress(session: dict) -> str:
|
|
| 128 |
total = session["n_total"]
|
| 129 |
done = session["n_done_now"]
|
| 130 |
if total == 0:
|
| 131 |
-
return "All
|
| 132 |
return f"Triplet {min(done + 1, total)} / {total} this session • {session['n_already']} already done before"
|
| 133 |
|
| 134 |
|
|
@@ -287,8 +303,8 @@ with gr.Blocks(title="Denoising A/B Judging", theme=gr.themes.Soft()) as demo:
|
|
| 287 |
|
| 288 |
gr.Markdown("# Denoising A/B Judging")
|
| 289 |
gr.Markdown(
|
| 290 |
-
"For each triplet you'll see a **raw** image and two
|
| 291 |
-
"(**A** and **B**). Pick the one you think is the better
|
| 292 |
"Click any image to zoom in."
|
| 293 |
)
|
| 294 |
|
|
|
|
| 8 |
|
| 9 |
from __future__ import annotations
|
| 10 |
|
| 11 |
+
import csv
|
| 12 |
import io
|
| 13 |
import json
|
| 14 |
import os
|
|
|
|
| 22 |
import gradio as gr
|
| 23 |
from huggingface_hub import HfApi, list_repo_files, snapshot_download
|
| 24 |
|
| 25 |
+
TRIPLETS_REPO = "Stemson-AI/denoise_judging"
|
| 26 |
RESULTS_REPO = "Stemson-AI/denoise-judgments"
|
| 27 |
+
# Methods compared as the blind A/B options. Image files live at
|
| 28 |
+
# `judging_dataset/<tag>/<method>.png` (plus `raw.png` for context).
|
| 29 |
+
METHODS = ["n2v", "digital_twin"]
|
| 30 |
|
| 31 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 32 |
if not HF_TOKEN:
|
|
|
|
| 52 |
local_root = Path(__file__).resolve().parent / "triplets_local"
|
| 53 |
if local_root.exists():
|
| 54 |
shutil.rmtree(local_root)
|
|
|
|
| 55 |
shutil.copytree(snapshot, local_root)
|
| 56 |
+
|
| 57 |
+
base = local_root / "judging_dataset"
|
| 58 |
rows: list[dict] = []
|
| 59 |
+
with open(base / "manifest.csv", newline="") as f:
|
| 60 |
+
for r in csv.DictReader(f):
|
| 61 |
+
tag = r["tag"]
|
| 62 |
+
row = {
|
| 63 |
+
"triplet_id": tag,
|
| 64 |
+
"raw": f"judging_dataset/{tag}/raw.png",
|
| 65 |
+
# carry the manifest fields so they're available downstream if needed.
|
| 66 |
+
"manifest": r,
|
| 67 |
+
}
|
| 68 |
+
for m in METHODS:
|
| 69 |
+
row[m] = f"judging_dataset/{tag}/{m}.png"
|
| 70 |
+
# sanity: skip any row whose images are missing
|
| 71 |
+
if all((local_root / row[k]).exists() for k in ("raw", *METHODS)):
|
| 72 |
+
rows.append(row)
|
| 73 |
+
else:
|
| 74 |
+
print(f"skipping {tag}: missing image files")
|
| 75 |
return local_root, rows
|
| 76 |
|
| 77 |
|
|
|
|
| 135 |
|
| 136 |
|
| 137 |
def _assign_sides(session: dict) -> None:
    """Randomly assign the compared methods to the left and right sides.

    A shuffled copy of the module-level ``METHODS`` list is unpacked into the
    ``"left_method"`` and ``"right_method"`` keys of ``session``. ``METHODS``
    itself is never reordered.

    Args:
        session: Mutable per-session state dict; updated in place.
    """
    order = [*METHODS]  # copy, so the shared constant keeps its order
    random.shuffle(order)
    # Two-target unpacking: raises ValueError unless exactly two methods exist,
    # and does so before either session key is written.
    left, right = order
    session["left_method"] = left
    session["right_method"] = right
|
| 141 |
|
|
|
|
| 144 |
total = session["n_total"]
|
| 145 |
done = session["n_done_now"]
|
| 146 |
if total == 0:
|
| 147 |
+
return "All triplets are already judged for this email — thank you!"
|
| 148 |
return f"Triplet {min(done + 1, total)} / {total} this session • {session['n_already']} already done before"
|
| 149 |
|
| 150 |
|
|
|
|
| 303 |
|
| 304 |
gr.Markdown("# Denoising A/B Judging")
|
| 305 |
gr.Markdown(
|
| 306 |
+
"For each triplet you'll see a **raw** image and two options "
|
| 307 |
+
"(**A** and **B**). Pick the one you think is the better image. "
|
| 308 |
"Click any image to zoom in."
|
| 309 |
)
|
| 310 |
|