cgeorgiaw HF Staff committed on
Commit
4657393
·
verified ·
1 Parent(s): 78c0906

Switch to Stemson-AI/denoise_judging dataset (n2v vs digital_twin, 200 triplets)

Browse files
Files changed (1) hide show
  1. app.py +27 -11
app.py CHANGED
@@ -8,6 +8,7 @@ Required Space secret: HF_TOKEN (write access to RESULTS_REPO).
8
 
9
  from __future__ import annotations
10
 
 
11
  import io
12
  import json
13
  import os
@@ -21,8 +22,11 @@ from pathlib import Path
21
  import gradio as gr
22
  from huggingface_hub import HfApi, list_repo_files, snapshot_download
23
 
24
- TRIPLETS_REPO = "Stemson-AI/denoise-judging-triplets"
25
  RESULTS_REPO = "Stemson-AI/denoise-judgments"
 
 
 
26
 
27
  HF_TOKEN = os.environ.get("HF_TOKEN")
28
  if not HF_TOKEN:
@@ -48,14 +52,26 @@ def load_triplets() -> tuple[Path, list[dict]]:
48
  local_root = Path(__file__).resolve().parent / "triplets_local"
49
  if local_root.exists():
50
  shutil.rmtree(local_root)
51
- # follow_symlinks=True (default) copies file contents from the blobs sibling.
52
  shutil.copytree(snapshot, local_root)
 
 
53
  rows: list[dict] = []
54
- with open(local_root / "metadata.jsonl") as f:
55
- for line in f:
56
- line = line.strip()
57
- if line:
58
- rows.append(json.loads(line))
 
 
 
 
 
 
 
 
 
 
 
59
  return local_root, rows
60
 
61
 
@@ -119,7 +135,7 @@ def _paths_for_current(session: dict) -> tuple[str, str, str] | None:
119
 
120
 
121
  def _assign_sides(session: dict) -> None:
122
- methods = ["cimp_gan", "n2v"]
123
  random.shuffle(methods)
124
  session["left_method"], session["right_method"] = methods
125
 
@@ -128,7 +144,7 @@ def _progress(session: dict) -> str:
128
  total = session["n_total"]
129
  done = session["n_done_now"]
130
  if total == 0:
131
- return "All 60 triplets are already judged for this email — thank you!"
132
  return f"Triplet {min(done + 1, total)} / {total} this session • {session['n_already']} already done before"
133
 
134
 
@@ -287,8 +303,8 @@ with gr.Blocks(title="Denoising A/B Judging", theme=gr.themes.Soft()) as demo:
287
 
288
  gr.Markdown("# Denoising A/B Judging")
289
  gr.Markdown(
290
- "For each triplet you'll see a **raw** image and two denoised options "
291
- "(**A** and **B**). Pick the one you think is the better denoising. "
292
  "Click any image to zoom in."
293
  )
294
 
 
8
 
9
  from __future__ import annotations
10
 
11
+ import csv
12
  import io
13
  import json
14
  import os
 
22
  import gradio as gr
23
  from huggingface_hub import HfApi, list_repo_files, snapshot_download
24
 
25
+ TRIPLETS_REPO = "Stemson-AI/denoise_judging"
26
  RESULTS_REPO = "Stemson-AI/denoise-judgments"
27
+ # Methods compared as the blind A/B options. Their images live at
28
+ # `judging_dataset/<tag>/<method>.png` (alongside `raw.png` for context).
29
+ METHODS = ["n2v", "digital_twin"]
30
 
31
  HF_TOKEN = os.environ.get("HF_TOKEN")
32
  if not HF_TOKEN:
 
52
  local_root = Path(__file__).resolve().parent / "triplets_local"
53
  if local_root.exists():
54
  shutil.rmtree(local_root)
 
55
  shutil.copytree(snapshot, local_root)
56
+
57
+ base = local_root / "judging_dataset"
58
  rows: list[dict] = []
59
+ with open(base / "manifest.csv", newline="") as f:
60
+ for r in csv.DictReader(f):
61
+ tag = r["tag"]
62
+ row = {
63
+ "triplet_id": tag,
64
+ "raw": f"judging_dataset/{tag}/raw.png",
65
+ # carry the manifest fields so they're available downstream if needed.
66
+ "manifest": r,
67
+ }
68
+ for m in METHODS:
69
+ row[m] = f"judging_dataset/{tag}/{m}.png"
70
+ # sanity: skip any row whose images are missing
71
+ if all((local_root / row[k]).exists() for k in ("raw", *METHODS)):
72
+ rows.append(row)
73
+ else:
74
+ print(f"skipping {tag}: missing image files")
75
  return local_root, rows
76
 
77
 
 
135
 
136
 
137
  def _assign_sides(session: dict) -> None:
138
+ methods = list(METHODS)
139
  random.shuffle(methods)
140
  session["left_method"], session["right_method"] = methods
141
 
 
144
  total = session["n_total"]
145
  done = session["n_done_now"]
146
  if total == 0:
147
+ return "All triplets are already judged for this email — thank you!"
148
  return f"Triplet {min(done + 1, total)} / {total} this session • {session['n_already']} already done before"
149
 
150
 
 
303
 
304
  gr.Markdown("# Denoising A/B Judging")
305
  gr.Markdown(
306
+ "For each triplet you'll see a **raw** image and two options "
307
+ "(**A** and **B**). Pick the one you think is the better image. "
308
  "Click any image to zoom in."
309
  )
310