forouzanfallah committed on
Commit
71cbe5b
·
verified ·
1 Parent(s): 6032b28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -193
app.py CHANGED
@@ -21,15 +21,12 @@ HF_TOKEN = os.getenv("HF_TOKEN")
21
  _hf_api = HfApi(token=HF_TOKEN)
22
 
23
  # --- Main settings ---
24
- # UPDATED: Set to 20 as requested
25
  TARGET_PER_PERSON = 20
26
  CONTACT_EMAIL = "ffallah@asu.edu"
27
 
28
  # --- Paths ---
29
- # We still use the JSON file to get the list of filenames,
30
  CAPTIONS_JSON_PATH = os.environ.get("CAPTIONS_JSON_PATH", "data/captions.json")
31
 
32
- # Folders with matching filenames across all FIVE folders:
33
  GT_MASKED_DIR = "data/gt_b" # Image 1
34
  GT_UNMASKED_DIR = "data/adc_b" # Image 2
35
  SR_DIR = "data/sr_b" # Image 3
@@ -40,10 +37,10 @@ IMAGE_5_DIR = "data/see_b" # Image 5
40
  RESULTS_DIR = "results"
41
  PROGRESS_PATH = os.path.join(RESULTS_DIR, "progress.json")
42
  ALL_RESULTS_JSONL = os.path.join(RESULTS_DIR, "all_results.jsonl")
43
- SAVE_PII = True # Set to False to anonymize saved name/email
44
 
45
  WRITE_LOCK = threading.Lock()
46
- STRICT_ENFORCEMENT = False # UPDATED: Set to False to prevent crashes if dataset size != target
47
 
48
  # ----------------------
49
  # Data model
@@ -60,17 +57,36 @@ class Sample:
60
  # ----------------------
61
  # Helpers
62
  # ----------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def user_target_count(samples: List[Sample]) -> int:
64
- # UPDATED: Logic to be more robust.
65
- # It takes the minimum of the available samples or the target.
66
- # This prevents crashes if you have fewer or more images than 20.
67
  return min(len(samples), TARGET_PER_PERSON)
68
 
69
  def user_left_count(user_seen: List[str], samples: List[Sample]) -> int:
70
  target = user_target_count(samples)
71
- seen = set(user_seen)
72
- # Only count seen images that are actually in the current sample list
73
- allowed_ids = {s.sample_id for s in samples}
74
  seen_in_allowed = len([sid for sid in seen if sid in allowed_ids])
75
  return max(0, target - seen_in_allowed)
76
 
@@ -84,15 +100,8 @@ def push_results_to_private_repo(uid: str):
84
  if not HF_TOKEN or not HF_RESULTS_REPO:
85
  return
86
  try:
87
- # _ensure_private_repo(HF_RESULTS_REPO)
88
  os.makedirs(RESULTS_DIR, exist_ok=True)
89
- # # Ensure files exist
90
- # if not os.path.exists(ALL_RESULTS_JSONL):
91
- # open(ALL_RESULTS_JSONL, "a").close()
92
-
93
  user_file = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
94
- # if not os.path.exists(user_file):
95
- # open(user_file, "a").close()
96
 
97
  ops = [
98
  CommitOperationAdd(
@@ -127,20 +136,28 @@ def ensure_paths():
127
  (IMAGE_5_DIR, "IMAGE_5_DIR"),
128
  ]:
129
  if not os.path.isdir(pth):
130
- # We just print a warning instead of crashing, to allow partial setup
131
  print(f"Warning: Directory '{pth}' for {name} not found.")
132
 
133
  def load_image(path: str) -> Image.Image:
134
- if not os.path.exists(path):
135
- return Image.new("RGB", (256, 256), color="gray") # Return placeholder if missing
136
- return Image.open(path).convert("RGB")
 
 
 
 
137
 
138
  def load_dataset(captions_path: str, gt_masked_dir: str, gt_unmasked_dir: str, sr_dir: str, original_dir: str, image_5_dir: str) -> List[Sample]:
139
  if not os.path.exists(captions_path):
 
140
  return []
141
 
142
  with open(captions_path, "r", encoding="utf-8") as f:
143
- captions_data = json.load(f)
 
 
 
 
144
 
145
  samples: List[Sample] = []
146
  for item in captions_data:
@@ -149,8 +166,6 @@ def load_dataset(captions_path: str, gt_masked_dir: str, gt_unmasked_dir: str, s
149
  continue
150
 
151
  sample_id = os.path.splitext(base_filename)[0]
152
-
153
- # Define all 5 target paths
154
  paths = {
155
  "masked": os.path.join(gt_masked_dir, base_filename),
156
  "unmasked": os.path.join(gt_unmasked_dir, base_filename),
@@ -159,22 +174,25 @@ def load_dataset(captions_path: str, gt_masked_dir: str, gt_unmasked_dir: str, s
159
  "img5": os.path.join(image_5_dir, base_filename)
160
  }
161
 
162
- # STRICT CHECK: All 5 must exist
163
- if all(os.path.exists(p) for p in paths.values()):
164
- samples.append(
165
- Sample(
166
- sample_id=sample_id,
167
- masked_gt_path=paths["masked"],
168
- unmasked_gt_path=paths["unmasked"],
169
- sr_path=paths["sr"],
170
- original_path=paths["original"],
171
- image_5_path=paths["img5"]
172
- )
 
 
 
 
 
 
173
  )
174
- else:
175
- # Debugging: Find out which folder is the culprit
176
- missing = [k for k, v in paths.items() if not os.path.exists(v)]
177
- print(f"Skipping {base_filename}: Missing in folders {missing}")
178
 
179
  return samples
180
 
@@ -209,11 +227,6 @@ def append_jsonl(path: str, record: Dict[str, Any]):
209
  # LOGIC FOR CONVERTING SLIDERS TO RANK
210
  # ----------------------
211
  def convert_scores_to_rank(s1, s2, s3, s4, s5) -> Dict[str, int]:
212
- """
213
- Takes 5 scores (1-10). Returns a dictionary:
214
- {'image_1': rank, 'image_2': rank...}
215
- where Rank 1 is the Highest Score.
216
- """
217
  scores = [
218
  ("image_1", s1),
219
  ("image_2", s2),
@@ -221,25 +234,21 @@ def convert_scores_to_rank(s1, s2, s3, s4, s5) -> Dict[str, int]:
221
  ("image_4", s4),
222
  ("image_5", s5)
223
  ]
224
- # Sort by score descending (High score first)
225
  scores.sort(key=lambda x: x[1], reverse=True)
226
-
227
  ranks = {}
228
  current_rank = 1
229
  for img_key, score in scores:
230
  ranks[img_key] = current_rank
231
  current_rank += 1
232
-
233
  return ranks
234
 
235
  # ----------------------
236
  # App logic
237
  # ----------------------
238
  def pick_next_index(user_seen: List[str], samples: List[Sample]) -> int:
239
- # Logic: Only pick from samples that match the criteria
240
- seen_set = set(user_seen)
241
- remaining = [i for i, s in enumerate(samples) if s.sample_id not in seen_set]
242
-
243
  if not remaining:
244
  return -1
245
  return random.choice(remaining)
@@ -250,9 +259,9 @@ def start_or_resume(name: str, email: str):
250
 
251
  ensure_paths()
252
  samples = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
253
-
254
  if not samples:
255
- raise gr.Error("No images found. Please check dataset configuration.")
256
 
257
  uid = hash_user_id(name, email)
258
  progress = load_progress()
@@ -263,15 +272,22 @@ def start_or_resume(name: str, email: str):
263
  user_seen: List[str] = progress[uid].get("seen", [])
264
  left = user_left_count(user_seen, samples)
265
 
 
 
 
266
  # If the user has finished their target
267
  if left == 0 and len(user_seen) >= user_target_count(samples):
268
  status = (
269
  f"Welcome back, {name}. You’ve completed all {user_target_count(samples)} images. 🎉\n"
270
  f"Your personal results file: {os.path.join(RESULTS_DIR, f'{uid}.jsonl')}"
271
  )
 
272
  return (
273
- uid, samples, user_seen, -1,
274
- None, None, None, None, None, # images
 
 
 
275
  status,
276
  os.path.join(RESULTS_DIR, f"{uid}.jsonl"),
277
  gr.update(visible=False),
@@ -280,9 +296,20 @@ def start_or_resume(name: str, email: str):
280
  )
281
 
282
  idx = pick_next_index(user_seen, samples)
283
- if idx == -1:
284
- # Case where target not reached but no fresh images left
285
- return (uid, samples, user_seen, -1, None, None, None, None, None, "No more new images available.", "", gr.update(visible=False), gr.update(visible=True), gr.update(visible=True))
 
 
 
 
 
 
 
 
 
 
 
286
 
287
  sample = samples[idx]
288
 
@@ -294,8 +321,13 @@ def start_or_resume(name: str, email: str):
294
  os.makedirs(RESULTS_DIR, exist_ok=True)
295
  user_file_path = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
296
 
 
 
297
  return (
298
- uid, samples, user_seen, idx,
 
 
 
299
  load_image(sample.masked_gt_path),
300
  load_image(sample.unmasked_gt_path),
301
  load_image(sample.sr_path),
@@ -308,6 +340,7 @@ def start_or_resume(name: str, email: str):
308
  gr.update(visible=False),
309
  )
310
 
 
311
  def _save_record_and_progress(
312
  name: str,
313
  email: str,
@@ -325,21 +358,20 @@ def _save_record_and_progress(
325
  if not name or not email:
326
  raise gr.Error("Please enter your name and email.")
327
 
328
- if idx is None or idx < 0 or idx >= len(samples):
 
 
329
  return load_progress()
330
 
331
- # --- CALCULATE RANK FROM SLIDERS ---
332
  rank_dict = convert_scores_to_rank(score_1, score_2, score_3, score_4, score_5)
333
 
334
- sample = samples[idx]
335
  progress = load_progress()
336
  progress.setdefault(uid, {"seen": []})
337
  seen = set(progress[uid].get("seen", []))
338
 
339
  if sample.sample_id in seen:
340
  return progress
341
-
342
- # We allow saving even if target met, just in case, but usually UI stops them.
343
 
344
  record: Dict[str, Any] = {
345
  "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
@@ -347,14 +379,6 @@ def _save_record_and_progress(
347
  "name": name if SAVE_PII else None,
348
  "email": email if SAVE_PII else None,
349
  "sample_id": sample.sample_id,
350
- # "image_paths": {
351
- # "masked_gt": sample.masked_gt_path,
352
- # "unmasked_gt": sample.unmasked_gt_path,
353
- # "sr": sample.sr_path,
354
- # "original": sample.original_path,
355
- # "image_5": sample.image_5_path,
356
- # },
357
- # Save raw scores (1-10) and the ranking
358
  "raw_scores": {
359
  "image_1": score_1,
360
  "image_2": score_2,
@@ -363,17 +387,22 @@ def _save_record_and_progress(
363
  "image_5": score_5,
364
  },
365
  "responses": {
366
- # "image_ranking": rank_dict, # Format: {"image_1": 1, "image_2": 4, ...}
367
  "notes": q1_notes or "",
 
368
  },
369
  }
370
 
371
  os.makedirs(RESULTS_DIR, exist_ok=True)
372
  append_jsonl(os.path.join(RESULTS_DIR, f"{uid}.jsonl"), record)
373
  append_jsonl(ALL_RESULTS_JSONL, record)
374
- # push_results_to_private_repo(uid)
375
- thread = threading.Thread(target=push_results_to_private_repo, args=(uid,))
376
- thread.start()
 
 
 
 
 
377
 
378
  seen.add(sample.sample_id)
379
  progress[uid]["seen"] = sorted(list(seen))
@@ -403,7 +432,7 @@ def submit_finish(
403
  return (
404
  user_seen, idx,
405
  gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
406
- gr.update(),
407
  gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
408
  gr.update(),
409
  )
@@ -411,15 +440,15 @@ def submit_finish(
411
  return (
412
  user_seen, idx,
413
  gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None),
414
- gr.update(value=""),
415
  gr.update(value="Finished!"),
416
  gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
417
- gr.update(value=None),
418
  )
419
-
420
  def pause_exit(user_seen, samples):
421
  return user_seen, samples
422
-
423
  def submit_next_image(
424
  name: str,
425
  email: str,
@@ -430,75 +459,54 @@ def submit_next_image(
430
  s1: float, s2: float, s3: float, s4: float, s5: float,
431
  q1_notes: str
432
  ):
 
433
  try:
434
  progress = _save_record_and_progress(
435
- name, email, uid, samples, user_seen, idx,
436
  s1, s2, s3, s4, s5,
437
  q1_notes
438
  )
439
  except gr.Error as e:
440
  raise e
441
- # return (
442
- # user_seen, idx,
443
- # gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
444
- # gr.update(),
445
- # gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
446
- # gr.update(),
447
- # )
448
-
449
- # left_after = user_left_count(progress[uid]["seen"], samples)
450
- # target = user_target_count(samples)
451
- updated_seen = progress[uid]["seen"]
452
-
453
- if len(updated_seen) >= TARGET_PER_PERSON:
454
  status = (
455
  f"Saved! You’ve completed all {target} images. 🎉 "
456
  f"Click **Exit** to close this session."
457
  )
458
  return (
459
- updated_seen, -1,
460
- None, None, None, None, None, # No more images to load
461
- gr.update(value="Target reached!"), # Status
462
- gr.update(value=""), # Clear notes
463
- 5, 5, 5, 5, 5 # Reset sliders
 
464
  )
465
-
466
- # if left_after == 0:
467
- # status = (
468
- # f"Saved! You’ve completed all {target} images. 🎉 "
469
- # f"Click **Exit** to close this session."
470
- # )
471
- # return (
472
- # updated_seen, -1,
473
- # None, None, None, None, None, # Return None to avoid image load errors
474
- # gr.update(value="Target reached! Processing..."),
475
- # gr.update(value=""),
476
- # 5, 5, 5, 5, 5,
477
- # gr.update(value=None)
478
- # )
479
-
480
- idx_next = pick_next_index(updated_seen, samples)
481
  if idx_next == -1:
482
- return (updated_seen, -1, None, None, None, None, None, "No more images.", "", 5, 5, 5, 5, 5)
483
- # Fallback if no images left
484
- # return (
485
- # progress[uid]["seen"], -1,
486
- # None, None, None, None, None,
487
- # gr.update(value="No more images."),
488
- # gr.update(value=""),
489
- # gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
490
- # gr.update(value=None),
491
- # )
492
-
493
- sample_next = samples[idx_next]
494
-
495
- status =""
496
- # (
497
- # f"Saved! Personal progress — images left: {left_after} of {target}.\n"
498
- # f"Next sample: {sample_next.sample_id}"
499
- # )
500
  return (
501
- updated_seen, idx_next,
502
  load_image(sample_next.masked_gt_path),
503
  load_image(sample_next.unmasked_gt_path),
504
  load_image(sample_next.sr_path),
@@ -507,40 +515,26 @@ def submit_next_image(
507
  gr.update(value=""),
508
  gr.update(value=""),
509
  5, 5, 5, 5, 5,
510
-
511
  )
512
- # return (
513
- # progress[uid]["seen"], idx_next,
514
- # load_image(sample_next.masked_gt_path),
515
- # load_image(sample_next.unmasked_gt_path),
516
- # load_image(sample_next.sr_path),
517
- # load_image(sample_next.original_path),
518
- # load_image(sample_next.image_5_path),
519
- # gr.update(value=status),
520
- # gr.update(value=""),
521
- # gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
522
- # gr.update(value=None),
523
- # )
524
 
525
 
526
  def to_thanks(name: str, user_seen: List[str], samples: List[Sample]):
527
- # Calculate how many are left based on the updated seen list
528
- left = user_left_count(user_seen, samples)
529
- target = user_target_count(samples)
530
  if left > 0:
531
- # Message for users who are leaving early
532
- msg = (
533
  f"### ⏸️ Session Paused!\n\n"
534
  f"### ✅ Thanks, {name}! Your progress has been saved.\n\n"
535
- f"We’re grateful for your time and expertise. Our suggested target is "
536
- f"{TARGET_PER_PERSON} images per reviewer"
537
- f"You have **{left}** images left.\n\n"
538
- f"You can close this tab and return whenever you like—just use the same Name and Email to **continue where you left off**.\n\n"
539
- f"If you have questions, issues, or suggestions, please email **{CONTACT_EMAIL}**.\n\n"
540
- f"Click **Start Again** to evaluate another image."
541
  )
542
  else:
543
- # Message for users who completed the target
544
  msg = (
545
  f"### ✅ All Done, {name}!\n\n"
546
  f"You’ve completed the target of **{target}** images. Your responses are securely saved.\n\n"
@@ -553,17 +547,12 @@ def hide_thanks():
553
  return gr.update(visible=False)
554
 
555
  def maybe_show_thanks(name: str, seen: List[str], samples: List[Sample]):
556
- # Check if the user has reached the target
557
  if len(set(seen or [])) >= TARGET_PER_PERSON:
558
- return to_thanks(name, seen, samples)
559
-
560
- # If not done, keep evaluation panel visible
561
  return gr.update(visible=True), gr.update(visible=False), gr.update()
562
-
563
  def reset_to_start():
564
- """
565
- Clears inputs and resets the view to the login page.
566
- """
567
  return (
568
  gr.update(value=""), # Clear Name
569
  gr.update(value=""), # Clear Email
@@ -583,14 +572,14 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
583
 
584
  ### 👋 Welcome, and thanks for lending your expertise!
585
  We’re inviting domain experts to help evaluate satellite image patches for RTS.
586
-
587
  ---
588
 
589
  ### 📋 Instructions
590
  * **Suggested target:** ~{TARGET_PER_PERSON} images per reviewer.
591
  * **The Task:** For each set, you will see 5 variations of the same satellite image.
592
  * **Rating:** Rate each image from **1 (Poor)** to **10 (Excellent)** based on how clearly the RTS feature (indicated by the **Red Box**) is depicted.
593
-
594
  ### ⏸️ Saving & Resuming
595
  * **Automatic Saving:** Your progress is saved automatically after every "Submit".
596
  * **Take a Break:** You can close this tab at any time.
@@ -598,7 +587,7 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
598
 
599
  ---
600
  **Questions or issues?** Email **{CONTACT_EMAIL}** — we appreciate your feedback and suggestions.
601
-
602
  **Ready?** Enter your details below to begin.
603
  """
604
  )
@@ -618,39 +607,37 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
618
 
619
  eval_panel = gr.Group(visible=False)
620
  with eval_panel:
621
-
622
- # --- NEW LAYOUT: 5 COLUMNS, 1-10 SLIDERS ---
623
  gr.Markdown(
624
  """
625
  Focus your attention on the area inside the **Red Box**. This marks the potential location of the Retrogressive Thaw Slump (RTS). Compare the five images below. Rate how clearly and accurately each image depicts the **RTS** feature.
626
-
627
  **Rating Scale (1 - 10):**
628
  * **10 (Excellent):** The RTS feature is sharp, distinct, and clearly visible.
629
  * **1 (Poor):** The RTS feature is blurry, distorted, or impossible to distinguish.
630
  """
631
  )
632
-
633
  with gr.Row():
634
  with gr.Column(scale=1, min_width=150):
635
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 1</div>")
636
  image_1 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
637
  score_1 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
638
-
639
  with gr.Column(scale=1, min_width=150):
640
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 2</div>")
641
  image_2 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
642
  score_2 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
643
-
644
  with gr.Column(scale=1, min_width=150):
645
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 3</div>")
646
  image_3 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
647
  score_3 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
648
-
649
  with gr.Column(scale=1, min_width=150):
650
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 4</div>")
651
  image_4 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
652
  score_4 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
653
-
654
  with gr.Column(scale=1, min_width=150):
655
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 5</div>")
656
  image_5 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
@@ -661,19 +648,18 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
661
  lines=2,
662
  placeholder="If there are multiple RTS or ambiguities, please note here."
663
  )
664
-
665
  with gr.Row():
666
  submit_next_btn = gr.Button("Submit & Next Image", variant="primary")
667
  pause_exit_btn = gr.Button("Exit", variant="secondary")
668
 
669
- # your_jsonl_path = gr.Textbox(label="Your results file path (for reference)", interactive=False)
670
  your_jsonl_path = gr.State()
 
671
  with gr.Group(visible=False) as thanks_group:
672
  thanks_md = gr.Markdown("### ✅ Thanks! Your responses were saved.\n\nClick **Start Again** to evaluate another image.")
673
  restart_btn = gr.Button("Start Again", variant="primary")
674
 
675
  # --- Wiring ---
676
-
677
  start_event = start_btn.click(
678
  start_or_resume,
679
  inputs=[name, email],
@@ -692,7 +678,7 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
692
  inputs=[state_seen, state_samples],
693
  outputs=[state_seen, state_samples],
694
  )
695
-
696
  # 2. Then show the "Thanks/Resume" screen with the 'how many left' message
697
  pause_event.then(
698
  to_thanks,
@@ -715,7 +701,6 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
715
  outputs=[eval_panel, thanks_group, thanks_md],
716
  )
717
 
718
- # --- CHANGED: Calls reset_to_start instead of start_or_resume ---
719
  restart_event = restart_btn.click(
720
  reset_to_start,
721
  inputs=[],
@@ -727,16 +712,13 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
727
  )
728
 
729
  if __name__ == "__main__":
730
- # --- DYNAMIC READ FROM HF_RESULTS_REPO ---
731
  if HF_RESULTS_REPO:
732
  from huggingface_hub import snapshot_download
733
- # print(f"Reading images and metadata from: {HF_RESULTS_REPO}...")
734
  try:
735
- # This pulls your repo's 'data' folder into the current workspace
736
  snapshot_download(
737
  repo_id=HF_RESULTS_REPO,
738
  repo_type="dataset",
739
- local_dir=".",
740
  allow_patterns=["data/*", "results/*"],
741
  token=HF_TOKEN
742
  )
@@ -744,9 +726,8 @@ if __name__ == "__main__":
744
  print(f"Error reading from HF: {e}")
745
 
746
  ensure_paths()
747
- # Pre-check dataset load from the newly downloaded files
748
  _ = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
749
-
750
- print("✅ Successfully synced with HF Repo. Launching app.")
751
  demo.queue()
752
- demo.launch()
 
21
  _hf_api = HfApi(token=HF_TOKEN)
22
 
23
  # --- Main settings ---
 
24
  TARGET_PER_PERSON = 20
25
  CONTACT_EMAIL = "ffallah@asu.edu"
26
 
27
  # --- Paths ---
 
28
  CAPTIONS_JSON_PATH = os.environ.get("CAPTIONS_JSON_PATH", "data/captions.json")
29
 
 
30
  GT_MASKED_DIR = "data/gt_b" # Image 1
31
  GT_UNMASKED_DIR = "data/adc_b" # Image 2
32
  SR_DIR = "data/sr_b" # Image 3
 
37
  RESULTS_DIR = "results"
38
  PROGRESS_PATH = os.path.join(RESULTS_DIR, "progress.json")
39
  ALL_RESULTS_JSONL = os.path.join(RESULTS_DIR, "all_results.jsonl")
40
+ SAVE_PII = True
41
 
42
  WRITE_LOCK = threading.Lock()
43
+ STRICT_ENFORCEMENT = False
44
 
45
  # ----------------------
46
  # Data model
 
57
  # ----------------------
58
  # Helpers
59
  # ----------------------
60
def ensure_sample_objects(samples_input):
    """Normalize a list of samples to ``Sample`` objects.

    Accepts either a list of ``Sample`` instances or a list of dicts
    (a serialized ``Sample.__dict__``). The element type is decided from
    the first element only.

    Args:
        samples_input: ``list[Sample]`` or ``list[dict]``. Anything else,
            including ``None`` and an empty list, yields ``[]``.

    Returns:
        A list of ``Sample`` objects. When the input already holds
        ``Sample`` objects the same list is returned. ``[]`` is returned
        when the input cannot be converted.
    """
    if not samples_input or not isinstance(samples_input, list):
        return []

    first = samples_input[0]
    if isinstance(first, Sample):
        return samples_input
    if not isinstance(first, dict):
        return []

    try:
        return [Sample(**item) for item in samples_input]
    except Exception as exc:
        # Deliberately best-effort: callers treat an empty list as
        # "nothing to show". Report the cause instead of hiding it.
        print(f"Warning: could not rebuild Sample objects: {exc!r}")
        return []
82
+
83
def user_target_count(samples: List[Sample]) -> int:
    """Return how many samples one reviewer is expected to rate.

    This is ``TARGET_PER_PERSON``, capped at the number of samples given.
    """
    available = len(samples)
    if available < TARGET_PER_PERSON:
        return available
    return TARGET_PER_PERSON
85
 
86
def user_left_count(user_seen: List[str], samples: List[Sample]) -> int:
    """Return how many samples the user still has to rate.

    Only ids in ``user_seen`` that belong to ``samples`` count towards the
    target; the result never goes below zero.
    """
    known_ids = {sample.sample_id for sample in samples}
    already_rated = set(user_seen or []) & known_ids
    remaining = user_target_count(samples) - len(already_rated)
    return remaining if remaining > 0 else 0
92
 
 
100
  if not HF_TOKEN or not HF_RESULTS_REPO:
101
  return
102
  try:
 
103
  os.makedirs(RESULTS_DIR, exist_ok=True)
 
 
 
 
104
  user_file = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
 
 
105
 
106
  ops = [
107
  CommitOperationAdd(
 
136
  (IMAGE_5_DIR, "IMAGE_5_DIR"),
137
  ]:
138
  if not os.path.isdir(pth):
 
139
  print(f"Warning: Directory '{pth}' for {name} not found.")
140
 
141
def load_image(path: str) -> Image.Image:
    """Load the image at ``path`` as RGB, or return a gray placeholder.

    Args:
        path: File path of the image. May be empty or point to a missing
            or unreadable file.

    Returns:
        The image converted to RGB. A 256x256 gray placeholder is returned
        when ``path`` is empty, does not exist, or cannot be opened or
        decoded, so the UI always receives an image.
    """
    if path and os.path.exists(path):
        try:
            # The context manager closes the underlying file as soon as
            # the converted copy has been produced.
            with Image.open(path) as img:
                return img.convert("RGB")
        except Exception as exc:
            # Best-effort: fall back to the placeholder, but say why.
            print(f"Warning: could not load image '{path}': {exc!r}")
    return Image.new("RGB", (256, 256), color="gray")
149
 
150
  def load_dataset(captions_path: str, gt_masked_dir: str, gt_unmasked_dir: str, sr_dir: str, original_dir: str, image_5_dir: str) -> List[Sample]:
151
  if not os.path.exists(captions_path):
152
+ print(f"Captions file not found at {captions_path}")
153
  return []
154
 
155
  with open(captions_path, "r", encoding="utf-8") as f:
156
+ try:
157
+ captions_data = json.load(f)
158
+ except Exception:
159
+ print("Failed to parse captions JSON.")
160
+ return []
161
 
162
  samples: List[Sample] = []
163
  for item in captions_data:
 
166
  continue
167
 
168
  sample_id = os.path.splitext(base_filename)[0]
 
 
169
  paths = {
170
  "masked": os.path.join(gt_masked_dir, base_filename),
171
  "unmasked": os.path.join(gt_unmasked_dir, base_filename),
 
174
  "img5": os.path.join(image_5_dir, base_filename)
175
  }
176
 
177
+ # If strict enforcement required, require all five files to exist.
178
+ if STRICT_ENFORCEMENT:
179
+ if not all(os.path.exists(p) for p in paths.values()):
180
+ missing = [k for k, v in paths.items() if not os.path.exists(v)]
181
+ print(f"Skipping {base_filename}: Missing in folders {missing}")
182
+ continue
183
+
184
+ # In non-strict mode, it's okay to include samples even if some files missing;
185
+ # we will supply placeholders at load time.
186
+ samples.append(
187
+ Sample(
188
+ sample_id=sample_id,
189
+ masked_gt_path=paths["masked"],
190
+ unmasked_gt_path=paths["unmasked"],
191
+ sr_path=paths["sr"],
192
+ original_path=paths["original"],
193
+ image_5_path=paths["img5"]
194
  )
195
+ )
 
 
 
196
 
197
  return samples
198
 
 
227
  # LOGIC FOR CONVERTING SLIDERS TO RANK
228
  # ----------------------
229
  def convert_scores_to_rank(s1, s2, s3, s4, s5) -> Dict[str, int]:
 
 
 
 
 
230
  scores = [
231
  ("image_1", s1),
232
  ("image_2", s2),
 
234
  ("image_4", s4),
235
  ("image_5", s5)
236
  ]
 
237
  scores.sort(key=lambda x: x[1], reverse=True)
 
238
  ranks = {}
239
  current_rank = 1
240
  for img_key, score in scores:
241
  ranks[img_key] = current_rank
242
  current_rank += 1
 
243
  return ranks
244
 
245
  # ----------------------
246
  # App logic
247
  # ----------------------
248
def pick_next_index(user_seen: List[str], samples: List[Sample]) -> int:
    """Return the index of a randomly chosen unseen sample.

    ``samples`` is first normalized with ``ensure_sample_objects``.
    Returns ``-1`` when every sample id is already in ``user_seen``.
    """
    already_seen = set(user_seen or [])
    candidates = [
        position
        for position, sample in enumerate(ensure_sample_objects(samples))
        if sample.sample_id not in already_seen
    ]
    return random.choice(candidates) if candidates else -1
 
259
 
260
  ensure_paths()
261
  samples = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
262
+
263
  if not samples:
264
+ raise gr.Error("No images found. Please check dataset configuration.")
265
 
266
  uid = hash_user_id(name, email)
267
  progress = load_progress()
 
272
  user_seen: List[str] = progress[uid].get("seen", [])
273
  left = user_left_count(user_seen, samples)
274
 
275
+ # placeholder image to avoid Gradio trying to load None
276
+ placeholder_img = Image.new("RGB", (256, 256), color="gray")
277
+
278
  # If the user has finished their target
279
  if left == 0 and len(user_seen) >= user_target_count(samples):
280
  status = (
281
  f"Welcome back, {name}. You’ve completed all {user_target_count(samples)} images. 🎉\n"
282
  f"Your personal results file: {os.path.join(RESULTS_DIR, f'{uid}.jsonl')}"
283
  )
284
+ samples_serialized = [s.__dict__ for s in samples]
285
  return (
286
+ uid,
287
+ samples_serialized,
288
+ user_seen,
289
+ -1,
290
+ placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
291
  status,
292
  os.path.join(RESULTS_DIR, f"{uid}.jsonl"),
293
  gr.update(visible=False),
 
296
  )
297
 
298
  idx = pick_next_index(user_seen, samples)
299
+ if idx == -1:
300
+ samples_serialized = [s.__dict__ for s in samples]
301
+ return (
302
+ uid,
303
+ samples_serialized,
304
+ user_seen,
305
+ -1,
306
+ placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
307
+ "No more new images available.",
308
+ "",
309
+ gr.update(visible=False),
310
+ gr.update(visible=True),
311
+ gr.update(visible=True)
312
+ )
313
 
314
  sample = samples[idx]
315
 
 
321
  os.makedirs(RESULTS_DIR, exist_ok=True)
322
  user_file_path = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
323
 
324
+ samples_serialized = [s.__dict__ for s in samples]
325
+
326
  return (
327
+ uid,
328
+ samples_serialized,
329
+ user_seen,
330
+ idx,
331
  load_image(sample.masked_gt_path),
332
  load_image(sample.unmasked_gt_path),
333
  load_image(sample.sr_path),
 
340
  gr.update(visible=False),
341
  )
342
 
343
+
344
  def _save_record_and_progress(
345
  name: str,
346
  email: str,
 
358
  if not name or not email:
359
  raise gr.Error("Please enter your name and email.")
360
 
361
+ samples_obj = ensure_sample_objects(samples)
362
+
363
+ if idx is None or idx < 0 or idx >= len(samples_obj):
364
  return load_progress()
365
 
 
366
  rank_dict = convert_scores_to_rank(score_1, score_2, score_3, score_4, score_5)
367
 
368
+ sample = samples_obj[idx]
369
  progress = load_progress()
370
  progress.setdefault(uid, {"seen": []})
371
  seen = set(progress[uid].get("seen", []))
372
 
373
  if sample.sample_id in seen:
374
  return progress
 
 
375
 
376
  record: Dict[str, Any] = {
377
  "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
 
379
  "name": name if SAVE_PII else None,
380
  "email": email if SAVE_PII else None,
381
  "sample_id": sample.sample_id,
 
 
 
 
 
 
 
 
382
  "raw_scores": {
383
  "image_1": score_1,
384
  "image_2": score_2,
 
387
  "image_5": score_5,
388
  },
389
  "responses": {
 
390
  "notes": q1_notes or "",
391
+ "image_ranking": rank_dict,
392
  },
393
  }
394
 
395
  os.makedirs(RESULTS_DIR, exist_ok=True)
396
  append_jsonl(os.path.join(RESULTS_DIR, f"{uid}.jsonl"), record)
397
  append_jsonl(ALL_RESULTS_JSONL, record)
398
+
399
+ # start background push but don't let failures crash the app
400
+ try:
401
+ thread = threading.Thread(target=push_results_to_private_repo, args=(uid,))
402
+ thread.daemon = True
403
+ thread.start()
404
+ except Exception:
405
+ pass
406
 
407
  seen.add(sample.sample_id)
408
  progress[uid]["seen"] = sorted(list(seen))
 
432
  return (
433
  user_seen, idx,
434
  gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
435
+ gr.update(),
436
  gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
437
  gr.update(),
438
  )
 
440
  return (
441
  user_seen, idx,
442
  gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None),
443
+ gr.update(value=""),
444
  gr.update(value="Finished!"),
445
  gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
446
+ gr.update(value=None),
447
  )
448
+
449
def pause_exit(user_seen, samples):
    """Return ``(user_seen, samples)`` unchanged."""
    passthrough = (user_seen, samples)
    return passthrough
451
+
452
  def submit_next_image(
453
  name: str,
454
  email: str,
 
459
  s1: float, s2: float, s3: float, s4: float, s5: float,
460
  q1_notes: str
461
  ):
462
+ samples_obj = ensure_sample_objects(samples)
463
  try:
464
  progress = _save_record_and_progress(
465
+ name, email, uid, samples_obj, user_seen, idx,
466
  s1, s2, s3, s4, s5,
467
  q1_notes
468
  )
469
  except gr.Error as e:
470
  raise e
471
+
472
+ seen_list = progress.get(uid, {}).get("seen", [])
473
+ left_after = user_left_count(seen_list, samples_obj)
474
+ target = user_target_count(samples_obj)
475
+
476
+ # placeholder image to avoid Gradio trying to load None
477
+ placeholder_img = Image.new("RGB", (256, 256), color="gray")
478
+
479
+ # If user reached the target, return placeholders for images and let the then() chain show thanks
480
+ if left_after == 0:
 
 
 
481
  status = (
482
  f"Saved! You’ve completed all {target} images. 🎉 "
483
  f"Click **Exit** to close this session."
484
  )
485
  return (
486
+ seen_list, -1,
487
+ placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
488
+ gr.update(value=status),
489
+ gr.update(value=""),
490
+ 5, 5, 5, 5, 5,
491
+ gr.update(value=None)
492
  )
493
+
494
+ idx_next = pick_next_index(seen_list, samples_obj)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  if idx_next == -1:
496
+ # no more images but target not met (rare). return placeholders too.
497
+ return (
498
+ seen_list, -1,
499
+ placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
500
+ "No more images.",
501
+ "",
502
+ 5, 5, 5, 5, 5,
503
+ None
504
+ )
505
+
506
+ sample_next = samples_obj[idx_next]
507
+
 
 
 
 
 
 
508
  return (
509
+ seen_list, idx_next,
510
  load_image(sample_next.masked_gt_path),
511
  load_image(sample_next.unmasked_gt_path),
512
  load_image(sample_next.sr_path),
 
515
  gr.update(value=""),
516
  gr.update(value=""),
517
  5, 5, 5, 5, 5,
518
+ gr.update(value=None)
519
  )
 
 
 
 
 
 
 
 
 
 
 
 
520
 
521
 
522
  def to_thanks(name: str, user_seen: List[str], samples: List[Sample]):
523
+ samples_obj = ensure_sample_objects(samples)
524
+ left = user_left_count(user_seen, samples_obj)
525
+ target = user_target_count(samples_obj)
526
  if left > 0:
527
+ msg = (
 
528
  f"### ⏸️ Session Paused!\n\n"
529
  f"### ✅ Thanks, {name}! Your progress has been saved.\n\n"
530
+ f"We’re grateful for your time and expertise. Our suggested target is "
531
+ f"{TARGET_PER_PERSON} images per reviewer.\n\n"
532
+ f"You have **{left}** images left.\n\n"
533
+ f"You can close this tab and return whenever you like—just use the same Name and Email to **continue where you left off**.\n\n"
534
+ f"If you have questions, issues, or suggestions, please email **{CONTACT_EMAIL}**.\n\n"
535
+ f"Click **Start Again** to evaluate another image."
536
  )
537
  else:
 
538
  msg = (
539
  f"### ✅ All Done, {name}!\n\n"
540
  f"You’ve completed the target of **{target}** images. Your responses are securely saved.\n\n"
 
547
  return gr.update(visible=False)
548
 
549
  def maybe_show_thanks(name: str, seen: List[str], samples: List[Sample]):
550
+ samples_obj = ensure_sample_objects(samples)
551
  if len(set(seen or [])) >= TARGET_PER_PERSON:
552
+ return to_thanks(name, seen, samples_obj)
 
 
553
  return gr.update(visible=True), gr.update(visible=False), gr.update()
554
+
555
  def reset_to_start():
 
 
 
556
  return (
557
  gr.update(value=""), # Clear Name
558
  gr.update(value=""), # Clear Email
 
572
 
573
  ### 👋 Welcome, and thanks for lending your expertise!
574
  We’re inviting domain experts to help evaluate satellite image patches for RTS.
575
+
576
  ---
577
 
578
  ### 📋 Instructions
579
  * **Suggested target:** ~{TARGET_PER_PERSON} images per reviewer.
580
  * **The Task:** For each set, you will see 5 variations of the same satellite image.
581
  * **Rating:** Rate each image from **1 (Poor)** to **10 (Excellent)** based on how clearly the RTS feature (indicated by the **Red Box**) is depicted.
582
+
583
  ### ⏸️ Saving & Resuming
584
  * **Automatic Saving:** Your progress is saved automatically after every "Submit".
585
  * **Take a Break:** You can close this tab at any time.
 
587
 
588
  ---
589
  **Questions or issues?** Email **{CONTACT_EMAIL}** — we appreciate your feedback and suggestions.
590
+
591
  **Ready?** Enter your details below to begin.
592
  """
593
  )
 
607
 
608
  eval_panel = gr.Group(visible=False)
609
  with eval_panel:
 
 
610
  gr.Markdown(
611
  """
612
  Focus your attention on the area inside the **Red Box**. This marks the potential location of the Retrogressive Thaw Slump (RTS). Compare the five images below. Rate how clearly and accurately each image depicts the **RTS** feature.
613
+
614
  **Rating Scale (1 - 10):**
615
  * **10 (Excellent):** The RTS feature is sharp, distinct, and clearly visible.
616
  * **1 (Poor):** The RTS feature is blurry, distorted, or impossible to distinguish.
617
  """
618
  )
619
+
620
  with gr.Row():
621
  with gr.Column(scale=1, min_width=150):
622
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 1</div>")
623
  image_1 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
624
  score_1 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
625
+
626
  with gr.Column(scale=1, min_width=150):
627
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 2</div>")
628
  image_2 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
629
  score_2 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
630
+
631
  with gr.Column(scale=1, min_width=150):
632
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 3</div>")
633
  image_3 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
634
  score_3 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
635
+
636
  with gr.Column(scale=1, min_width=150):
637
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 4</div>")
638
  image_4 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
639
  score_4 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
640
+
641
  with gr.Column(scale=1, min_width=150):
642
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 5</div>")
643
  image_5 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
 
648
  lines=2,
649
  placeholder="If there are multiple RTS or ambiguities, please note here."
650
  )
651
+
652
  with gr.Row():
653
  submit_next_btn = gr.Button("Submit & Next Image", variant="primary")
654
  pause_exit_btn = gr.Button("Exit", variant="secondary")
655
 
 
656
  your_jsonl_path = gr.State()
657
+
658
  with gr.Group(visible=False) as thanks_group:
659
  thanks_md = gr.Markdown("### ✅ Thanks! Your responses were saved.\n\nClick **Start Again** to evaluate another image.")
660
  restart_btn = gr.Button("Start Again", variant="primary")
661
 
662
  # --- Wiring ---
 
663
  start_event = start_btn.click(
664
  start_or_resume,
665
  inputs=[name, email],
 
678
  inputs=[state_seen, state_samples],
679
  outputs=[state_seen, state_samples],
680
  )
681
+
682
  # 2. Then show the "Thanks/Resume" screen with the 'how many left' message
683
  pause_event.then(
684
  to_thanks,
 
701
  outputs=[eval_panel, thanks_group, thanks_md],
702
  )
703
 
 
704
  restart_event = restart_btn.click(
705
  reset_to_start,
706
  inputs=[],
 
712
  )
713
 
714
  if __name__ == "__main__":
 
715
  if HF_RESULTS_REPO:
716
  from huggingface_hub import snapshot_download
 
717
  try:
 
718
  snapshot_download(
719
  repo_id=HF_RESULTS_REPO,
720
  repo_type="dataset",
721
+ local_dir=".",
722
  allow_patterns=["data/*", "results/*"],
723
  token=HF_TOKEN
724
  )
 
726
  print(f"Error reading from HF: {e}")
727
 
728
  ensure_paths()
 
729
  _ = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
730
+
731
+ print("✅ Launching app.")
732
  demo.queue()
733
+ demo.launch()