forouzanfallah committed on
Commit
bfa3575
·
verified ·
1 Parent(s): 47ad573

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -920
app.py CHANGED
@@ -1,1503 +1,723 @@
1
  import os
2
-
3
  import json
4
-
5
  import hashlib
6
-
7
  import random
8
-
9
  import threading
10
-
11
  import time
12
-
13
  from dataclasses import dataclass
14
-
15
  from typing import List, Dict, Any
16
 
17
-
18
-
19
  import gradio as gr
20
-
21
  from PIL import Image
22
-
23
  from huggingface_hub import HfApi, CommitOperationAdd
24
 
25
-
26
-
27
  # ----------------------
28
-
29
  # Configuration
30
-
31
  # ----------------------
32
-
33
  # --- HF Repo ---
34
-
35
  HF_RESULTS_REPO = os.getenv("HF_RESULTS_REPO")
36
-
37
  HF_RESULTS_REPO_TYPE = "dataset"
38
-
39
  HF_TOKEN = os.getenv("HF_TOKEN")
40
-
41
  _hf_api = HfApi(token=HF_TOKEN)
42
 
43
-
44
-
45
  # --- Main settings ---
46
-
47
- # UPDATED: Set to 20 as requested
48
-
49
  TARGET_PER_PERSON = 20
50
-
51
  CONTACT_EMAIL = "ffallah@asu.edu"
52
 
53
-
54
-
55
  # --- Paths ---
56
-
57
- # We still use the JSON file to get the list of filenames,
58
-
59
  CAPTIONS_JSON_PATH = os.environ.get("CAPTIONS_JSON_PATH", "data/captions.json")
60
 
61
-
62
-
63
- # Folders with matching filenames across all FIVE folders:
64
-
65
  GT_MASKED_DIR = "data/gt_b" # Image 1
66
-
67
  GT_UNMASKED_DIR = "data/adc_b" # Image 2
68
-
69
  SR_DIR = "data/sr_b" # Image 3
70
-
71
  ORIGINAL_DIR = "data/lr_b" # Image 4
72
-
73
  IMAGE_5_DIR = "data/see_b" # Image 5
74
 
75
-
76
-
77
  # --- Results ---
78
-
79
  RESULTS_DIR = "results"
80
-
81
  PROGRESS_PATH = os.path.join(RESULTS_DIR, "progress.json")
82
-
83
  ALL_RESULTS_JSONL = os.path.join(RESULTS_DIR, "all_results.jsonl")
84
-
85
- SAVE_PII = True # Set to False to anonymize saved name/email
86
-
87
-
88
 
89
  WRITE_LOCK = threading.Lock()
90
-
91
- STRICT_ENFORCEMENT = False # UPDATED: Set to False to prevent crashes if dataset size != target
92
-
93
-
94
 
95
  # ----------------------
96
-
97
  # Data model
98
-
99
  # ----------------------
100
-
101
  @dataclass
102
-
103
  class Sample:
104
-
105
  sample_id: str
106
-
107
  masked_gt_path: str # Image 1
108
-
109
  unmasked_gt_path: str # Image 2
110
-
111
  sr_path: str # Image 3
112
-
113
  original_path: str # Image 4
114
-
115
  image_5_path: str # Image 5
116
 
117
-
118
-
119
  # ----------------------
120
-
121
  # Helpers
122
-
123
  # ----------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  def user_target_count(samples: List[Sample]) -> int:
126
-
127
- # UPDATED: Logic to be more robust.
128
-
129
- # It takes the minimum of the available samples or the target.
130
-
131
- # This prevents crashes if you have fewer or more images than 20.
132
-
133
  return min(len(samples), TARGET_PER_PERSON)
134
 
135
-
136
-
137
  def user_left_count(user_seen: List[str], samples: List[Sample]) -> int:
138
-
139
  target = user_target_count(samples)
140
-
141
- seen = set(user_seen)
142
-
143
- # Only count seen images that are actually in the current sample list
144
-
145
- allowed_ids = {s.sample_id for s in samples}
146
-
147
  seen_in_allowed = len([sid for sid in seen if sid in allowed_ids])
148
-
149
  return max(0, target - seen_in_allowed)
150
 
151
-
152
-
153
  def _ensure_private_repo(repo_id: str):
154
-
155
  try:
156
-
157
  _hf_api.repo_info(repo_id, repo_type=HF_RESULTS_REPO_TYPE)
158
-
159
  except Exception:
160
-
161
  _hf_api.create_repo(repo_id=repo_id, repo_type=HF_RESULTS_REPO_TYPE, private=True)
162
 
163
-
164
-
165
  def push_results_to_private_repo(uid: str):
166
-
167
  if not HF_TOKEN or not HF_RESULTS_REPO:
168
-
169
  return
170
-
171
  try:
172
-
173
- # _ensure_private_repo(HF_RESULTS_REPO)
174
-
175
  os.makedirs(RESULTS_DIR, exist_ok=True)
176
-
177
- # # Ensure files exist
178
-
179
- # if not os.path.exists(ALL_RESULTS_JSONL):
180
-
181
- # open(ALL_RESULTS_JSONL, "a").close()
182
-
183
-
184
-
185
  user_file = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
186
 
187
- # if not os.path.exists(user_file):
188
-
189
- # open(user_file, "a").close()
190
-
191
-
192
-
193
  ops = [
194
-
195
  CommitOperationAdd(
196
-
197
  path_in_repo="results/all_results.jsonl",
198
-
199
  path_or_fileobj=ALL_RESULTS_JSONL
200
-
201
  ),
202
-
203
  CommitOperationAdd(
204
-
205
  path_in_repo=f"results/users/{uid}.jsonl",
206
-
207
  path_or_fileobj=user_file
208
-
209
  ),
210
-
211
  CommitOperationAdd(
212
-
213
  path_in_repo="results/progress.json",
214
-
215
  path_or_fileobj=PROGRESS_PATH
216
-
217
  ),
218
-
219
  ]
220
-
221
  _hf_api.create_commit(
222
-
223
  repo_id=HF_RESULTS_REPO,
224
-
225
  repo_type=HF_RESULTS_REPO_TYPE,
226
-
227
  operations=ops,
228
-
229
  commit_message="Update RTS eval results"
230
-
231
  )
232
-
233
  except Exception as e:
234
-
235
  print("[WARN] push_results_to_private_repo failed:", e)
236
 
237
-
238
-
239
  def ensure_paths():
240
-
241
  os.makedirs(RESULTS_DIR, exist_ok=True)
242
-
243
  for pth, name in [
244
-
245
  (GT_MASKED_DIR, "GT_MASKED_DIR"),
246
-
247
  (GT_UNMASKED_DIR, "GT_UNMASKED_DIR"),
248
-
249
  (SR_DIR, "SR_DIR"),
250
-
251
  (ORIGINAL_DIR, "ORIGINAL_DIR"),
252
-
253
  (IMAGE_5_DIR, "IMAGE_5_DIR"),
254
-
255
  ]:
256
-
257
  if not os.path.isdir(pth):
258
-
259
- # We just print a warning instead of crashing, to allow partial setup
260
-
261
  print(f"Warning: Directory '{pth}' for {name} not found.")
262
 
263
-
264
-
265
  def load_image(path: str) -> Image.Image:
266
-
267
- if not os.path.exists(path):
268
-
269
- return Image.new("RGB", (256, 256), color="gray") # Return placeholder if missing
270
-
271
- return Image.open(path).convert("RGB")
272
-
273
-
274
 
275
  def load_dataset(captions_path: str, gt_masked_dir: str, gt_unmasked_dir: str, sr_dir: str, original_dir: str, image_5_dir: str) -> List[Sample]:
276
-
277
  if not os.path.exists(captions_path):
278
-
279
  return []
280
 
281
-
282
-
283
  with open(captions_path, "r", encoding="utf-8") as f:
284
-
285
- captions_data = json.load(f)
286
-
287
-
 
288
 
289
  samples: List[Sample] = []
290
-
291
  for item in captions_data:
292
-
293
  base_filename = item.get("image")
294
-
295
  if not base_filename:
296
-
297
  continue
298
 
299
-
300
-
301
  sample_id = os.path.splitext(base_filename)[0]
302
-
303
-
304
-
305
- # Define all 5 target paths
306
-
307
  paths = {
308
-
309
  "masked": os.path.join(gt_masked_dir, base_filename),
310
-
311
  "unmasked": os.path.join(gt_unmasked_dir, base_filename),
312
-
313
  "sr": os.path.join(sr_dir, base_filename),
314
-
315
  "original": os.path.join(original_dir, base_filename),
316
-
317
  "img5": os.path.join(image_5_dir, base_filename)
318
-
319
  }
320
 
321
-
322
-
323
- # STRICT CHECK: All 5 must exist
324
-
325
- if all(os.path.exists(p) for p in paths.values()):
326
-
327
- samples.append(
328
-
329
- Sample(
330
-
331
- sample_id=sample_id,
332
-
333
- masked_gt_path=paths["masked"],
334
-
335
- unmasked_gt_path=paths["unmasked"],
336
-
337
- sr_path=paths["sr"],
338
-
339
- original_path=paths["original"],
340
-
341
- image_5_path=paths["img5"]
342
-
343
- )
344
-
345
  )
346
-
347
- else:
348
-
349
- # Debugging: Find out which folder is the culprit
350
-
351
- missing = [k for k, v in paths.items() if not os.path.exists(v)]
352
-
353
- print(f"Skipping {base_filename}: Missing in folders {missing}")
354
-
355
-
356
 
357
  return samples
358
 
359
-
360
-
361
  # ----------------------
362
-
363
  # Progress & results I/O
364
-
365
  # ----------------------
366
-
367
  def hash_user_id(name: str, email: str) -> str:
368
-
369
  norm = (name or "").strip().lower() + "|" + (email or "").strip().lower()
370
-
371
  return hashlib.sha256(norm.encode("utf-8")).hexdigest()[:16]
372
 
373
-
374
-
375
  def load_progress() -> Dict[str, Dict[str, Any]]:
376
-
377
  if not os.path.exists(PROGRESS_PATH):
378
-
379
  return {}
380
-
381
  try:
382
-
383
  with open(PROGRESS_PATH, "r", encoding="utf-8") as f:
384
-
385
  return json.load(f)
386
-
387
  except Exception:
388
-
389
  return {}
390
 
391
-
392
-
393
  def save_progress(progress: Dict[str, Dict[str, Any]]):
394
-
395
  with WRITE_LOCK:
396
-
397
  with open(PROGRESS_PATH, "w", encoding="utf-8") as f:
398
-
399
  json.dump(progress, f, ensure_ascii=False, indent=2)
400
 
401
-
402
-
403
  def append_jsonl(path: str, record: Dict[str, Any]):
404
-
405
  line = json.dumps(record, ensure_ascii=False)
406
-
407
  with WRITE_LOCK:
408
-
409
  with open(path, "a", encoding="utf-8") as f:
410
-
411
  f.write(line + "\n")
412
 
413
-
414
-
415
  # ----------------------
416
-
417
  # LOGIC FOR CONVERTING SLIDERS TO RANK
418
-
419
  # ----------------------
420
-
421
  def convert_scores_to_rank(s1, s2, s3, s4, s5) -> Dict[str, int]:
422
-
423
- """
424
-
425
- Takes 5 scores (1-10). Returns a dictionary:
426
-
427
- {'image_1': rank, 'image_2': rank...}
428
-
429
- where Rank 1 is the Highest Score.
430
-
431
- """
432
-
433
  scores = [
434
-
435
  ("image_1", s1),
436
-
437
  ("image_2", s2),
438
-
439
  ("image_3", s3),
440
-
441
  ("image_4", s4),
442
-
443
  ("image_5", s5)
444
-
445
  ]
446
-
447
- # Sort by score descending (High score first)
448
-
449
  scores.sort(key=lambda x: x[1], reverse=True)
450
-
451
-
452
-
453
  ranks = {}
454
-
455
  current_rank = 1
456
-
457
  for img_key, score in scores:
458
-
459
  ranks[img_key] = current_rank
460
-
461
  current_rank += 1
462
-
463
-
464
-
465
  return ranks
466
 
467
-
468
-
469
  # ----------------------
470
-
471
  # App logic
472
-
473
  # ----------------------
474
-
475
  def pick_next_index(user_seen: List[str], samples: List[Sample]) -> int:
476
-
477
- # Logic: Only pick from samples that match the criteria
478
-
479
- seen_set = set(user_seen)
480
-
481
  remaining = [i for i, s in enumerate(samples) if s.sample_id not in seen_set]
482
-
483
-
484
-
485
  if not remaining:
486
-
487
  return -1
488
-
489
  return random.choice(remaining)
490
 
491
-
492
-
493
  def start_or_resume(name: str, email: str):
494
-
495
  if not name or not email:
496
-
497
  raise gr.Error("Please enter your name and email to begin.")
498
 
499
-
500
-
501
  ensure_paths()
502
-
503
  samples = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
504
 
505
-
506
-
507
  if not samples:
508
-
509
- raise gr.Error("No images found. Please check dataset configuration.")
510
-
511
-
512
 
513
  uid = hash_user_id(name, email)
514
-
515
  progress = load_progress()
516
-
517
  if uid not in progress:
518
-
519
  progress[uid] = {"seen": []}
520
-
521
  save_progress(progress)
522
 
523
-
524
-
525
  user_seen: List[str] = progress[uid].get("seen", [])
526
-
527
  left = user_left_count(user_seen, samples)
528
 
529
-
 
530
 
531
  # If the user has finished their target
532
-
533
  if left == 0 and len(user_seen) >= user_target_count(samples):
534
-
535
  status = (
536
-
537
  f"Welcome back, {name}. You’ve completed all {user_target_count(samples)} images. 🎉\n"
538
-
539
  f"Your personal results file: {os.path.join(RESULTS_DIR, f'{uid}.jsonl')}"
540
-
541
  )
542
-
543
  return (
544
-
545
- uid, samples, user_seen, -1,
546
-
547
- None, None, None, None, None, # images
548
-
549
  status,
550
-
551
  os.path.join(RESULTS_DIR, f"{uid}.jsonl"),
552
-
553
  gr.update(visible=False),
554
-
555
  gr.update(visible=True),
556
-
557
  gr.update(visible=True),
558
-
559
  )
560
 
561
-
562
-
563
  idx = pick_next_index(user_seen, samples)
564
-
565
- if idx == -1:
566
-
567
- # Case where target not reached but no fresh images left
568
-
569
- return (uid, samples, user_seen, -1, None, None, None, None, None, "No more new images available.", "", gr.update(visible=False), gr.update(visible=True), gr.update(visible=True))
570
-
571
-
 
 
 
 
 
572
 
573
  sample = samples[idx]
574
 
575
-
576
-
577
  status = (
578
-
579
  f"Welcome, {name}. Personal progress — images left: {left} of {user_target_count(samples)}.\n"
580
-
581
  f"Current sample: {sample.sample_id}"
582
-
583
  )
584
 
585
-
586
-
587
  os.makedirs(RESULTS_DIR, exist_ok=True)
588
-
589
  user_file_path = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
590
 
591
-
592
-
593
  return (
594
-
595
- uid, samples, user_seen, idx,
596
-
 
597
  load_image(sample.masked_gt_path),
598
-
599
  load_image(sample.unmasked_gt_path),
600
-
601
  load_image(sample.sr_path),
602
-
603
  load_image(sample.original_path),
604
-
605
  load_image(sample.image_5_path),
606
-
607
  status,
608
-
609
  user_file_path,
610
-
611
  gr.update(visible=True),
612
-
613
  gr.update(visible=False),
614
-
615
  gr.update(visible=False),
616
-
617
  )
618
 
619
 
620
-
621
  def _save_record_and_progress(
622
-
623
  name: str,
624
-
625
  email: str,
626
-
627
  uid: str,
628
-
629
  samples: List[Sample],
630
-
631
  user_seen: List[str],
632
-
633
  idx: int,
634
-
635
  score_1: float,
636
-
637
  score_2: float,
638
-
639
  score_3: float,
640
-
641
  score_4: float,
642
-
643
  score_5: float,
644
-
645
  q1_notes: str,
646
-
647
  ):
648
-
649
  if not name or not email:
650
-
651
  raise gr.Error("Please enter your name and email.")
652
 
653
-
654
-
655
  if idx is None or idx < 0 or idx >= len(samples):
656
-
657
  return load_progress()
658
 
659
-
660
-
661
- # --- CALCULATE RANK FROM SLIDERS ---
662
-
663
  rank_dict = convert_scores_to_rank(score_1, score_2, score_3, score_4, score_5)
664
 
665
-
666
-
667
  sample = samples[idx]
668
-
669
  progress = load_progress()
670
-
671
  progress.setdefault(uid, {"seen": []})
672
-
673
  seen = set(progress[uid].get("seen", []))
674
 
675
-
676
-
677
  if sample.sample_id in seen:
678
-
679
  return progress
680
 
681
-
682
-
683
- # We allow saving even if target met, just in case, but usually UI stops them.
684
-
685
-
686
-
687
  record: Dict[str, Any] = {
688
-
689
  "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
690
-
691
  "user_id": uid,
692
-
693
  "name": name if SAVE_PII else None,
694
-
695
  "email": email if SAVE_PII else None,
696
-
697
  "sample_id": sample.sample_id,
698
-
699
- # "image_paths": {
700
-
701
- # "masked_gt": sample.masked_gt_path,
702
-
703
- # "unmasked_gt": sample.unmasked_gt_path,
704
-
705
- # "sr": sample.sr_path,
706
-
707
- # "original": sample.original_path,
708
-
709
- # "image_5": sample.image_5_path,
710
-
711
- # },
712
-
713
- # Save raw scores (1-10) and the ranking
714
-
715
  "raw_scores": {
716
-
717
  "image_1": score_1,
718
-
719
  "image_2": score_2,
720
-
721
  "image_3": score_3,
722
-
723
  "image_4": score_4,
724
-
725
  "image_5": score_5,
726
-
727
  },
728
-
729
  "responses": {
730
-
731
- # "image_ranking": rank_dict, # Format: {"image_1": 1, "image_2": 4, ...}
732
-
733
  "notes": q1_notes or "",
734
-
735
  },
736
-
737
  }
738
 
739
-
740
-
741
  os.makedirs(RESULTS_DIR, exist_ok=True)
742
-
743
  append_jsonl(os.path.join(RESULTS_DIR, f"{uid}.jsonl"), record)
744
-
745
  append_jsonl(ALL_RESULTS_JSONL, record)
746
 
747
- # push_results_to_private_repo(uid)
748
-
749
- thread = threading.Thread(target=push_results_to_private_repo, args=(uid,))
750
-
751
- thread.start()
752
-
753
-
754
 
755
  seen.add(sample.sample_id)
756
-
757
  progress[uid]["seen"] = sorted(list(seen))
758
-
759
  save_progress(progress)
760
-
761
  return progress
762
 
763
-
764
-
765
  # ----------------------
766
-
767
  # Buttons
768
-
769
  # ----------------------
770
-
771
  def submit_finish(
772
-
773
  name: str,
774
-
775
  email: str,
776
-
777
  uid: str,
778
-
779
  samples: List[Sample],
780
-
781
  user_seen: List[str],
782
-
783
  idx: int,
784
-
785
  s1: float, s2: float, s3: float, s4: float, s5: float,
786
-
787
  q1_notes: str
788
-
789
  ):
790
-
791
  try:
792
-
793
  _save_record_and_progress(
794
-
795
  name, email, uid, samples, user_seen, idx,
796
-
797
  s1, s2, s3, s4, s5,
798
-
799
  q1_notes
800
-
801
  )
802
-
803
  except gr.Error:
804
-
805
  return (
806
-
807
  user_seen, idx,
808
-
809
  gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
810
-
811
- gr.update(),
812
-
813
  gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
814
-
815
  gr.update(),
816
-
817
  )
818
 
819
-
820
-
821
  return (
822
-
823
  user_seen, idx,
824
-
825
  gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None),
826
-
827
- gr.update(value=""),
828
-
829
  gr.update(value="Finished!"),
830
-
831
  gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
832
-
833
- gr.update(value=None),
834
-
835
  )
836
 
837
-
838
-
839
  def pause_exit(user_seen, samples):
840
-
841
  return user_seen, samples
842
 
843
-
844
-
845
  def submit_next_image(
846
-
847
  name: str,
848
-
849
  email: str,
850
-
851
  uid: str,
852
-
853
  samples: List[Sample],
854
-
855
- user_seen: List[str],
856
-
857
  idx: int,
858
-
859
  s1: float, s2: float, s3: float, s4: float, s5: float,
860
-
861
  q1_notes: str
862
-
863
  ):
864
-
865
  try:
866
-
867
  progress = _save_record_and_progress(
868
-
869
  name, email, uid, samples, user_seen, idx,
870
-
871
  s1, s2, s3, s4, s5,
872
-
873
  q1_notes
874
-
875
  )
 
 
876
 
877
- except gr.Error as e:
878
-
879
- raise e
880
-
881
- # return (
882
-
883
- # user_seen, idx,
884
-
885
- # gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
886
-
887
- # gr.update(),
888
-
889
- # gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
890
-
891
- # gr.update(),
892
-
893
- # )
894
-
895
-
896
-
897
- # left_after = user_left_count(progress[uid]["seen"], samples)
898
-
899
- # target = user_target_count(samples)
900
-
901
- updated_seen = progress[uid]["seen"]
902
-
903
-
904
-
905
- if len(updated_seen) >= TARGET_PER_PERSON:
906
-
907
- status = (
908
-
909
- f"Saved! You’ve completed all {target} images. 🎉 "
910
-
911
- f"Click **Exit** to close this session."
912
-
913
- )
914
-
915
- return (
916
-
917
- updated_seen, -1,
918
-
919
- None, None, None, None, None, # No more images to load
920
-
921
- gr.update(value="Target reached!"), # Status
922
-
923
- gr.update(value=""), # Clear notes
924
-
925
- 5, 5, 5, 5, 5 # Reset sliders
926
-
927
- )
928
-
929
-
930
-
931
- # if left_after == 0:
932
-
933
- # status = (
934
-
935
- # f"Saved! You’ve completed all {target} images. 🎉 "
936
-
937
- # f"Click **Exit** to close this session."
938
-
939
- # )
940
-
941
- # return (
942
-
943
- # updated_seen, -1,
944
-
945
- # None, None, None, None, None, # Return None to avoid image load errors
946
-
947
- # gr.update(value="Target reached! Processing..."),
948
-
949
- # gr.update(value=""),
950
-
951
- # 5, 5, 5, 5, 5,
952
-
953
- # gr.update(value=None)
954
-
955
- # )
956
-
957
-
958
-
959
- idx_next = pick_next_index(updated_seen, samples)
960
-
961
- if idx_next == -1:
962
-
963
- return (updated_seen, -1, None, None, None, None, None, "No more images.", "", 5, 5, 5, 5, 5)
964
-
965
- # Fallback if no images left
966
-
967
- # return (
968
-
969
- # progress[uid]["seen"], -1,
970
-
971
- # None, None, None, None, None,
972
-
973
- # gr.update(value="No more images."),
974
-
975
- # gr.update(value=""),
976
-
977
- # gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
978
-
979
- # gr.update(value=None),
980
-
981
- # )
982
-
983
-
984
-
985
- sample_next = samples[idx_next]
986
-
987
-
988
-
989
- status =""
990
 
991
- # (
 
992
 
993
- # f"Saved! Personal progress images left: {left_after} of {target}.\n"
 
 
 
 
 
 
 
 
 
 
 
 
994
 
995
- # f"Next sample: {sample_next.sample_id}"
 
 
 
 
 
 
 
 
 
996
 
997
- # )
 
998
 
999
  return (
1000
-
1001
- updated_seen, idx_next,
1002
-
1003
  load_image(sample_next.masked_gt_path),
1004
-
1005
  load_image(sample_next.unmasked_gt_path),
1006
-
1007
  load_image(sample_next.sr_path),
1008
-
1009
  load_image(sample_next.original_path),
1010
-
1011
  load_image(sample_next.image_5_path),
1012
-
1013
  gr.update(value=""),
1014
-
1015
  gr.update(value=""),
1016
-
1017
  5, 5, 5, 5, 5,
1018
-
1019
-
1020
-
1021
  )
1022
 
1023
- # return (
1024
-
1025
- # progress[uid]["seen"], idx_next,
1026
-
1027
- # load_image(sample_next.masked_gt_path),
1028
-
1029
- # load_image(sample_next.unmasked_gt_path),
1030
-
1031
- # load_image(sample_next.sr_path),
1032
-
1033
- # load_image(sample_next.original_path),
1034
-
1035
- # load_image(sample_next.image_5_path),
1036
-
1037
- # gr.update(value=status),
1038
-
1039
- # gr.update(value=""),
1040
-
1041
- # gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
1042
-
1043
- # gr.update(value=None),
1044
-
1045
- # )
1046
-
1047
-
1048
-
1049
-
1050
 
1051
  def to_thanks(name: str, user_seen: List[str], samples: List[Sample]):
1052
-
1053
- # Calculate how many are left based on the updated seen list
1054
-
1055
  left = user_left_count(user_seen, samples)
1056
-
1057
  target = user_target_count(samples)
1058
-
1059
  if left > 0:
1060
-
1061
- # Message for users who are leaving early
1062
-
1063
- msg = (
1064
-
1065
  f"### ⏸️ Session Paused!\n\n"
1066
-
1067
  f"### ✅ Thanks, {name}! Your progress has been saved.\n\n"
1068
-
1069
- f"We’re grateful for your time and expertise. Our suggested target is "
1070
-
1071
- f"{TARGET_PER_PERSON} images per reviewer"
1072
-
1073
- f"You have **{left}** images left.\n\n"
1074
-
1075
- f"You can close this tab and return whenever you like—just use the same Name and Email to **continue where you left off**.\n\n"
1076
-
1077
- f"If you have questions, issues, or suggestions, please email **{CONTACT_EMAIL}**.\n\n"
1078
-
1079
- f"Click **Start Again** to evaluate another image."
1080
-
1081
  )
1082
-
1083
  else:
1084
-
1085
- # Message for users who completed the target
1086
-
1087
  msg = (
1088
-
1089
  f"### ✅ All Done, {name}!\n\n"
1090
-
1091
  f"You’ve completed the target of **{target}** images. Your responses are securely saved.\n\n"
1092
-
1093
  f"We’re extremely grateful for your time and expertise. You are welcome to continue with more images if you wish, or you can finish here.\n\n"
1094
-
1095
  f"If you have questions, issues, or suggestions, please email **{CONTACT_EMAIL}**.\n\n"
1096
-
1097
  )
1098
-
1099
  return gr.update(visible=False), gr.update(visible=True), gr.update(value=msg)
1100
 
1101
-
1102
-
1103
  def hide_thanks():
1104
-
1105
  return gr.update(visible=False)
1106
 
1107
-
1108
-
1109
  def maybe_show_thanks(name: str, seen: List[str], samples: List[Sample]):
1110
-
1111
- # Check if the user has reached the target
1112
-
1113
  if len(set(seen or [])) >= TARGET_PER_PERSON:
1114
-
1115
  return to_thanks(name, seen, samples)
1116
-
1117
-
1118
-
1119
- # If not done, keep evaluation panel visible
1120
-
1121
  return gr.update(visible=True), gr.update(visible=False), gr.update()
1122
 
1123
-
1124
-
1125
  def reset_to_start():
1126
-
1127
- """
1128
-
1129
- Clears inputs and resets the view to the login page.
1130
-
1131
- """
1132
-
1133
  return (
1134
-
1135
  gr.update(value=""), # Clear Name
1136
-
1137
  gr.update(value=""), # Clear Email
1138
-
1139
  gr.update(visible=True), # Show Start Group
1140
-
1141
  gr.update(visible=True), # Show Intro
1142
-
1143
  gr.update(visible=False), # Hide Eval
1144
-
1145
  gr.update(visible=False), # Hide Thanks
1146
-
1147
  )
1148
 
1149
-
1150
-
1151
  # ----------------------
1152
-
1153
  # UI
1154
-
1155
  # ----------------------
1156
-
1157
  with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
1158
-
1159
  intro_md = gr.Markdown(
1160
-
1161
  f"""
1162
-
1163
  # Retrogressive Thaw Slump (RTS) Human Evaluation
1164
 
1165
-
1166
-
1167
  ### 👋 Welcome, and thanks for lending your expertise!
1168
-
1169
  We’re inviting domain experts to help evaluate satellite image patches for RTS.
1170
 
1171
-
1172
-
1173
  ---
1174
 
1175
-
1176
-
1177
  ### 📋 Instructions
1178
-
1179
  * **Suggested target:** ~{TARGET_PER_PERSON} images per reviewer.
1180
-
1181
  * **The Task:** For each set, you will see 5 variations of the same satellite image.
1182
-
1183
  * **Rating:** Rate each image from **1 (Poor)** to **10 (Excellent)** based on how clearly the RTS feature (indicated by the **Red Box**) is depicted.
1184
 
1185
-
1186
-
1187
  ### ⏸️ Saving & Resuming
1188
-
1189
  * **Automatic Saving:** Your progress is saved automatically after every "Submit".
1190
-
1191
  * **Take a Break:** You can close this tab at any time.
1192
-
1193
  * **How to Resume:** Simply return here and enter the **exact same Name and Email**. The system will pick up exactly where you left off.
1194
 
1195
-
1196
-
1197
  ---
1198
-
1199
  **Questions or issues?** Email **{CONTACT_EMAIL}** — we appreciate your feedback and suggestions.
1200
 
1201
-
1202
-
1203
  **Ready?** Enter your details below to begin.
1204
-
1205
  """
1206
-
1207
  )
1208
 
1209
-
1210
-
1211
  # Hidden states
1212
-
1213
  state_uid = gr.State("")
1214
-
1215
  state_samples = gr.State([])
1216
-
1217
  state_seen = gr.State([])
1218
-
1219
  state_idx = gr.State(-1)
1220
 
1221
-
1222
-
1223
  with gr.Group() as start_group:
1224
-
1225
  with gr.Row():
1226
-
1227
  name = gr.Textbox(label="Full name", placeholder="Jane Doe", autofocus=True)
1228
-
1229
  email = gr.Textbox(label="Email address", placeholder="jane@example.com")
1230
-
1231
  start_btn = gr.Button("Start / Resume", variant="primary")
1232
-
1233
  status = gr.Markdown("\n")
1234
 
1235
-
1236
-
1237
  eval_panel = gr.Group(visible=False)
1238
-
1239
  with eval_panel:
1240
-
1241
-
1242
-
1243
- # --- NEW LAYOUT: 5 COLUMNS, 1-10 SLIDERS ---
1244
-
1245
  gr.Markdown(
1246
-
1247
  """
1248
-
1249
  Focus your attention on the area inside the **Red Box**. This marks the potential location of the Retrogressive Thaw Slump (RTS). Compare the five images below. Rate how clearly and accurately each image depicts the **RTS** feature.
1250
 
1251
-
1252
-
1253
  **Rating Scale (1 - 10):**
1254
-
1255
  * **10 (Excellent):** The RTS feature is sharp, distinct, and clearly visible.
1256
-
1257
  * **1 (Poor):** The RTS feature is blurry, distorted, or impossible to distinguish.
1258
-
1259
  """
1260
-
1261
  )
1262
 
1263
-
1264
-
1265
  with gr.Row():
1266
-
1267
  with gr.Column(scale=1, min_width=150):
1268
-
1269
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 1</div>")
1270
-
1271
  image_1 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
1272
-
1273
  score_1 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
1274
 
1275
-
1276
-
1277
  with gr.Column(scale=1, min_width=150):
1278
-
1279
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 2</div>")
1280
-
1281
  image_2 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
1282
-
1283
  score_2 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
1284
 
1285
-
1286
-
1287
  with gr.Column(scale=1, min_width=150):
1288
-
1289
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 3</div>")
1290
-
1291
  image_3 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
1292
-
1293
  score_3 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
1294
 
1295
-
1296
-
1297
  with gr.Column(scale=1, min_width=150):
1298
-
1299
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 4</div>")
1300
-
1301
  image_4 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
1302
-
1303
  score_4 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
1304
 
1305
-
1306
-
1307
  with gr.Column(scale=1, min_width=150):
1308
-
1309
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 5</div>")
1310
-
1311
  image_5 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
1312
-
1313
  score_5 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
1314
 
1315
-
1316
-
1317
  notes_q1 = gr.Textbox(
1318
-
1319
  label="Notes (Optional)",
1320
-
1321
  lines=2,
1322
-
1323
  placeholder="If there are multiple RTS or ambiguities, please note here."
1324
-
1325
  )
1326
 
1327
-
1328
-
1329
  with gr.Row():
1330
-
1331
  submit_next_btn = gr.Button("Submit & Next Image", variant="primary")
1332
-
1333
  pause_exit_btn = gr.Button("Exit", variant="secondary")
1334
 
1335
-
1336
-
1337
- # your_jsonl_path = gr.Textbox(label="Your results file path (for reference)", interactive=False)
1338
-
1339
  your_jsonl_path = gr.State()
1340
 
1341
  with gr.Group(visible=False) as thanks_group:
1342
-
1343
  thanks_md = gr.Markdown("### ✅ Thanks! Your responses were saved.\n\nClick **Start Again** to evaluate another image.")
1344
-
1345
  restart_btn = gr.Button("Start Again", variant="primary")
1346
 
1347
-
1348
-
1349
  # --- Wiring ---
1350
-
1351
-
1352
-
1353
  start_event = start_btn.click(
1354
-
1355
  start_or_resume,
1356
-
1357
  inputs=[name, email],
1358
-
1359
  outputs=[
1360
-
1361
  state_uid, state_samples, state_seen, state_idx,
1362
-
1363
  image_1, image_2, image_3, image_4, image_5,
1364
-
1365
  status, your_jsonl_path,
1366
-
1367
  eval_panel, intro_md, start_group
1368
-
1369
  ],
1370
-
1371
  )
1372
-
1373
  start_event.then(hide_thanks, inputs=None, outputs=[thanks_group])
1374
 
1375
-
1376
-
1377
  # 1. When Pause is clicked, just pass the state through
1378
-
1379
  pause_event = pause_exit_btn.click(
1380
-
1381
  pause_exit,
1382
-
1383
  inputs=[state_seen, state_samples],
1384
-
1385
  outputs=[state_seen, state_samples],
1386
-
1387
  )
1388
 
1389
-
1390
-
1391
  # 2. Then show the "Thanks/Resume" screen with the 'how many left' message
1392
-
1393
  pause_event.then(
1394
-
1395
  to_thanks,
1396
-
1397
  inputs=[name, state_seen, state_samples],
1398
-
1399
  outputs=[eval_panel, thanks_group, thanks_md],
1400
-
1401
  )
1402
 
1403
-
1404
-
1405
  nextimg_event = submit_next_btn.click(
1406
-
1407
  submit_next_image,
1408
-
1409
  inputs=[name, email, state_uid, state_samples, state_seen, state_idx,
1410
-
1411
  score_1, score_2, score_3, score_4, score_5, notes_q1],
1412
-
1413
  outputs=[state_seen, state_idx,
1414
-
1415
  image_1, image_2, image_3, image_4, image_5,
1416
-
1417
  status, notes_q1,
1418
-
1419
  score_1, score_2, score_3, score_4, score_5],
1420
-
1421
  )
1422
-
1423
  nextimg_event.then(
1424
-
1425
  maybe_show_thanks,
1426
-
1427
  inputs=[name, state_seen, state_samples],
1428
-
1429
  outputs=[eval_panel, thanks_group, thanks_md],
1430
-
1431
  )
1432
 
1433
-
1434
-
1435
- # --- CHANGED: Calls reset_to_start instead of start_or_resume ---
1436
-
1437
  restart_event = restart_btn.click(
1438
-
1439
  reset_to_start,
1440
-
1441
  inputs=[],
1442
-
1443
  outputs=[
1444
-
1445
  name, email,
1446
-
1447
  start_group, intro_md,
1448
-
1449
  eval_panel, thanks_group
1450
-
1451
  ],
1452
-
1453
  )
1454
 
1455
-
1456
-
1457
  if __name__ == "__main__":
1458
-
1459
- # --- DYNAMIC READ FROM HF_RESULTS_REPO ---
1460
-
1461
  if HF_RESULTS_REPO:
1462
-
1463
  from huggingface_hub import snapshot_download
1464
-
1465
- # print(f"Reading images and metadata from: {HF_RESULTS_REPO}...")
1466
-
1467
  try:
1468
-
1469
- # This pulls your repo's 'data' folder into the current workspace
1470
-
1471
  snapshot_download(
1472
-
1473
  repo_id=HF_RESULTS_REPO,
1474
-
1475
  repo_type="dataset",
1476
-
1477
- local_dir=".",
1478
-
1479
  allow_patterns=["data/*", "results/*"],
1480
-
1481
  token=HF_TOKEN
1482
-
1483
  )
1484
-
1485
  except Exception as e:
1486
-
1487
  print(f"Error reading from HF: {e}")
1488
 
1489
-
1490
-
1491
  ensure_paths()
1492
-
1493
- # Pre-check dataset load from the newly downloaded files
1494
-
1495
  _ = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
1496
 
1497
-
1498
-
1499
- print("✅ Successfully synced with HF Repo. Launching app.")
1500
-
1501
  demo.queue()
1502
-
1503
- demo.launch()
 
1
  import os
 
2
  import json
 
3
  import hashlib
 
4
  import random
 
5
  import threading
 
6
  import time
 
7
  from dataclasses import dataclass
 
8
  from typing import List, Dict, Any
9
 
 
 
10
  import gradio as gr
 
11
  from PIL import Image
 
12
  from huggingface_hub import HfApi, CommitOperationAdd
13
 
 
 
14
  # ----------------------
 
15
  # Configuration
 
16
  # ----------------------
 
17
  # --- HF Repo ---
 
18
  HF_RESULTS_REPO = os.getenv("HF_RESULTS_REPO")
 
19
  HF_RESULTS_REPO_TYPE = "dataset"
 
20
  HF_TOKEN = os.getenv("HF_TOKEN")
 
21
  _hf_api = HfApi(token=HF_TOKEN)
22
 
 
 
23
  # --- Main settings ---
 
 
 
24
  TARGET_PER_PERSON = 20
 
25
  CONTACT_EMAIL = "ffallah@asu.edu"
26
 
 
 
27
  # --- Paths ---
 
 
 
28
  CAPTIONS_JSON_PATH = os.environ.get("CAPTIONS_JSON_PATH", "data/captions.json")
29
 
 
 
 
 
30
  GT_MASKED_DIR = "data/gt_b" # Image 1
 
31
  GT_UNMASKED_DIR = "data/adc_b" # Image 2
 
32
  SR_DIR = "data/sr_b" # Image 3
 
33
  ORIGINAL_DIR = "data/lr_b" # Image 4
 
34
  IMAGE_5_DIR = "data/see_b" # Image 5
35
 
 
 
36
  # --- Results ---
 
37
  RESULTS_DIR = "results"
 
38
  PROGRESS_PATH = os.path.join(RESULTS_DIR, "progress.json")
 
39
  ALL_RESULTS_JSONL = os.path.join(RESULTS_DIR, "all_results.jsonl")
40
+ SAVE_PII = True
 
 
 
41
 
42
  WRITE_LOCK = threading.Lock()
43
+ STRICT_ENFORCEMENT = False
 
 
 
44
 
45
  # ----------------------
 
46
  # Data model
 
47
  # ----------------------
 
48
  @dataclass
 
49
  class Sample:
 
50
  sample_id: str
 
51
  masked_gt_path: str # Image 1
 
52
  unmasked_gt_path: str # Image 2
 
53
  sr_path: str # Image 3
 
54
  original_path: str # Image 4
 
55
  image_5_path: str # Image 5
56
 
 
 
57
  # ----------------------
 
58
  # Helpers
 
59
  # ----------------------
60
+ # def ensure_sample_objects(samples_input):
61
+ # """
62
+ # Accepts either:
63
+ # - list[Sample] (already objects), or
64
+ # - list[dict] (serialized Sample.__dict__)
65
+ # Returns list[Sample].
66
+ # """
67
+ # if not samples_input:
68
+ # return []
69
+ # if isinstance(samples_input, list):
70
+ # if len(samples_input) == 0:
71
+ # return []
72
+ # first = samples_input[0]
73
+ # if isinstance(first, dict):
74
+ # try:
75
+ # return [Sample(**s) for s in samples_input]
76
+ # except Exception:
77
+ # # fall through to returning empty to avoid crashes
78
+ # return []
79
+ # elif isinstance(first, Sample):
80
+ # return samples_input
81
+ # return []
82
 
83
  def user_target_count(samples: List[Sample]) -> int:
 
 
 
 
 
 
 
84
  return min(len(samples), TARGET_PER_PERSON)
85
 
 
 
86
  def user_left_count(user_seen: List[str], samples: List[Sample]) -> int:
 
87
  target = user_target_count(samples)
88
+ seen = set(user_seen or [])
89
+ allowed_ids = {s.sample_id for s in samples}
 
 
 
 
 
90
  seen_in_allowed = len([sid for sid in seen if sid in allowed_ids])
 
91
  return max(0, target - seen_in_allowed)
92
 
 
 
93
  def _ensure_private_repo(repo_id: str):
 
94
  try:
 
95
  _hf_api.repo_info(repo_id, repo_type=HF_RESULTS_REPO_TYPE)
 
96
  except Exception:
 
97
  _hf_api.create_repo(repo_id=repo_id, repo_type=HF_RESULTS_REPO_TYPE, private=True)
98
 
 
 
99
  def push_results_to_private_repo(uid: str):
 
100
  if not HF_TOKEN or not HF_RESULTS_REPO:
 
101
  return
 
102
  try:
 
 
 
103
  os.makedirs(RESULTS_DIR, exist_ok=True)
 
 
 
 
 
 
 
 
 
104
  user_file = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
105
 
 
 
 
 
 
 
106
  ops = [
 
107
  CommitOperationAdd(
 
108
  path_in_repo="results/all_results.jsonl",
 
109
  path_or_fileobj=ALL_RESULTS_JSONL
 
110
  ),
 
111
  CommitOperationAdd(
 
112
  path_in_repo=f"results/users/{uid}.jsonl",
 
113
  path_or_fileobj=user_file
 
114
  ),
 
115
  CommitOperationAdd(
 
116
  path_in_repo="results/progress.json",
 
117
  path_or_fileobj=PROGRESS_PATH
 
118
  ),
 
119
  ]
 
120
  _hf_api.create_commit(
 
121
  repo_id=HF_RESULTS_REPO,
 
122
  repo_type=HF_RESULTS_REPO_TYPE,
 
123
  operations=ops,
 
124
  commit_message="Update RTS eval results"
 
125
  )
 
126
  except Exception as e:
 
127
  print("[WARN] push_results_to_private_repo failed:", e)
128
 
 
 
129
  def ensure_paths():
 
130
  os.makedirs(RESULTS_DIR, exist_ok=True)
 
131
  for pth, name in [
 
132
  (GT_MASKED_DIR, "GT_MASKED_DIR"),
 
133
  (GT_UNMASKED_DIR, "GT_UNMASKED_DIR"),
 
134
  (SR_DIR, "SR_DIR"),
 
135
  (ORIGINAL_DIR, "ORIGINAL_DIR"),
 
136
  (IMAGE_5_DIR, "IMAGE_5_DIR"),
 
137
  ]:
 
138
  if not os.path.isdir(pth):
 
 
 
139
  print(f"Warning: Directory '{pth}' for {name} not found.")
140
 
 
 
141
  def load_image(path: str) -> Image.Image:
142
+ if not path or not os.path.exists(path):
143
+ # return a simple placeholder image so UI doesn't crash
144
+ return Image.new("RGB", (256, 256), color="gray")
145
+ try:
146
+ return Image.open(path).convert("RGB")
147
+ except Exception:
148
+ return Image.new("RGB", (256, 256), color="gray")
 
149
 
150
  def load_dataset(captions_path: str, gt_masked_dir: str, gt_unmasked_dir: str, sr_dir: str, original_dir: str, image_5_dir: str) -> List[Sample]:
 
151
  if not os.path.exists(captions_path):
152
+ print(f"Captions file not found at {captions_path}")
153
  return []
154
 
 
 
155
  with open(captions_path, "r", encoding="utf-8") as f:
156
+ try:
157
+ captions_data = json.load(f)
158
+ except Exception:
159
+ print("Failed to parse captions JSON.")
160
+ return []
161
 
162
  samples: List[Sample] = []
 
163
  for item in captions_data:
 
164
  base_filename = item.get("image")
 
165
  if not base_filename:
 
166
  continue
167
 
 
 
168
  sample_id = os.path.splitext(base_filename)[0]
 
 
 
 
 
169
  paths = {
 
170
  "masked": os.path.join(gt_masked_dir, base_filename),
 
171
  "unmasked": os.path.join(gt_unmasked_dir, base_filename),
 
172
  "sr": os.path.join(sr_dir, base_filename),
 
173
  "original": os.path.join(original_dir, base_filename),
 
174
  "img5": os.path.join(image_5_dir, base_filename)
 
175
  }
176
 
177
+ # If strict enforcement required, require all five files to exist.
178
+ if STRICT_ENFORCEMENT:
179
+ if not all(os.path.exists(p) for p in paths.values()):
180
+ missing = [k for k, v in paths.items() if not os.path.exists(v)]
181
+ print(f"Skipping {base_filename}: Missing in folders {missing}")
182
+ continue
183
+
184
+ # In non-strict mode, it's okay to include samples even if some files missing;
185
+ # we will supply placeholders at load time.
186
+ samples.append(
187
+ Sample(
188
+ sample_id=sample_id,
189
+ masked_gt_path=paths["masked"],
190
+ unmasked_gt_path=paths["unmasked"],
191
+ sr_path=paths["sr"],
192
+ original_path=paths["original"],
193
+ image_5_path=paths["img5"]
 
 
 
 
 
 
 
194
  )
195
+ )
 
 
 
 
 
 
 
 
 
196
 
197
  return samples
198
 
 
 
199
  # ----------------------
 
200
  # Progress & results I/O
 
201
  # ----------------------
 
202
  def hash_user_id(name: str, email: str) -> str:
 
203
  norm = (name or "").strip().lower() + "|" + (email or "").strip().lower()
 
204
  return hashlib.sha256(norm.encode("utf-8")).hexdigest()[:16]
205
 
 
 
206
  def load_progress() -> Dict[str, Dict[str, Any]]:
 
207
  if not os.path.exists(PROGRESS_PATH):
 
208
  return {}
 
209
  try:
 
210
  with open(PROGRESS_PATH, "r", encoding="utf-8") as f:
 
211
  return json.load(f)
 
212
  except Exception:
 
213
  return {}
214
 
 
 
215
  def save_progress(progress: Dict[str, Dict[str, Any]]):
 
216
  with WRITE_LOCK:
 
217
  with open(PROGRESS_PATH, "w", encoding="utf-8") as f:
 
218
  json.dump(progress, f, ensure_ascii=False, indent=2)
219
 
 
 
220
  def append_jsonl(path: str, record: Dict[str, Any]):
 
221
  line = json.dumps(record, ensure_ascii=False)
 
222
  with WRITE_LOCK:
 
223
  with open(path, "a", encoding="utf-8") as f:
 
224
  f.write(line + "\n")
225
 
 
 
226
  # ----------------------
 
227
  # LOGIC FOR CONVERTING SLIDERS TO RANK
 
228
  # ----------------------
 
229
  def convert_scores_to_rank(s1, s2, s3, s4, s5) -> Dict[str, int]:
 
 
 
 
 
 
 
 
 
 
 
230
  scores = [
 
231
  ("image_1", s1),
 
232
  ("image_2", s2),
 
233
  ("image_3", s3),
 
234
  ("image_4", s4),
 
235
  ("image_5", s5)
 
236
  ]
 
 
 
237
  scores.sort(key=lambda x: x[1], reverse=True)
 
 
 
238
  ranks = {}
 
239
  current_rank = 1
 
240
  for img_key, score in scores:
 
241
  ranks[img_key] = current_rank
 
242
  current_rank += 1
 
 
 
243
  return ranks
244
 
 
 
245
  # ----------------------
 
246
  # App logic
 
247
  # ----------------------
 
248
  def pick_next_index(user_seen: List[str], samples: List[Sample]) -> int:
249
+ # FIX: define seen_set and use samples directly
250
+ seen_set = set(user_seen or [])
 
 
 
251
  remaining = [i for i, s in enumerate(samples) if s.sample_id not in seen_set]
 
 
 
252
  if not remaining:
 
253
  return -1
 
254
  return random.choice(remaining)
255
 
 
 
256
  def start_or_resume(name: str, email: str):
 
257
  if not name or not email:
 
258
  raise gr.Error("Please enter your name and email to begin.")
259
 
 
 
260
  ensure_paths()
 
261
  samples = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
262
 
 
 
263
  if not samples:
264
+ raise gr.Error("No images found. Please check dataset configuration.")
 
 
 
265
 
266
  uid = hash_user_id(name, email)
 
267
  progress = load_progress()
 
268
  if uid not in progress:
 
269
  progress[uid] = {"seen": []}
 
270
  save_progress(progress)
271
 
 
 
272
  user_seen: List[str] = progress[uid].get("seen", [])
 
273
  left = user_left_count(user_seen, samples)
274
 
275
+ # placeholder image to avoid Gradio trying to load None
276
+ placeholder_img = Image.new("RGB", (256, 256), color="gray")
277
 
278
  # If the user has finished their target
 
279
  if left == 0 and len(user_seen) >= user_target_count(samples):
 
280
  status = (
 
281
  f"Welcome back, {name}. You’ve completed all {user_target_count(samples)} images. 🎉\n"
 
282
  f"Your personal results file: {os.path.join(RESULTS_DIR, f'{uid}.jsonl')}"
 
283
  )
 
284
  return (
285
+ uid,
286
+ samples,
287
+ user_seen,
288
+ -1,
289
+ placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
290
  status,
 
291
  os.path.join(RESULTS_DIR, f"{uid}.jsonl"),
 
292
  gr.update(visible=False),
 
293
  gr.update(visible=True),
 
294
  gr.update(visible=True),
 
295
  )
296
 
 
 
297
  idx = pick_next_index(user_seen, samples)
298
+ if idx == -1:
299
+ return (
300
+ uid,
301
+ samples,
302
+ user_seen,
303
+ -1,
304
+ placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
305
+ "No more new images available.",
306
+ "",
307
+ gr.update(visible=False),
308
+ gr.update(visible=True),
309
+ gr.update(visible=True)
310
+ )
311
 
312
  sample = samples[idx]
313
 
 
 
314
  status = (
 
315
  f"Welcome, {name}. Personal progress — images left: {left} of {user_target_count(samples)}.\n"
 
316
  f"Current sample: {sample.sample_id}"
 
317
  )
318
 
 
 
319
  os.makedirs(RESULTS_DIR, exist_ok=True)
 
320
  user_file_path = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
321
 
 
 
322
  return (
323
+ uid,
324
+ samples,
325
+ user_seen,
326
+ idx,
327
  load_image(sample.masked_gt_path),
 
328
  load_image(sample.unmasked_gt_path),
 
329
  load_image(sample.sr_path),
 
330
  load_image(sample.original_path),
 
331
  load_image(sample.image_5_path),
 
332
  status,
 
333
  user_file_path,
 
334
  gr.update(visible=True),
 
335
  gr.update(visible=False),
 
336
  gr.update(visible=False),
 
337
  )
338
 
339
 
 
340
  def _save_record_and_progress(
 
341
  name: str,
 
342
  email: str,
 
343
  uid: str,
 
344
  samples: List[Sample],
 
345
  user_seen: List[str],
 
346
  idx: int,
 
347
  score_1: float,
 
348
  score_2: float,
 
349
  score_3: float,
 
350
  score_4: float,
 
351
  score_5: float,
 
352
  q1_notes: str,
 
353
  ):
 
354
  if not name or not email:
 
355
  raise gr.Error("Please enter your name and email.")
356
 
357
+ # FIX: use samples directly
 
358
  if idx is None or idx < 0 or idx >= len(samples):
 
359
  return load_progress()
360
 
 
 
 
 
361
  rank_dict = convert_scores_to_rank(score_1, score_2, score_3, score_4, score_5)
362
 
 
 
363
  sample = samples[idx]
 
364
  progress = load_progress()
 
365
  progress.setdefault(uid, {"seen": []})
 
366
  seen = set(progress[uid].get("seen", []))
367
 
 
 
368
  if sample.sample_id in seen:
 
369
  return progress
370
 
 
 
 
 
 
 
371
  record: Dict[str, Any] = {
 
372
  "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
 
373
  "user_id": uid,
 
374
  "name": name if SAVE_PII else None,
 
375
  "email": email if SAVE_PII else None,
 
376
  "sample_id": sample.sample_id,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  "raw_scores": {
 
378
  "image_1": score_1,
 
379
  "image_2": score_2,
 
380
  "image_3": score_3,
 
381
  "image_4": score_4,
 
382
  "image_5": score_5,
 
383
  },
 
384
  "responses": {
 
 
 
385
  "notes": q1_notes or "",
386
+ "image_ranking": rank_dict,
387
  },
 
388
  }
389
 
 
 
390
  os.makedirs(RESULTS_DIR, exist_ok=True)
 
391
  append_jsonl(os.path.join(RESULTS_DIR, f"{uid}.jsonl"), record)
 
392
  append_jsonl(ALL_RESULTS_JSONL, record)
393
 
394
+ # start background push but don't let failures crash the app
395
+ try:
396
+ thread = threading.Thread(target=push_results_to_private_repo, args=(uid,))
397
+ thread.daemon = True
398
+ thread.start()
399
+ except Exception:
400
+ pass
401
 
402
  seen.add(sample.sample_id)
 
403
  progress[uid]["seen"] = sorted(list(seen))
 
404
  save_progress(progress)
 
405
  return progress
406
 
 
 
407
  # ----------------------
 
408
  # Buttons
 
409
  # ----------------------
 
410
  def submit_finish(
 
411
  name: str,
 
412
  email: str,
 
413
  uid: str,
 
414
  samples: List[Sample],
 
415
  user_seen: List[str],
 
416
  idx: int,
 
417
  s1: float, s2: float, s3: float, s4: float, s5: float,
 
418
  q1_notes: str
 
419
  ):
 
420
  try:
 
421
  _save_record_and_progress(
 
422
  name, email, uid, samples, user_seen, idx,
 
423
  s1, s2, s3, s4, s5,
 
424
  q1_notes
 
425
  )
 
426
  except gr.Error:
 
427
  return (
 
428
  user_seen, idx,
 
429
  gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
430
+ gr.update(),
 
 
431
  gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
 
432
  gr.update(),
 
433
  )
434
 
 
 
435
  return (
 
436
  user_seen, idx,
 
437
  gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None),
438
+ gr.update(value=""),
 
 
439
  gr.update(value="Finished!"),
 
440
  gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
441
+ gr.update(value=None),
 
 
442
  )
443
 
 
 
444
  def pause_exit(user_seen, samples):
 
445
  return user_seen, samples
446
 
 
 
447
  def submit_next_image(
 
448
  name: str,
 
449
  email: str,
 
450
  uid: str,
 
451
  samples: List[Sample],
452
+ user_seen: List[Sample],
 
 
453
  idx: int,
 
454
  s1: float, s2: float, s3: float, s4: float, s5: float,
 
455
  q1_notes: str
 
456
  ):
 
457
  try:
 
458
  progress = _save_record_and_progress(
 
459
  name, email, uid, samples, user_seen, idx,
 
460
  s1, s2, s3, s4, s5,
 
461
  q1_notes
 
462
  )
463
+ except gr.Error as e:
464
+ raise e
465
 
466
+ seen_list = progress.get(uid, {}).get("seen", [])
467
+ left_after = user_left_count(seen_list, samples)
468
+ target = user_target_count(samples)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
469
 
470
+ # placeholder image to avoid Gradio trying to load None
471
+ placeholder_img = Image.new("RGB", (256, 256), color="gray")
472
 
473
+ # If user reached the target, return placeholders for images and let the then() chain show thanks
474
+ if left_after == 0:
475
+ status = (
476
+ f"Saved! You’ve completed all {target} images. 🎉 "
477
+ f"Click **Exit** to close this session."
478
+ )
479
+ return (
480
+ seen_list, -1,
481
+ placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
482
+ gr.update(value=status),
483
+ gr.update(value=""),
484
+ 5, 5, 5, 5, 5,
485
+ )
486
 
487
+ idx_next = pick_next_index(seen_list, samples)
488
+ if idx_next == -1:
489
+ # no more images but target not met (rare). return placeholders too.
490
+ return (
491
+ seen_list, -1,
492
+ placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
493
+ "No more images.",
494
+ "",
495
+ 5, 5, 5, 5, 5,
496
+ )
497
 
498
+ # FIX: define sample_next correctly
499
+ sample_next = samples[idx_next]
500
 
501
  return (
502
+ seen_list, idx_next,
 
 
503
  load_image(sample_next.masked_gt_path),
 
504
  load_image(sample_next.unmasked_gt_path),
 
505
  load_image(sample_next.sr_path),
 
506
  load_image(sample_next.original_path),
 
507
  load_image(sample_next.image_5_path),
 
508
  gr.update(value=""),
 
509
  gr.update(value=""),
 
510
  5, 5, 5, 5, 5,
 
 
 
511
  )
512
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
 
514
  def to_thanks(name: str, user_seen: List[str], samples: List[Sample]):
 
 
 
515
  left = user_left_count(user_seen, samples)
 
516
  target = user_target_count(samples)
 
517
  if left > 0:
518
+ msg = (
 
 
 
 
519
  f"### ⏸️ Session Paused!\n\n"
 
520
  f"### ✅ Thanks, {name}! Your progress has been saved.\n\n"
521
+ f"We’re grateful for your time and expertise. Our suggested target is "
522
+ f"{TARGET_PER_PERSON} images per reviewer.\n\n"
523
+ f"You have **{left}** images left.\n\n"
524
+ f"You can close this tab and return whenever you like—just use the same Name and Email to **continue where you left off**.\n\n"
525
+ f"If you have questions, issues, or suggestions, please email **{CONTACT_EMAIL}**.\n\n"
526
+ f"Click **Start Again** to evaluate another image."
 
 
 
 
 
 
 
527
  )
 
528
  else:
 
 
 
529
  msg = (
 
530
  f"### ✅ All Done, {name}!\n\n"
 
531
  f"You’ve completed the target of **{target}** images. Your responses are securely saved.\n\n"
 
532
  f"We’re extremely grateful for your time and expertise. You are welcome to continue with more images if you wish, or you can finish here.\n\n"
 
533
  f"If you have questions, issues, or suggestions, please email **{CONTACT_EMAIL}**.\n\n"
 
534
  )
 
535
  return gr.update(visible=False), gr.update(visible=True), gr.update(value=msg)
536
 
 
 
537
  def hide_thanks():
 
538
  return gr.update(visible=False)
539
 
 
 
540
  def maybe_show_thanks(name: str, seen: List[str], samples: List[Sample]):
 
 
 
541
  if len(set(seen or [])) >= TARGET_PER_PERSON:
 
542
  return to_thanks(name, seen, samples)
 
 
 
 
 
543
  return gr.update(visible=True), gr.update(visible=False), gr.update()
544
 
 
 
545
  def reset_to_start():
 
 
 
 
 
 
 
546
  return (
 
547
  gr.update(value=""), # Clear Name
 
548
  gr.update(value=""), # Clear Email
 
549
  gr.update(visible=True), # Show Start Group
 
550
  gr.update(visible=True), # Show Intro
 
551
  gr.update(visible=False), # Hide Eval
 
552
  gr.update(visible=False), # Hide Thanks
 
553
  )
554
 
 
 
555
  # ----------------------
 
556
  # UI
 
557
  # ----------------------
 
558
  with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
 
559
  intro_md = gr.Markdown(
 
560
  f"""
 
561
  # Retrogressive Thaw Slump (RTS) Human Evaluation
562
 
 
 
563
  ### 👋 Welcome, and thanks for lending your expertise!
 
564
  We’re inviting domain experts to help evaluate satellite image patches for RTS.
565
 
 
 
566
  ---
567
 
 
 
568
  ### 📋 Instructions
 
569
  * **Suggested target:** ~{TARGET_PER_PERSON} images per reviewer.
 
570
  * **The Task:** For each set, you will see 5 variations of the same satellite image.
 
571
  * **Rating:** Rate each image from **1 (Poor)** to **10 (Excellent)** based on how clearly the RTS feature (indicated by the **Red Box**) is depicted.
572
 
 
 
573
  ### ⏸️ Saving & Resuming
 
574
  * **Automatic Saving:** Your progress is saved automatically after every "Submit".
 
575
  * **Take a Break:** You can close this tab at any time.
 
576
  * **How to Resume:** Simply return here and enter the **exact same Name and Email**. The system will pick up exactly where you left off.
577
 
 
 
578
  ---
 
579
  **Questions or issues?** Email **{CONTACT_EMAIL}** — we appreciate your feedback and suggestions.
580
 
 
 
581
  **Ready?** Enter your details below to begin.
 
582
  """
 
583
  )
584
 
 
 
585
  # Hidden states
 
586
  state_uid = gr.State("")
 
587
  state_samples = gr.State([])
 
588
  state_seen = gr.State([])
 
589
  state_idx = gr.State(-1)
590
 
 
 
591
  with gr.Group() as start_group:
 
592
  with gr.Row():
 
593
  name = gr.Textbox(label="Full name", placeholder="Jane Doe", autofocus=True)
 
594
  email = gr.Textbox(label="Email address", placeholder="jane@example.com")
 
595
  start_btn = gr.Button("Start / Resume", variant="primary")
 
596
  status = gr.Markdown("\n")
597
 
 
 
598
  eval_panel = gr.Group(visible=False)
 
599
  with eval_panel:
 
 
 
 
 
600
  gr.Markdown(
 
601
  """
 
602
  Focus your attention on the area inside the **Red Box**. This marks the potential location of the Retrogressive Thaw Slump (RTS). Compare the five images below. Rate how clearly and accurately each image depicts the **RTS** feature.
603
 
 
 
604
  **Rating Scale (1 - 10):**
 
605
  * **10 (Excellent):** The RTS feature is sharp, distinct, and clearly visible.
 
606
  * **1 (Poor):** The RTS feature is blurry, distorted, or impossible to distinguish.
 
607
  """
 
608
  )
609
 
 
 
610
  with gr.Row():
 
611
  with gr.Column(scale=1, min_width=150):
 
612
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 1</div>")
 
613
  image_1 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
 
614
  score_1 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
615
 
 
 
616
  with gr.Column(scale=1, min_width=150):
 
617
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 2</div>")
 
618
  image_2 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
 
619
  score_2 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
620
 
 
 
621
  with gr.Column(scale=1, min_width=150):
 
622
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 3</div>")
 
623
  image_3 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
 
624
  score_3 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
625
 
 
 
626
  with gr.Column(scale=1, min_width=150):
 
627
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 4</div>")
 
628
  image_4 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
 
629
  score_4 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
630
 
 
 
631
  with gr.Column(scale=1, min_width=150):
 
632
  gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 5</div>")
 
633
  image_5 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
 
634
  score_5 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
635
 
 
 
636
  notes_q1 = gr.Textbox(
 
637
  label="Notes (Optional)",
 
638
  lines=2,
 
639
  placeholder="If there are multiple RTS or ambiguities, please note here."
 
640
  )
641
 
 
 
642
  with gr.Row():
 
643
  submit_next_btn = gr.Button("Submit & Next Image", variant="primary")
 
644
  pause_exit_btn = gr.Button("Exit", variant="secondary")
645
 
 
 
 
 
646
  your_jsonl_path = gr.State()
647
 
648
  with gr.Group(visible=False) as thanks_group:
 
649
  thanks_md = gr.Markdown("### ✅ Thanks! Your responses were saved.\n\nClick **Start Again** to evaluate another image.")
 
650
  restart_btn = gr.Button("Start Again", variant="primary")
651
 
 
 
652
  # --- Wiring ---
 
 
 
653
  start_event = start_btn.click(
 
654
  start_or_resume,
 
655
  inputs=[name, email],
 
656
  outputs=[
 
657
  state_uid, state_samples, state_seen, state_idx,
 
658
  image_1, image_2, image_3, image_4, image_5,
 
659
  status, your_jsonl_path,
 
660
  eval_panel, intro_md, start_group
 
661
  ],
 
662
  )
 
663
  start_event.then(hide_thanks, inputs=None, outputs=[thanks_group])
664
 
 
 
665
  # 1. When Pause is clicked, just pass the state through
 
666
  pause_event = pause_exit_btn.click(
 
667
  pause_exit,
 
668
  inputs=[state_seen, state_samples],
 
669
  outputs=[state_seen, state_samples],
 
670
  )
671
 
 
 
672
  # 2. Then show the "Thanks/Resume" screen with the 'how many left' message
 
673
  pause_event.then(
 
674
  to_thanks,
 
675
  inputs=[name, state_seen, state_samples],
 
676
  outputs=[eval_panel, thanks_group, thanks_md],
 
677
  )
678
 
 
 
679
  nextimg_event = submit_next_btn.click(
 
680
  submit_next_image,
 
681
  inputs=[name, email, state_uid, state_samples, state_seen, state_idx,
 
682
  score_1, score_2, score_3, score_4, score_5, notes_q1],
 
683
  outputs=[state_seen, state_idx,
 
684
  image_1, image_2, image_3, image_4, image_5,
 
685
  status, notes_q1,
 
686
  score_1, score_2, score_3, score_4, score_5],
 
687
  )
 
688
  nextimg_event.then(
 
689
  maybe_show_thanks,
 
690
  inputs=[name, state_seen, state_samples],
 
691
  outputs=[eval_panel, thanks_group, thanks_md],
 
692
  )
693
 
 
 
 
 
694
  restart_event = restart_btn.click(
 
695
  reset_to_start,
 
696
  inputs=[],
 
697
  outputs=[
 
698
  name, email,
 
699
  start_group, intro_md,
 
700
  eval_panel, thanks_group
 
701
  ],
 
702
  )
703
 
 
 
704
  if __name__ == "__main__":
 
 
 
705
  if HF_RESULTS_REPO:
 
706
  from huggingface_hub import snapshot_download
 
 
 
707
  try:
 
 
 
708
  snapshot_download(
 
709
  repo_id=HF_RESULTS_REPO,
 
710
  repo_type="dataset",
711
+ local_dir=".",
 
 
712
  allow_patterns=["data/*", "results/*"],
 
713
  token=HF_TOKEN
 
714
  )
 
715
  except Exception as e:
 
716
  print(f"Error reading from HF: {e}")
717
 
 
 
718
  ensure_paths()
 
 
 
719
  _ = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
720
 
721
+ print("✅ Launching app.")
 
 
 
722
  demo.queue()
723
+ demo.launch()