Spaces:

forouzanfallah
/

Super_Resolution_Evaluation

Running

App Files Community

forouzanfallah committed 20 days ago

Commit

71cbe5b

verified ·

1 Parent(s): 6032b28

Update app.py

Browse files

Files changed (1) hide show

app.py +174 -193

app.py CHANGED Viewed

@@ -21,15 +21,12 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 _hf_api = HfApi(token=HF_TOKEN)
 # --- Main settings ---
-# UPDATED: Set to 20 as requested
 TARGET_PER_PERSON = 20
 CONTACT_EMAIL = "ffallah@asu.edu"
 # --- Paths ---
-# We still use the JSON file to get the list of filenames,
 CAPTIONS_JSON_PATH = os.environ.get("CAPTIONS_JSON_PATH", "data/captions.json")
-# Folders with matching filenames across all FIVE folders:
 GT_MASKED_DIR   = "data/gt_b"   # Image 1
 GT_UNMASKED_DIR = "data/adc_b"  # Image 2
 SR_DIR          = "data/sr_b"   # Image 3
@@ -40,10 +37,10 @@ IMAGE_5_DIR     = "data/see_b"  # Image 5
 RESULTS_DIR = "results"
 PROGRESS_PATH = os.path.join(RESULTS_DIR, "progress.json")
 ALL_RESULTS_JSONL = os.path.join(RESULTS_DIR, "all_results.jsonl")
-SAVE_PII = True # Set to False to anonymize saved name/email
 WRITE_LOCK = threading.Lock()
-STRICT_ENFORCEMENT = False  # UPDATED: Set to False to prevent crashes if dataset size != target
 # ----------------------
 # Data model
@@ -60,17 +57,36 @@ class Sample:
 # ----------------------
 # Helpers
 # ----------------------
 def user_target_count(samples: List[Sample]) -> int:
-    # UPDATED: Logic to be more robust.
-    # It takes the minimum of the available samples or the target.
-    # This prevents crashes if you have fewer or more images than 20.
     return min(len(samples), TARGET_PER_PERSON)
 def user_left_count(user_seen: List[str], samples: List[Sample]) -> int:
     target = user_target_count(samples)
-    seen = set(user_seen)
-    # Only count seen images that are actually in the current sample list
-    allowed_ids = {s.sample_id for s in samples}
     seen_in_allowed = len([sid for sid in seen if sid in allowed_ids])
     return max(0, target - seen_in_allowed)
@@ -84,15 +100,8 @@ def push_results_to_private_repo(uid: str):
     if not HF_TOKEN or not HF_RESULTS_REPO:
         return
     try:
-        # _ensure_private_repo(HF_RESULTS_REPO)
         os.makedirs(RESULTS_DIR, exist_ok=True)
-        # # Ensure files exist
-        # if not os.path.exists(ALL_RESULTS_JSONL):
-        #     open(ALL_RESULTS_JSONL, "a").close()
         user_file = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
-        # if not os.path.exists(user_file):
-        #     open(user_file, "a").close()
         ops = [
             CommitOperationAdd(
@@ -127,20 +136,28 @@ def ensure_paths():
         (IMAGE_5_DIR, "IMAGE_5_DIR"),
     ]:
         if not os.path.isdir(pth):
-            # We just print a warning instead of crashing, to allow partial setup
             print(f"Warning: Directory '{pth}' for {name} not found.")
 def load_image(path: str) -> Image.Image:
-    if not os.path.exists(path):
-        return Image.new("RGB", (256, 256), color="gray") # Return placeholder if missing
-    return Image.open(path).convert("RGB")
 def load_dataset(captions_path: str, gt_masked_dir: str, gt_unmasked_dir: str, sr_dir: str, original_dir: str, image_5_dir: str) -> List[Sample]:
     if not os.path.exists(captions_path):
         return []
     with open(captions_path, "r", encoding="utf-8") as f:
-        captions_data = json.load(f)
     samples: List[Sample] = []
     for item in captions_data:
@@ -149,8 +166,6 @@ def load_dataset(captions_path: str, gt_masked_dir: str, gt_unmasked_dir: str, s
             continue
         sample_id = os.path.splitext(base_filename)[0]
-        # Define all 5 target paths
         paths = {
             "masked": os.path.join(gt_masked_dir, base_filename),
             "unmasked": os.path.join(gt_unmasked_dir, base_filename),
@@ -159,22 +174,25 @@ def load_dataset(captions_path: str, gt_masked_dir: str, gt_unmasked_dir: str, s
             "img5": os.path.join(image_5_dir, base_filename)
         }
-        # STRICT CHECK: All 5 must exist
-        if all(os.path.exists(p) for p in paths.values()):
-            samples.append(
-                Sample(
-                    sample_id=sample_id,
-                    masked_gt_path=paths["masked"],
-                    unmasked_gt_path=paths["unmasked"],
-                    sr_path=paths["sr"],
-                    original_path=paths["original"],
-                    image_5_path=paths["img5"]
-                )
             )
-        else:
-            # Debugging: Find out which folder is the culprit
-            missing = [k for k, v in paths.items() if not os.path.exists(v)]
-            print(f"Skipping {base_filename}: Missing in folders {missing}")
     return samples
@@ -209,11 +227,6 @@ def append_jsonl(path: str, record: Dict[str, Any]):
 # LOGIC FOR CONVERTING SLIDERS TO RANK
 # ----------------------
 def convert_scores_to_rank(s1, s2, s3, s4, s5) -> Dict[str, int]:
-    """
-    Takes 5 scores (1-10). Returns a dictionary:
-    {'image_1': rank, 'image_2': rank...}
-    where Rank 1 is the Highest Score.
-    """
     scores = [
         ("image_1", s1),
         ("image_2", s2),
@@ -221,25 +234,21 @@ def convert_scores_to_rank(s1, s2, s3, s4, s5) -> Dict[str, int]:
         ("image_4", s4),
         ("image_5", s5)
     ]
-    # Sort by score descending (High score first)
     scores.sort(key=lambda x: x[1], reverse=True)
     ranks = {}
     current_rank = 1
     for img_key, score in scores:
         ranks[img_key] = current_rank
         current_rank += 1
     return ranks
 # ----------------------
 # App logic
 # ----------------------
 def pick_next_index(user_seen: List[str], samples: List[Sample]) -> int:
-    # Logic: Only pick from samples that match the criteria
-    seen_set = set(user_seen)
-    remaining = [i for i, s in enumerate(samples) if s.sample_id not in seen_set]
     if not remaining:
         return -1
     return random.choice(remaining)
@@ -250,9 +259,9 @@ def start_or_resume(name: str, email: str):
     ensure_paths()
     samples = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
     if not samples:
-         raise gr.Error("No images found. Please check dataset configuration.")
     uid = hash_user_id(name, email)
     progress = load_progress()
@@ -263,15 +272,22 @@ def start_or_resume(name: str, email: str):
     user_seen: List[str] = progress[uid].get("seen", [])
     left = user_left_count(user_seen, samples)
     # If the user has finished their target
     if left == 0 and len(user_seen) >= user_target_count(samples):
         status = (
             f"Welcome back, {name}. You’ve completed all {user_target_count(samples)} images. 🎉\n"
             f"Your personal results file: {os.path.join(RESULTS_DIR, f'{uid}.jsonl')}"
         )
         return (
-            uid, samples, user_seen, -1,
-            None, None, None, None, None, # images
             status,
             os.path.join(RESULTS_DIR, f"{uid}.jsonl"),
             gr.update(visible=False),
@@ -280,9 +296,20 @@ def start_or_resume(name: str, email: str):
         )
     idx = pick_next_index(user_seen, samples)
-    if idx == -1:
-        # Case where target not reached but no fresh images left
-        return (uid, samples, user_seen, -1, None, None, None, None, None, "No more new images available.", "", gr.update(visible=False), gr.update(visible=True), gr.update(visible=True))
     sample = samples[idx]
@@ -294,8 +321,13 @@ def start_or_resume(name: str, email: str):
     os.makedirs(RESULTS_DIR, exist_ok=True)
     user_file_path = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
     return (
-        uid, samples, user_seen, idx,
         load_image(sample.masked_gt_path),
         load_image(sample.unmasked_gt_path),
         load_image(sample.sr_path),
@@ -308,6 +340,7 @@ def start_or_resume(name: str, email: str):
         gr.update(visible=False),
     )
 def _save_record_and_progress(
     name: str,
     email: str,
@@ -325,21 +358,20 @@ def _save_record_and_progress(
     if not name or not email:
         raise gr.Error("Please enter your name and email.")
-    if idx is None or idx < 0 or idx >= len(samples):
         return load_progress()
-    # --- CALCULATE RANK FROM SLIDERS ---
     rank_dict = convert_scores_to_rank(score_1, score_2, score_3, score_4, score_5)
-    sample = samples[idx]
     progress = load_progress()
     progress.setdefault(uid, {"seen": []})
     seen = set(progress[uid].get("seen", []))
     if sample.sample_id in seen:
         return progress
-    # We allow saving even if target met, just in case, but usually UI stops them.
     record: Dict[str, Any] = {
         "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
@@ -347,14 +379,6 @@ def _save_record_and_progress(
         "name": name if SAVE_PII else None,
         "email": email if SAVE_PII else None,
         "sample_id": sample.sample_id,
-        # "image_paths": {
-        #     "masked_gt": sample.masked_gt_path,
-        #     "unmasked_gt": sample.unmasked_gt_path,
-        #     "sr": sample.sr_path,
-        #     "original": sample.original_path,
-        #     "image_5": sample.image_5_path,
-        # },
-        # Save raw scores (1-10) and the ranking
         "raw_scores": {
             "image_1": score_1,
             "image_2": score_2,
@@ -363,17 +387,22 @@ def _save_record_and_progress(
             "image_5": score_5,
         },
         "responses": {
-            # "image_ranking": rank_dict, # Format: {"image_1": 1, "image_2": 4, ...}
             "notes": q1_notes or "",
         },
     }
     os.makedirs(RESULTS_DIR, exist_ok=True)
     append_jsonl(os.path.join(RESULTS_DIR, f"{uid}.jsonl"), record)
     append_jsonl(ALL_RESULTS_JSONL, record)
-    # push_results_to_private_repo(uid)
-    thread = threading.Thread(target=push_results_to_private_repo, args=(uid,))
-    thread.start()
     seen.add(sample.sample_id)
     progress[uid]["seen"] = sorted(list(seen))
@@ -403,7 +432,7 @@ def submit_finish(
         return (
             user_seen, idx,
             gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
-            gr.update(),
             gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
             gr.update(),
         )
@@ -411,15 +440,15 @@ def submit_finish(
     return (
         user_seen, idx,
         gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None),
-        gr.update(value=""),
         gr.update(value="Finished!"),
         gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
-        gr.update(value=None),
     )
 def pause_exit(user_seen, samples):
     return user_seen, samples
 def submit_next_image(
     name: str,
     email: str,
@@ -430,75 +459,54 @@ def submit_next_image(
     s1: float, s2: float, s3: float, s4: float, s5: float,
     q1_notes: str
 ):
     try:
         progress = _save_record_and_progress(
-            name, email, uid, samples, user_seen, idx,
             s1, s2, s3, s4, s5,
             q1_notes
         )
     except gr.Error as e:
         raise e
-        # return (
-        #     user_seen, idx,
-        #     gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
-        #     gr.update(),
-        #     gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
-        #     gr.update(),
-        # )
-    # left_after = user_left_count(progress[uid]["seen"], samples)
-    # target = user_target_count(samples)
-    updated_seen = progress[uid]["seen"]
-    if len(updated_seen) >= TARGET_PER_PERSON:
         status = (
             f"Saved! You’ve completed all {target} images. 🎉 "
             f"Click **Exit** to close this session."
         )
         return (
-            updated_seen, -1,
-            None, None, None, None, None, # No more images to load
-            gr.update(value="Target reached!"), # Status
-            gr.update(value=""), # Clear notes
-            5, 5, 5, 5, 5 # Reset sliders
         )
-    # if left_after == 0:
-    #     status = (
-    #         f"Saved! You’ve completed all {target} images. 🎉 "
-    #         f"Click **Exit** to close this session."
-    #     )
-    #     return (
-    #                 updated_seen, -1,
-    #                 None, None, None, None, None,  # Return None to avoid image load errors
-    #                 gr.update(value="Target reached! Processing..."),
-    #                 gr.update(value=""),
-    #                 5, 5, 5, 5, 5,
-    #                 gr.update(value=None)
-    #             )
-    idx_next = pick_next_index(updated_seen, samples)
     if idx_next == -1:
-        return (updated_seen, -1, None, None, None, None, None, "No more images.", "", 5, 5, 5, 5, 5)
-         # Fallback if no images left
-         # return (
-         #    progress[uid]["seen"], -1,
-         #    None, None, None, None, None,
-         #    gr.update(value="No more images."),
-         #    gr.update(value=""),
-         #    gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
-         #    gr.update(value=None),
-         # )
-    sample_next = samples[idx_next]
-    status =""
-    # (
-    #     f"Saved! Personal progress — images left: {left_after} of {target}.\n"
-    #     f"Next sample: {sample_next.sample_id}"
-    # )
     return (
-        updated_seen, idx_next,
         load_image(sample_next.masked_gt_path),
         load_image(sample_next.unmasked_gt_path),
         load_image(sample_next.sr_path),
@@ -507,40 +515,26 @@ def submit_next_image(
         gr.update(value=""),
         gr.update(value=""),
         5, 5, 5, 5, 5,
     )
-    # return (
-    #     progress[uid]["seen"], idx_next,
-    #     load_image(sample_next.masked_gt_path),
-    #     load_image(sample_next.unmasked_gt_path),
-    #     load_image(sample_next.sr_path),
-    #     load_image(sample_next.original_path),
-    #     load_image(sample_next.image_5_path),
-    #     gr.update(value=status),
-    #     gr.update(value=""),
-    #     gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
-    #     gr.update(value=None),
-    # )
 def to_thanks(name: str, user_seen: List[str], samples: List[Sample]):
-    # Calculate how many are left based on the updated seen list
-    left = user_left_count(user_seen, samples)
-    target = user_target_count(samples)
     if left > 0:
-            # Message for users who are leaving early
-            msg = (
             f"### ⏸️ Session Paused!\n\n"
             f"### ✅ Thanks, {name}! Your progress has been saved.\n\n"
-                f"We’re grateful for your time and expertise. Our suggested target is "
-                f"{TARGET_PER_PERSON} images per reviewer"
-                f"You have **{left}** images left.\n\n"
-                f"You can close this tab and return whenever you like—just use the same Name and Email to **continue where you left off**.\n\n"
-                f"If you have questions, issues, or suggestions, please email **{CONTACT_EMAIL}**.\n\n"
-                f"Click **Start Again** to evaluate another image."
         )
     else:
-        # Message for users who completed the target
         msg = (
             f"### ✅ All Done, {name}!\n\n"
             f"You’ve completed the target of **{target}** images. Your responses are securely saved.\n\n"
@@ -553,17 +547,12 @@ def hide_thanks():
     return gr.update(visible=False)
 def maybe_show_thanks(name: str, seen: List[str], samples: List[Sample]):
-    # Check if the user has reached the target
     if len(set(seen or [])) >= TARGET_PER_PERSON:
-        return to_thanks(name, seen, samples)
-    # If not done, keep evaluation panel visible
     return gr.update(visible=True), gr.update(visible=False), gr.update()
 def reset_to_start():
-    """
-    Clears inputs and resets the view to the login page.
-    """
     return (
         gr.update(value=""), # Clear Name
         gr.update(value=""), # Clear Email
@@ -583,14 +572,14 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
     ### 👋 Welcome, and thanks for lending your expertise!
     We’re inviting domain experts to help evaluate satellite image patches for RTS.
     ---
     ### 📋 Instructions
     * **Suggested target:** ~{TARGET_PER_PERSON} images per reviewer.
     * **The Task:** For each set, you will see 5 variations of the same satellite image.
     * **Rating:** Rate each image from **1 (Poor)** to **10 (Excellent)** based on how clearly the RTS feature (indicated by the **Red Box**) is depicted.
     ### ⏸️ Saving & Resuming
     * **Automatic Saving:** Your progress is saved automatically after every "Submit".
     * **Take a Break:** You can close this tab at any time.
@@ -598,7 +587,7 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
     ---
     **Questions or issues?** Email **{CONTACT_EMAIL}** — we appreciate your feedback and suggestions.
     **Ready?** Enter your details below to begin.
     """
     )
@@ -618,39 +607,37 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
     eval_panel = gr.Group(visible=False)
     with eval_panel:
-        # --- NEW LAYOUT: 5 COLUMNS, 1-10 SLIDERS ---
         gr.Markdown(
             """
             Focus your attention on the area inside the **Red Box**. This marks the potential location of the Retrogressive Thaw Slump (RTS). Compare the five images below. Rate how clearly and accurately each image depicts the **RTS** feature.
             **Rating Scale (1 - 10):**
             * **10 (Excellent):** The RTS feature is sharp, distinct, and clearly visible.
             * **1 (Poor):** The RTS feature is blurry, distorted, or impossible to distinguish.
             """
         )
         with gr.Row():
             with gr.Column(scale=1, min_width=150):
                 gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 1</div>")
                 image_1 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
                 score_1 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
             with gr.Column(scale=1, min_width=150):
                 gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 2</div>")
                 image_2 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
                 score_2 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
             with gr.Column(scale=1, min_width=150):
                 gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 3</div>")
                 image_3 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
                 score_3 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
             with gr.Column(scale=1, min_width=150):
                 gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 4</div>")
                 image_4 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
                 score_4 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
             with gr.Column(scale=1, min_width=150):
                 gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 5</div>")
                 image_5 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
@@ -661,19 +648,18 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
             lines=2,
             placeholder="If there are multiple RTS or ambiguities, please note here."
         )
         with gr.Row():
             submit_next_btn = gr.Button("Submit & Next Image", variant="primary")
             pause_exit_btn = gr.Button("Exit", variant="secondary")
-        # your_jsonl_path = gr.Textbox(label="Your results file path (for reference)", interactive=False)
         your_jsonl_path = gr.State()
     with gr.Group(visible=False) as thanks_group:
         thanks_md = gr.Markdown("### ✅ Thanks! Your responses were saved.\n\nClick **Start Again** to evaluate another image.")
         restart_btn = gr.Button("Start Again", variant="primary")
     # --- Wiring ---
     start_event = start_btn.click(
         start_or_resume,
         inputs=[name, email],
@@ -692,7 +678,7 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
         inputs=[state_seen, state_samples],
         outputs=[state_seen, state_samples],
     )
     # 2. Then show the "Thanks/Resume" screen with the 'how many left' message
     pause_event.then(
         to_thanks,
@@ -715,7 +701,6 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
         outputs=[eval_panel, thanks_group, thanks_md],
     )
-    # --- CHANGED: Calls reset_to_start instead of start_or_resume ---
     restart_event = restart_btn.click(
         reset_to_start,
         inputs=[],
@@ -727,16 +712,13 @@ with gr.Blocks(title="RTS Human Evaluation", theme=gr.themes.Soft()) as demo:
     )
 if __name__ == "__main__":
-    # --- DYNAMIC READ FROM HF_RESULTS_REPO ---
     if HF_RESULTS_REPO:
         from huggingface_hub import snapshot_download
-        # print(f"Reading images and metadata from: {HF_RESULTS_REPO}...")
         try:
-            # This pulls your repo's 'data' folder into the current workspace
             snapshot_download(
                 repo_id=HF_RESULTS_REPO,
                 repo_type="dataset",
-                local_dir=".",
                 allow_patterns=["data/*", "results/*"],
                 token=HF_TOKEN
             )
@@ -744,9 +726,8 @@ if __name__ == "__main__":
             print(f"Error reading from HF: {e}")
     ensure_paths()
-    # Pre-check dataset load from the newly downloaded files
     _ = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
-    print("✅ Successfully synced with HF Repo. Launching app.")
     demo.queue()
-    demo.launch()

 _hf_api = HfApi(token=HF_TOKEN)
 # --- Main settings ---
 TARGET_PER_PERSON = 20
 CONTACT_EMAIL = "ffallah@asu.edu"
 # --- Paths ---
 CAPTIONS_JSON_PATH = os.environ.get("CAPTIONS_JSON_PATH", "data/captions.json")
 GT_MASKED_DIR   = "data/gt_b"   # Image 1
 GT_UNMASKED_DIR = "data/adc_b"  # Image 2
 SR_DIR          = "data/sr_b"   # Image 3
 RESULTS_DIR = "results"
 PROGRESS_PATH = os.path.join(RESULTS_DIR, "progress.json")
 ALL_RESULTS_JSONL = os.path.join(RESULTS_DIR, "all_results.jsonl")
+SAVE_PII = True
 WRITE_LOCK = threading.Lock()
+STRICT_ENFORCEMENT = False
 # ----------------------
 # Data model
 # ----------------------
 # Helpers
 # ----------------------
+def ensure_sample_objects(samples_input):
+    """
+    Accepts either:
+     - list[Sample] (already objects), or
+     - list[dict] (serialized Sample.__dict__)
+    Returns list[Sample].
+    """
+    if not samples_input:
+        return []
+    if isinstance(samples_input, list):
+        if len(samples_input) == 0:
+            return []
+        first = samples_input[0]
+        if isinstance(first, dict):
+            try:
+                return [Sample(**s) for s in samples_input]
+            except Exception:
+                # fall through to returning empty to avoid crashes
+                return []
+        elif isinstance(first, Sample):
+            return samples_input
+    return []
 def user_target_count(samples: List[Sample]) -> int:
     return min(len(samples), TARGET_PER_PERSON)
 def user_left_count(user_seen: List[str], samples: List[Sample]) -> int:
     target = user_target_count(samples)
+    seen = set(user_seen or [])
+    allowed_ids = {s.sample_id for s in samples}
     seen_in_allowed = len([sid for sid in seen if sid in allowed_ids])
     return max(0, target - seen_in_allowed)
     if not HF_TOKEN or not HF_RESULTS_REPO:
         return
     try:
         os.makedirs(RESULTS_DIR, exist_ok=True)
         user_file = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
         ops = [
             CommitOperationAdd(
         (IMAGE_5_DIR, "IMAGE_5_DIR"),
     ]:
         if not os.path.isdir(pth):
             print(f"Warning: Directory '{pth}' for {name} not found.")
 def load_image(path: str) -> Image.Image:
+    if not path or not os.path.exists(path):
+        # return a simple placeholder image so UI doesn't crash
+        return Image.new("RGB", (256, 256), color="gray")
+    try:
+        return Image.open(path).convert("RGB")
+    except Exception:
+        return Image.new("RGB", (256, 256), color="gray")
 def load_dataset(captions_path: str, gt_masked_dir: str, gt_unmasked_dir: str, sr_dir: str, original_dir: str, image_5_dir: str) -> List[Sample]:
     if not os.path.exists(captions_path):
+        print(f"Captions file not found at {captions_path}")
         return []
     with open(captions_path, "r", encoding="utf-8") as f:
+        try:
+            captions_data = json.load(f)
+        except Exception:
+            print("Failed to parse captions JSON.")
+            return []
     samples: List[Sample] = []
     for item in captions_data:
             continue
         sample_id = os.path.splitext(base_filename)[0]
         paths = {
             "masked": os.path.join(gt_masked_dir, base_filename),
             "unmasked": os.path.join(gt_unmasked_dir, base_filename),
             "img5": os.path.join(image_5_dir, base_filename)
         }
+        # If strict enforcement required, require all five files to exist.
+        if STRICT_ENFORCEMENT:
+            if not all(os.path.exists(p) for p in paths.values()):
+                missing = [k for k, v in paths.items() if not os.path.exists(v)]
+                print(f"Skipping {base_filename}: Missing in folders {missing}")
+                continue
+        # In non-strict mode, it's okay to include samples even if some files missing;
+        # we will supply placeholders at load time.
+        samples.append(
+            Sample(
+                sample_id=sample_id,
+                masked_gt_path=paths["masked"],
+                unmasked_gt_path=paths["unmasked"],
+                sr_path=paths["sr"],
+                original_path=paths["original"],
+                image_5_path=paths["img5"]
             )
+        )
     return samples
 # LOGIC FOR CONVERTING SLIDERS TO RANK
 # ----------------------
 def convert_scores_to_rank(s1, s2, s3, s4, s5) -> Dict[str, int]:
     scores = [
         ("image_1", s1),
         ("image_2", s2),
         ("image_4", s4),
         ("image_5", s5)
     ]
     scores.sort(key=lambda x: x[1], reverse=True)
     ranks = {}
     current_rank = 1
     for img_key, score in scores:
         ranks[img_key] = current_rank
         current_rank += 1
     return ranks
 # ----------------------
 # App logic
 # ----------------------
 def pick_next_index(user_seen: List[str], samples: List[Sample]) -> int:
+    samples_obj = ensure_sample_objects(samples)
+    seen_set = set(user_seen or [])
+    remaining = [i for i, s in enumerate(samples_obj) if s.sample_id not in seen_set]
     if not remaining:
         return -1
     return random.choice(remaining)
     ensure_paths()
     samples = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
     if not samples:
+        raise gr.Error("No images found. Please check dataset configuration.")
     uid = hash_user_id(name, email)
     progress = load_progress()
     user_seen: List[str] = progress[uid].get("seen", [])
     left = user_left_count(user_seen, samples)
+    # placeholder image to avoid Gradio trying to load None
+    placeholder_img = Image.new("RGB", (256, 256), color="gray")
     # If the user has finished their target
     if left == 0 and len(user_seen) >= user_target_count(samples):
         status = (
             f"Welcome back, {name}. You’ve completed all {user_target_count(samples)} images. 🎉\n"
             f"Your personal results file: {os.path.join(RESULTS_DIR, f'{uid}.jsonl')}"
         )
+        samples_serialized = [s.__dict__ for s in samples]
         return (
+            uid,
+            samples_serialized,
+            user_seen,
+            -1,
+            placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
             status,
             os.path.join(RESULTS_DIR, f"{uid}.jsonl"),
             gr.update(visible=False),
         )
     idx = pick_next_index(user_seen, samples)
+    if idx == -1:
+        samples_serialized = [s.__dict__ for s in samples]
+        return (
+            uid,
+            samples_serialized,
+            user_seen,
+            -1,
+            placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
+            "No more new images available.",
+            "",
+            gr.update(visible=False),
+            gr.update(visible=True),
+            gr.update(visible=True)
+        )
     sample = samples[idx]
     os.makedirs(RESULTS_DIR, exist_ok=True)
     user_file_path = os.path.join(RESULTS_DIR, f"{uid}.jsonl")
+    samples_serialized = [s.__dict__ for s in samples]
     return (
+        uid,
+        samples_serialized,
+        user_seen,
+        idx,
         load_image(sample.masked_gt_path),
         load_image(sample.unmasked_gt_path),
         load_image(sample.sr_path),
         gr.update(visible=False),
     )
 def _save_record_and_progress(
     name: str,
     email: str,
     if not name or not email:
         raise gr.Error("Please enter your name and email.")
+    samples_obj = ensure_sample_objects(samples)
+    if idx is None or idx < 0 or idx >= len(samples_obj):
         return load_progress()
     rank_dict = convert_scores_to_rank(score_1, score_2, score_3, score_4, score_5)
+    sample = samples_obj[idx]
     progress = load_progress()
     progress.setdefault(uid, {"seen": []})
     seen = set(progress[uid].get("seen", []))
     if sample.sample_id in seen:
         return progress
     record: Dict[str, Any] = {
         "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
         "name": name if SAVE_PII else None,
         "email": email if SAVE_PII else None,
         "sample_id": sample.sample_id,
         "raw_scores": {
             "image_1": score_1,
             "image_2": score_2,
             "image_5": score_5,
         },
         "responses": {
             "notes": q1_notes or "",
+            "image_ranking": rank_dict,
         },
     }
     os.makedirs(RESULTS_DIR, exist_ok=True)
     append_jsonl(os.path.join(RESULTS_DIR, f"{uid}.jsonl"), record)
     append_jsonl(ALL_RESULTS_JSONL, record)
+    # start background push but don't let failures crash the app
+    try:
+        thread = threading.Thread(target=push_results_to_private_repo, args=(uid,))
+        thread.daemon = True
+        thread.start()
+    except Exception:
+        pass
     seen.add(sample.sample_id)
     progress[uid]["seen"] = sorted(list(seen))
         return (
             user_seen, idx,
             gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
+            gr.update(),
             gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
             gr.update(),
         )
     return (
         user_seen, idx,
         gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None), gr.update(value=None),
+        gr.update(value=""),
         gr.update(value="Finished!"),
         gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5), gr.update(value=5),
+        gr.update(value=None),
     )
 def pause_exit(user_seen, samples):
     return user_seen, samples
 # Persist the current ratings, then return the tuple of output values for
 # the next screen: the updated seen list, the next sample index (-1 when
 # there is nothing further to show), five image values, text values, five
 # values of 5 (presumably the score sliders' default), and a trailing value.
 # NOTE(review): `uid`, `samples`, `user_seen` and `idx` are used in the body
 # but do not appear in the signature shown here; this view appears to elide
 # lines, so confirm the full parameter list against the complete file.
 def submit_next_image(
     name: str,
     email: str,
     s1: float, s2: float, s3: float, s4: float, s5: float,
     q1_notes: str
 ):
+    samples_obj = ensure_sample_objects(samples)
     # Save first; the handler below only re-raises gr.Error unchanged, so
     # this try/except does not alter error behavior.
     try:
         progress = _save_record_and_progress(
+            name, email, uid, samples_obj, user_seen, idx,
             s1, s2, s3, s4, s5,
             q1_notes
         )
     except gr.Error as e:
         raise e
     # Re-read the seen list from the progress returned by the save, falling
     # back to an empty list when this uid has no entry.
+    seen_list = progress.get(uid, {}).get("seen", [])
+    left_after = user_left_count(seen_list, samples_obj)
+    target = user_target_count(samples_obj)
+    # placeholder image to avoid Gradio trying to load None
+    placeholder_img = Image.new("RGB", (256, 256), color="gray")
+    # If user reached the target, return placeholders for images and let the then() chain show thanks
+    if left_after == 0:
         status = (
             f"Saved! You’ve completed all {target} images. 🎉 "
             f"Click **Exit** to close this session."
         )
         return (
+            seen_list, -1,
+            placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
+            gr.update(value=status),
+            gr.update(value=""),
+            5, 5, 5, 5, 5,
+            gr.update(value=None)
         )
     # Target not reached yet: ask for the next index; -1 is handled below as
     # "nothing left to show".
+    idx_next = pick_next_index(seen_list, samples_obj)
     if idx_next == -1:
+        # no more images but target not met (rare). return placeholders too.
+        return (
+            seen_list, -1,
+            placeholder_img, placeholder_img, placeholder_img, placeholder_img, placeholder_img,
+            "No more images.",
+            "",
+            5, 5, 5, 5, 5,
+            None
+        )
+    sample_next = samples_obj[idx_next]
     # Normal path: load the next sample's images and reset the inputs.
     # NOTE(review): only three load_image calls are visible here, while the
     # other branches return five image values; the remaining two are
     # presumably elided from this view. Confirm against the full file.
     return (
+        seen_list, idx_next,
         load_image(sample_next.masked_gt_path),
         load_image(sample_next.unmasked_gt_path),
         load_image(sample_next.sr_path),
         gr.update(value=""),
         gr.update(value=""),
         5, 5, 5, 5, 5,
+        gr.update(value=None)
     )
 def to_thanks(name: str, user_seen: List[str], samples: List[Sample]):
+    samples_obj = ensure_sample_objects(samples)
+    left = user_left_count(user_seen, samples_obj)
+    target = user_target_count(samples_obj)
     if left > 0:
+        msg = (
             f"### ⏸️ Session Paused!\n\n"
             f"### ✅ Thanks, {name}! Your progress has been saved.\n\n"
+            f"We’re grateful for your time and expertise. Our suggested target is "
+            f"{TARGET_PER_PERSON} images per reviewer.\n\n"
+            f"You have **{left}** images left.\n\n"
+            f"You can close this tab and return whenever you like—just use the same Name and Email to **continue where you left off**.\n\n"
+            f"If you have questions, issues, or suggestions, please email **{CONTACT_EMAIL}**.\n\n"
+            f"Click **Start Again** to evaluate another image."
         )
     else:
         msg = (
             f"### ✅ All Done, {name}!\n\n"
             f"You’ve completed the target of **{target}** images. Your responses are securely saved.\n\n"
     return gr.update(visible=False)
 def maybe_show_thanks(name: str, seen: List[str], samples: List[Sample]):
     """Switch to the thanks screen once the reviewer has nothing left to rate.

     Completion is decided with ``user_left_count``, the same rule the submit
     handler uses, so the effective target is capped by the number of
     available samples and only IDs present in the current sample list count
     as progress. Comparing the raw seen count against ``TARGET_PER_PERSON``
     would never trigger when fewer samples than the target exist, and could
     trigger early on stale IDs.

     Args:
         name: Reviewer name, forwarded to ``to_thanks`` for the message.
         seen: Sample IDs the reviewer has already rated; ``None`` or empty
             is treated as no progress.
         samples: The session's samples, normalised through
             ``ensure_sample_objects``.

     Returns:
         Whatever ``to_thanks`` returns when the reviewer is done; otherwise
         three Gradio updates (first visible, second hidden, third a no-op),
         presumably for the same outputs ``to_thanks`` drives.
     """
     samples_obj = ensure_sample_objects(samples)
     seen_ids = list(seen or [])
     # An empty dataset has a target of 0; keep the evaluation view in that
     # case instead of reporting a completed session.
     if samples_obj and user_left_count(seen_ids, samples_obj) == 0:
         return to_thanks(name, seen_ids, samples_obj)
     return gr.update(visible=True), gr.update(visible=False), gr.update()
 def reset_to_start():
     return (
         gr.update(value=""), # Clear Name
         gr.update(value=""), # Clear Email
     ### 👋 Welcome, and thanks for lending your expertise!
     We’re inviting domain experts to help evaluate satellite image patches for RTS.
     ---
     ### 📋 Instructions
     * **Suggested target:** ~{TARGET_PER_PERSON} images per reviewer.
     * **The Task:** For each set, you will see 5 variations of the same satellite image.
     * **Rating:** Rate each image from **1 (Poor)** to **10 (Excellent)** based on how clearly the RTS feature (indicated by the **Red Box**) is depicted.
     ### ⏸️ Saving & Resuming
     * **Automatic Saving:** Your progress is saved automatically after every "Submit".
     * **Take a Break:** You can close this tab at any time.
     ---
     **Questions or issues?** Email **{CONTACT_EMAIL}** — we appreciate your feedback and suggestions.
     **Ready?** Enter your details below to begin.
     """
     )
     eval_panel = gr.Group(visible=False)
     with eval_panel:
         gr.Markdown(
             """
             Focus your attention on the area inside the **Red Box**. This marks the potential location of the Retrogressive Thaw Slump (RTS). Compare the five images below. Rate how clearly and accurately each image depicts the **RTS** feature.
             **Rating Scale (1 - 10):**
             * **10 (Excellent):** The RTS feature is sharp, distinct, and clearly visible.
             * **1 (Poor):** The RTS feature is blurry, distorted, or impossible to distinguish.
             """
         )
         with gr.Row():
             with gr.Column(scale=1, min_width=150):
                 gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 1</div>")
                 image_1 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
                 score_1 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
             with gr.Column(scale=1, min_width=150):
                 gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 2</div>")
                 image_2 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
                 score_2 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
             with gr.Column(scale=1, min_width=150):
                 gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 3</div>")
                 image_3 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
                 score_3 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
             with gr.Column(scale=1, min_width=150):
                 gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 4</div>")
                 image_4 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
                 score_4 = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Score (1-10)")
             with gr.Column(scale=1, min_width=150):
                 gr.Markdown("<div style='text-align:center; font-weight:600;'>Image 5</div>")
                 image_5 = gr.Image(show_label=False, interactive=False, height=256, show_download_button=False)
             lines=2,
             placeholder="If there are multiple RTS or ambiguities, please note here."
         )
         with gr.Row():
             submit_next_btn = gr.Button("Submit & Next Image", variant="primary")
             pause_exit_btn = gr.Button("Exit", variant="secondary")
         your_jsonl_path = gr.State()
     with gr.Group(visible=False) as thanks_group:
         thanks_md = gr.Markdown("### ✅ Thanks! Your responses were saved.\n\nClick **Start Again** to evaluate another image.")
         restart_btn = gr.Button("Start Again", variant="primary")
     # --- Wiring ---
     start_event = start_btn.click(
         start_or_resume,
         inputs=[name, email],
         inputs=[state_seen, state_samples],
         outputs=[state_seen, state_samples],
     )
     # 2. Then show the "Thanks/Resume" screen with the 'how many left' message
     pause_event.then(
         to_thanks,
         outputs=[eval_panel, thanks_group, thanks_md],
     )
     restart_event = restart_btn.click(
         reset_to_start,
         inputs=[],
     )
 # Script entry point: optionally pull data and earlier results from the Hub,
 # prepare local paths, load the dataset once, then start the Gradio app.
 if __name__ == "__main__":
     if HF_RESULTS_REPO:
         from huggingface_hub import snapshot_download
         try:
             # Download only data/* and results/* from the dataset repo into
             # the current working directory.
             snapshot_download(
                 repo_id=HF_RESULTS_REPO,
                 repo_type="dataset",
+                local_dir=".",
                 allow_patterns=["data/*", "results/*"],
                 token=HF_TOKEN
             )
             # NOTE(review): this print references `e` and reads like an
             # except-handler body; the `except ... as e:` line appears to be
             # elided from this view. Confirm against the full file.
             print(f"Error reading from HF: {e}")
     ensure_paths()
     # The return value is discarded; presumably this call only checks that
     # the dataset loads before the UI is served. TODO confirm.
     _ = load_dataset(CAPTIONS_JSON_PATH, GT_MASKED_DIR, GT_UNMASKED_DIR, SR_DIR, ORIGINAL_DIR, IMAGE_5_DIR)
+    print("✅ Launching app.")
     demo.queue()
+    demo.launch()