Spaces:

piekenius123
/

Amaze-Visualization

Sleeping

App Files Files Community

piekenius123 committed 20 days ago

Commit

eb2300a

verified ·

1 Parent(s): a949f42

Upload app.py

Browse files

Files changed (1) hide show

app.py +326 -136

app.py CHANGED Viewed

@@ -1,25 +1,34 @@
-# app.py
 import io
 import json
-import base64
 import random
-from typing import Optional, Dict, Any, List, Tuple
-import pandas as pd
-from PIL import Image
 import gradio as gr
 from huggingface_hub import HfApi, hf_hub_download
 DATASET_REPO_ID = "piekenius123/Amaze"
 REPO_TYPE = "dataset"
 SHAPES = ["circle", "hexagon", "square", "triangle"]
 SPLITS = ["train", "val", "test"]
-MAZE_SIZE_MIN, MAZE_SIZE_MAX = 3, 16
-MAZE_SIZE_CHOICES = ["All"] + [f"{n}×{n}" for n in range(MAZE_SIZE_MIN, MAZE_SIZE_MAX + 1)]
 IMAGE_COLS = ["original_img", "m_original_img", "sol_img", "mask_img", "cell_map"]
 # -------------------------
@@ -63,74 +72,167 @@ def decode_base64_image(base64_str: Any) -> Optional[Image.Image]:
         return None
 def infer_shape_from_repo_path(path: str) -> Optional[str]:
-    p = path.replace("\\", "/").lower()
-    for s in SHAPES:
-        if p.startswith(f"{s}/") or f"/{s}/" in p:
-            return s
     return None
 def infer_split_from_repo_path(path: str) -> Optional[str]:
-    p = path.replace("\\", "/").lower()
     fn = p.split("/")[-1]
-    if fn == "maze_dataset_train.parquet":
-        return "train"
-    if fn == "maze_dataset_test.parquet":
-        if "/maze-dataset_train/" in p:
             return "val"
-        if "/maze-dataset/" in p:
             return "test"
     return None
 def get_metadata_size(meta_str: Any) -> Optional[Tuple[int, int]]:
-    """
-    Your metadata structure says width/height are under maze_config (for non-circle).
-    Some datasets also duplicate width/height at top-level; we support both.
-    """
-    d, err = safe_json_loads(meta_str)
-    if not d or err:
         return None
-    mc = d.get("maze_config") if isinstance(d, dict) else None
-    if isinstance(mc, dict) and ("width" in mc) and ("height" in mc):
-        try:
-            return int(mc["width"]), int(mc["height"])
-        except Exception:
-            pass
-    if ("width" in d) and ("height" in d):
-        try:
-            return int(d["width"]), int(d["height"])
-        except Exception:
-            pass
     return None
-def filter_df_by_maze_size(df: pd.DataFrame, size_str: Optional[str]) -> pd.DataFrame:
-    if not size_str or size_str == "All":
-        return df
-    try:
-        a, b = size_str.split("×")
-        w, h = int(a), int(b)
-    except Exception:
-        return df
     if "metadata" not in df.columns:
-        return df
-    mask = df["metadata"].apply(lambda m: get_metadata_size(m) == (w, h))
     return df.loc[mask].reset_index(drop=True)
 def summarize_df(df: pd.DataFrame, filtered_len: Optional[int] = None) -> str:
-    base = f"{len(df)} rows · {len(df.columns)} cols"
     if filtered_len is not None and filtered_len != len(df):
-        base += f" · filtered: {filtered_len}"
     return base
@@ -156,22 +258,36 @@ def find_index_by_id(df: pd.DataFrame, sample_id: str) -> Optional[int]:
     return None
 # -------------------------
 # HF repo index + cache
 # -------------------------
-def build_repo_index() -> List[Dict[str, str]]:
     api = HfApi()
     files = api.list_repo_files(repo_id=DATASET_REPO_ID, repo_type=REPO_TYPE)
-    records: List[Dict[str, str]] = []
-    for f in files:
-        if not f.lower().endswith(".parquet"):
             continue
-        shape = infer_shape_from_repo_path(f)
-        split = infer_split_from_repo_path(f)
-        if shape and split:
-            records.append({"repo_path": f, "shape": shape, "split": split})
-    records.sort(key=lambda r: r["repo_path"])
     return records
@@ -187,14 +303,43 @@ def download_and_load_df(repo_path: str) -> pd.DataFrame:
     if local_path in _DF_CACHE:
         return _DF_CACHE[local_path]
-    wanted_cols = ["id", "instruction", "metadata"] + IMAGE_COLS
-    df = pd.read_parquet(local_path, columns=[c for c in wanted_cols if c is not None])
     _DF_CACHE[local_path] = df
     return df
-def get_repo_paths(records: List[Dict[str, str]], shape: str, split: str) -> List[str]:
-    out = [r["repo_path"] for r in (records or []) if r["shape"] == shape and r["split"] == split]
     out.sort()
     return out
@@ -204,19 +349,12 @@ def get_repo_paths(records: List[Dict[str, str]], shape: str, split: str) -> Lis
 # -------------------------
 def render_sample_view(df_filtered: pd.DataFrame, index: int):
     if len(df_filtered) == 0:
-        return (
-            0,
-            gr.update(value="No samples (after filtering)."),
-            "",
-            [],
-            {},
-            "",
-        )
     index = max(0, min(int(index), len(df_filtered) - 1))
     row = df_filtered.iloc[index]
-    sid = str(row.get("id", f"maze_{index}"))
     instruction = str(row.get("instruction", ""))
     original = decode_base64_image(row.get("original_img"))
@@ -241,9 +379,9 @@ def render_sample_view(df_filtered: pd.DataFrame, index: int):
         (mask, "Mask"),
         (cell_map, "Cell map"),
     ]
-    gallery_items = [(img, cap) for (img, cap) in gallery_items if img is not None]
-    status_md = f"**Sample** `{sid}`  \n**Index** `{index}` / `{len(df_filtered)-1}`"
     return index, status_md, instruction, gallery_items, meta_json, meta_raw
@@ -252,61 +390,98 @@ def render_sample_view(df_filtered: pd.DataFrame, index: int):
 # -------------------------
 def init_app():
     try:
-        recs = build_repo_index()
-        info_html = f"<div id='badges'><span class='badge'>✅ Indexed <b>{DATASET_REPO_ID}</b></span><span class='badge'>{len(recs)} parquet files</span></div>"
-        return recs, info_html
     except Exception as e:
-        return [], f"<div id='badges'><span class='badge'>❌ Failed to index: {e}</span></div>"
-def on_shape_split_change(records: List[Dict[str, str]], shape: str, split: str):
-    choices = get_repo_paths(records, shape, split)
     value = choices[0] if choices else None
-    tip_html = f"<div id='badges'><span class='badge'>Found <b>{len(choices)}</b> parquet file(s) for <b>{shape}</b> / <b>{split}</b></span></div>"
-    return gr.Dropdown(choices=choices, value=value), tip_html
 def get_filtered_df(repo_path: str, size_str: str) -> Tuple[pd.DataFrame, str]:
     df = download_and_load_df(repo_path)
-    filtered = filter_df_by_maze_size(df, size_str)
     summary = summarize_df(df, filtered_len=len(filtered))
     return filtered, summary
 def on_select_parquet(repo_path: str, size_str: str):
     if not repo_path:
-        return gr.update(value="<div id='badges'><span class='badge'>No parquet selected</span></div>"), gr.update(maximum=0, value=0)
-    filtered, summary = get_filtered_df(repo_path, size_str)
     max_idx = max(0, len(filtered) - 1)
-    summary_html = f"<div id='badges'><span class='badge'>{summary}</span></div>"
-    return gr.update(value=summary_html), gr.update(maximum=max_idx, value=0)
 def on_prev(repo_path: str, index: int, size_str: str):
     if not repo_path:
-        return 0, "No parquet selected.", "", [], {}, ""
     filtered, _ = get_filtered_df(repo_path, size_str)
     return render_sample_view(filtered, max(0, int(index) - 1))
 def on_next(repo_path: str, index: int, size_str: str):
     if not repo_path:
-        return 0, "No parquet selected.", "", [], {}, ""
     filtered, _ = get_filtered_df(repo_path, size_str)
     return render_sample_view(filtered, min(len(filtered) - 1, int(index) + 1))
 def on_show(repo_path: str, index: int, size_str: str):
     if not repo_path:
-        return 0, "No parquet selected.", "", [], {}, ""
     filtered, _ = get_filtered_df(repo_path, size_str)
     return render_sample_view(filtered, index)
 def on_random(repo_path: str, size_str: str):
     if not repo_path:
-        return 0, "No parquet selected.", "", [], {}, ""
     filtered, _ = get_filtered_df(repo_path, size_str)
     if len(filtered) == 0:
         return render_sample_view(filtered, 0)
@@ -315,12 +490,12 @@ def on_random(repo_path: str, size_str: str):
 def on_find_id(repo_path: str, query_id: str, size_str: str):
     if not repo_path:
-        return 0, "No parquet selected.", "", [], {}, ""
     filtered, _ = get_filtered_df(repo_path, size_str)
     pos = find_index_by_id(filtered, query_id.strip() if isinstance(query_id, str) else "")
     if pos is None:
         out = list(render_sample_view(filtered, 0))
-        out[1] = out[1] + f"  \n⚠️ id search `{query_id}` not found"
         return tuple(out)
     return render_sample_view(filtered, pos)
@@ -329,12 +504,9 @@ def on_find_id(repo_path: str, query_id: str, size_str: str):
 # UI (styled)
 # -------------------------
 CSS = """
-/* 使用系统默认字体 */
 .gradio-container { font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif !important; }
-/* 全局：页面居中 + 不要铺满 */
 .gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
-/* 顶部控制卡片：紧凑、没有大灰底空白 */
 #topbar {
   padding: 12px 14px;
   border-radius: 16px;
@@ -343,14 +515,9 @@ CSS = """
 }
 #topbar .gr-row { flex-wrap: wrap; gap: 10px; }
 #topbar .gr-form { margin-bottom: 0 !important; }
-/* 输入/下拉更紧凑 */
 #topbar input, #topbar textarea, #topbar .wrap { border-radius: 12px !important; }
-/* 按钮统一，不要变成右侧巨大菜单 */
 #topbar button { height: 42px !important; border-radius: 12px !important; }
-/* badges */
 #badges { display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
 .badge {
   padding: 6px 10px;
@@ -361,12 +528,9 @@ CSS = """
   line-height: 1.2;
 }
-/* Index 一行，按钮单独一行并向下留间距 */
 #toolbar .gr-row { align-items: end; }
 #toolbar-btns { margin-top: 12px; }
 #toolbar-btns .gr-row { align-items: end; }
-/* Gallery 更像 viewer */
 #viewer { margin-top: 10px; }
 """
@@ -375,49 +539,45 @@ THEME = gr.themes.Soft(
     text_size=gr.themes.sizes.text_md,
 )
 def build_ui():
     with gr.Blocks(title="Amaze Viewer", theme=THEME, css=CSS) as demo:
         gr.Markdown(
             f"""
             # Amaze
-            Dataset: https://huggingface.co/datasets/piekenius123/Amaze
-            Amaze is a benchmark for Edting-as-Reasoning task (EAR). It features four maze shapes: circle, hexagon, square, and triangle. Each sample provides: an unmarked maze image (original_img), a maze image with start and end points marked (m_original_img), a blue solution path image (sol_img), a binary path mask (mask_img), a cell segmentation map (cell_map), and metadata (JSON) for describing the maze structure and difficulty.
-            The test set covers various sizes from 3×3 to 16×16 (50 samples for each size), while the training set mainly consists of 3×3 mazes (1024 samples), and validation set consists of 3×3 mazes (256 samples).
-            Browse samples by **shape / split / maze size**, then view images + metadata.
             """
         )
         records_state = gr.State([])
-        # Top control bar (compact card)
         with gr.Column(elem_id="topbar"):
             with gr.Row():
                 parquet_tip = gr.HTML(value="<div id='badges'></div>")
                 summary_badge = gr.HTML(value="<div id='badges'><span class='badge'>No parquet selected</span></div>")
-                scan_info = gr.HTML(value="<div id='badges'><span class='badge'>Indexing dataset repo…</span></div>")
             with gr.Row():
                 shape_dd = gr.Dropdown(label="Shape", choices=SHAPES, value="circle", scale=1)
                 split_dd = gr.Dropdown(label="Split", choices=SPLITS, value="test", scale=1)
-                size_dd = gr.Dropdown(label="Maze size", choices=MAZE_SIZE_CHOICES, value="All", scale=1)
                 parquet_dd = gr.Dropdown(label="Parquet", choices=[], value=None, scale=2)
             with gr.Row(elem_id="toolbar"):
                 id_query = gr.Textbox(label="Find by id", placeholder="UUID or substring", scale=2)
                 idx_slider = gr.Slider(label="Index", minimum=0, maximum=0, value=0, step=1, scale=2)
             with gr.Row():
-                prev_btn = gr.Button("⬅ Prev", variant="secondary", scale=1)
-                next_btn = gr.Button("Next ➡", variant="secondary", scale=1)
-                random_btn = gr.Button("🎲 Random", variant="primary", scale=1)
-                find_btn = gr.Button("🔎 Find", variant="secondary", scale=1)
                 show_btn = gr.Button("Show", variant="secondary", scale=1)
-        # Main viewer layout
         with gr.Row(elem_id="viewer"):
             with gr.Column(scale=3):
                 status_md = gr.Markdown(elem_id="status")
@@ -436,42 +596,72 @@ def build_ui():
                 with gr.Accordion("Metadata (raw)", open=False):
                     meta_raw = gr.Textbox(lines=10, interactive=False)
-        # ---- events ----
         demo.load(
             fn=init_app,
             inputs=None,
             outputs=[records_state, scan_info],
         ).then(
-            fn=on_shape_split_change,
-            inputs=[records_state, shape_dd, split_dd],
-            outputs=[parquet_dd, parquet_tip],
         ).then(
-            fn=lambda p, s: on_select_parquet(p, s) if p else (gr.update(value="<div id='badges'><span class='badge'>No parquet selected</span></div>"), gr.update(maximum=0, value=0)),
             inputs=[parquet_dd, size_dd],
-            outputs=[summary_badge, idx_slider],
         ).then(
-            fn=lambda p, s: on_show(p, 0, s) if p else (0, "No parquet selected.", "", [], {}, ""),
             inputs=[parquet_dd, size_dd],
             outputs=[idx_slider, status_md, instruction, gallery, meta_json, meta_raw],
         )
         shape_dd.change(
-            fn=on_shape_split_change,
-            inputs=[records_state, shape_dd, split_dd],
             outputs=[parquet_dd, parquet_tip],
         )
         split_dd.change(
-            fn=on_shape_split_change,
-            inputs=[records_state, shape_dd, split_dd],
             outputs=[parquet_dd, parquet_tip],
         )
         parquet_dd.change(
             fn=on_select_parquet,
             inputs=[parquet_dd, size_dd],
-            outputs=[summary_badge, idx_slider],
         ).then(
-            fn=lambda p, s: on_show(p, 0, s) if p else (0, "No parquet selected.", "", [], {}, ""),
             inputs=[parquet_dd, size_dd],
             outputs=[idx_slider, status_md, instruction, gallery, meta_json, meta_raw],
         )
@@ -479,9 +669,9 @@ def build_ui():
         size_dd.change(
             fn=on_select_parquet,
             inputs=[parquet_dd, size_dd],
-            outputs=[summary_badge, idx_slider],
         ).then(
-            fn=lambda p, s: on_show(p, 0, s) if p else (0, "No parquet selected.", "", [], {}, ""),
             inputs=[parquet_dd, size_dd],
             outputs=[idx_slider, status_md, instruction, gallery, meta_json, meta_raw],
         )
@@ -522,4 +712,4 @@ def build_ui():
 if __name__ == "__main__":
     demo = build_ui()
-    demo.launch()

+import base64
 import io
 import json
 import random
+import re
+from typing import Any, Dict, List, Optional, Tuple
 import gradio as gr
+import pandas as pd
 from huggingface_hub import HfApi, hf_hub_download
+from PIL import Image
 DATASET_REPO_ID = "piekenius123/Amaze"
 REPO_TYPE = "dataset"
+TASKS = ["maze", "queen"]
+DEFAULT_TASK = "maze"
+DEFAULT_SIZE_CHOICE = "All"
 SHAPES = ["circle", "hexagon", "square", "triangle"]
 SPLITS = ["train", "val", "test"]
 IMAGE_COLS = ["original_img", "m_original_img", "sol_img", "mask_img", "cell_map"]
+SIZE_CONTAINER_KEYS = ["maze_config", "queen_config", "board_config", "config"]
+SIZE_PAIR_KEYS = [
+    ("width", "height"),
+    ("cols", "rows"),
+    ("board_width", "board_height"),
+    ("board_cols", "board_rows"),
+]
+SIZE_SCALAR_KEYS = ["size", "board_size", "n", "board_n"]
 # -------------------------
         return None
def normalize_repo_path(path: str) -> str:
    """Return *path* lower-cased, with backslashes replaced by forward slashes."""
    forward_slashed = path.replace("\\", "/")
    return forward_slashed.lower()
def get_path_segments(path: str) -> List[str]:
    """Split the normalized *path* on "/" and drop empty pieces."""
    pieces = normalize_repo_path(path).split("/")
    return list(filter(None, pieces))
def get_path_tokens(path: str) -> List[str]:
    """Split the normalized *path* at runs of "/", "_", "." and "-".

    Empty tokens (from leading or trailing separators) are discarded.
    """
    raw_tokens = re.split(r"[/_.-]+", normalize_repo_path(path))
    return [token for token in raw_tokens if token]
def infer_task_from_repo_path(path: str) -> str:
    """Guess which task a repo file belongs to from its path.

    A path segment that equals a task name wins, with tasks checked in
    ``TASKS`` order. Otherwise the first path token that starts with
    "queen" or "maze" decides. If nothing matches, ``DEFAULT_TASK`` is
    returned.
    """
    segments = get_path_segments(path)
    exact_match = next((task for task in TASKS if task in segments), None)
    if exact_match is not None:
        return exact_match

    for token in get_path_tokens(path):
        # "queen" is tested before "maze" for each token.
        for prefix in ("queen", "maze"):
            if token.startswith(prefix):
                return prefix
    return DEFAULT_TASK
 def infer_shape_from_repo_path(path: str) -> Optional[str]:
+    segments = get_path_segments(path)
+    for shape in SHAPES:
+        if shape in segments:
+            return shape
     return None
 def infer_split_from_repo_path(path: str) -> Optional[str]:
+    p = normalize_repo_path(path)
     fn = p.split("/")[-1]
+    segments = get_path_segments(path)
+    # Backward compatibility for the original maze repo layout:
+    # maze-dataset_train/maze_dataset_test.parquet is the validation split.
+    if "/maze-dataset_train/" in p and fn == "maze_dataset_test.parquet":
+        return "val"
+    if "/maze-dataset/" in p and fn == "maze_dataset_test.parquet":
+        return "test"
+    filename_checks = [
+        (r"(?:^|[_-])train(?:ing)?(?:[_-]|\.|$)", "train"),
+        (r"(?:^|[_-])(?:val|valid|validation)(?:[_-]|\.|$)", "val"),
+        (r"(?:^|[_-])test(?:[_-]|\.|$)", "test"),
+    ]
+    for pattern, split in filename_checks:
+        if re.search(pattern, fn):
+            return split
+    for seg in reversed(segments[:-1]):
+        if seg in {"train", "training"}:
+            return "train"
+        if seg in {"val", "valid", "validation"}:
             return "val"
+        if seg == "test":
             return "test"
     return None
def iter_metadata_containers(meta: Dict[str, Any]) -> List[Dict[str, Any]]:
    """List the dicts that may hold size information.

    The result starts with *meta* itself, followed by every value stored
    under a ``SIZE_CONTAINER_KEYS`` key that is itself a dict, in key order.
    """
    nested_values = (meta.get(key) for key in SIZE_CONTAINER_KEYS)
    return [meta, *(value for value in nested_values if isinstance(value, dict))]
def parse_square_size(value: Any) -> Optional[Tuple[int, int]]:
    """Interpret a scalar metadata value as a (width, height) size.

    Accepted inputs:
      * int ``n``            -> ``(n, n)``
      * finite float ``f``   -> ``(int(f), int(f))``
      * str ``"WxH"``/``"W×H"`` (case-insensitive, spaces allowed) -> ``(W, H)``
      * str of decimal digits ``"n"`` -> ``(n, n)``

    Returns None for anything else, including bools, NaN and infinities.
    """
    # bool is a subclass of int; True/False are never meaningful sizes.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value, value
    if isinstance(value, float):
        try:
            side = int(value)
        except (ValueError, OverflowError):
            # int(nan) raises ValueError, int(+/-inf) raises OverflowError.
            return None
        return side, side
    if isinstance(value, str):
        text = value.strip().lower()
        match = re.fullmatch(r"(\d+)\s*[x×]\s*(\d+)", text)
        if match:
            return int(match.group(1)), int(match.group(2))
        # isdecimal() (unlike isdigit()) only accepts characters that int()
        # can convert, so e.g. a superscript "²" no longer raises ValueError.
        if text.isdecimal():
            side = int(text)
            return side, side
    return None
 def get_metadata_size(meta_str: Any) -> Optional[Tuple[int, int]]:
+    meta, err = safe_json_loads(meta_str)
+    if not meta or err:
         return None
+    for container in iter_metadata_containers(meta):
+        for width_key, height_key in SIZE_PAIR_KEYS:
+            if width_key in container and height_key in container:
+                try:
+                    return int(container[width_key]), int(container[height_key])
+                except Exception:
+                    pass
+        for scalar_key in SIZE_SCALAR_KEYS:
+            if scalar_key in container:
+                parsed = parse_square_size(container[scalar_key])
+                if parsed:
+                    return parsed
     return None
def format_size_choice(size: Tuple[int, int]) -> str:
    """Render a (width, height) pair as a dropdown label such as "3x4"."""
    return "{}x{}".format(size[0], size[1])
def parse_size_choice(size_str: Optional[str]) -> Optional[Tuple[int, int]]:
    """Parse a size dropdown label ("WxH" or "W×H") into (width, height).

    Returns None for an empty value, for ``DEFAULT_SIZE_CHOICE``, and for
    any text that is not of that form.
    """
    if size_str and size_str != DEFAULT_SIZE_CHOICE:
        found = re.fullmatch(r"\s*(\d+)\s*[x×]\s*(\d+)\s*", size_str)
        if found:
            width_text, height_text = found.groups()
            return int(width_text), int(height_text)
    return None
def get_size_choices(df: pd.DataFrame) -> List[str]:
    """Build the size dropdown choices for *df*.

    The list always starts with ``DEFAULT_SIZE_CHOICE``. The distinct sizes
    found in the "metadata" column follow, ordered by area, then width,
    then height. Without a "metadata" column only the default is returned.
    """
    if "metadata" not in df.columns:
        return [DEFAULT_SIZE_CHOICE]

    distinct_sizes = set()
    for size in df["metadata"].map(get_metadata_size):
        if size is not None:
            distinct_sizes.add(size)

    def by_area_then_dims(wh):
        return (wh[0] * wh[1], wh[0], wh[1])

    labels = [format_size_choice(size) for size in sorted(distinct_sizes, key=by_area_then_dims)]
    return [DEFAULT_SIZE_CHOICE, *labels]
def filter_df_by_size(df: pd.DataFrame, size_str: Optional[str]) -> pd.DataFrame:
    """Keep only rows whose metadata size equals the selected size.

    When *size_str* does not parse to a size, or *df* has no "metadata"
    column, all rows are kept. The returned frame always has a fresh
    0-based index.
    """
    target_size = parse_size_choice(size_str)
    if target_size is not None and "metadata" in df.columns:
        keep = df["metadata"].map(lambda meta: get_metadata_size(meta) == target_size)
        return df.loc[keep].reset_index(drop=True)
    return df.reset_index(drop=True)
 def summarize_df(df: pd.DataFrame, filtered_len: Optional[int] = None) -> str:
+    base = f"{len(df)} rows | {len(df.columns)} cols"
     if filtered_len is not None and filtered_len != len(df):
+        base += f" | filtered: {filtered_len}"
     return base
     return None
def empty_sample_view(message: str = "No parquet selected."):
    """Return a placeholder result with the same shape as a rendered sample.

    The tuple is (index, status text, instruction, gallery items,
    metadata dict, raw metadata text), with *message* as the status text
    and every other slot empty.
    """
    index, instruction, gallery_items, meta_json, meta_raw = 0, "", [], {}, ""
    return index, message, instruction, gallery_items, meta_json, meta_raw
 # -------------------------
 # HF repo index + cache
 # -------------------------
def build_repo_index() -> List[Dict[str, Optional[str]]]:
    """List the dataset repo's parquet files with inferred task/shape/split.

    Files whose split cannot be inferred are left out. Each record has the
    keys "repo_path", "task", "shape" (may be None) and "split". Records
    are sorted by task, shape, split and then path.
    """
    api = HfApi()
    all_files = api.list_repo_files(repo_id=DATASET_REPO_ID, repo_type=REPO_TYPE)
    parquet_paths = [name for name in all_files if name.lower().endswith(".parquet")]

    records: List[Dict[str, Optional[str]]] = []
    for repo_path in parquet_paths:
        split = infer_split_from_repo_path(repo_path)
        if not split:
            continue
        records.append(
            dict(
                repo_path=repo_path,
                task=infer_task_from_repo_path(repo_path),
                shape=infer_shape_from_repo_path(repo_path),
                split=split,
            )
        )

    def sort_key(record):
        # None sorts as the empty string so records without a shape come first.
        return tuple(record[field] or "" for field in ("task", "shape", "split", "repo_path"))

    records.sort(key=sort_key)
    return records
     if local_path in _DF_CACHE:
         return _DF_CACHE[local_path]
+    df = pd.read_parquet(local_path)
     _DF_CACHE[local_path] = df
     return df
def get_shape_choices(records: List[Dict[str, Optional[str]]], task: str) -> List[str]:
    """Return the shapes available for *task*.

    Shapes come from the indexed records of that task, ordered as in
    ``SHAPES`` with unknown shapes last. If the index has none, the maze
    task falls back to a copy of ``SHAPES`` and any other task to an empty
    list.
    """
    seen = set()
    for record in records or []:
        if record.get("task") == task and record.get("shape"):
            seen.add(record["shape"])

    def rank(shape):
        return SHAPES.index(shape) if shape in SHAPES else len(SHAPES)

    ordered = sorted(seen, key=rank)
    if ordered:
        return ordered
    return SHAPES.copy() if task == "maze" else []
def get_default_shape(task: str, choices: List[str]) -> str:
    """Pick the shape to preselect.

    Returns "All" when there are no choices, "circle" for the maze task
    when it is available, and otherwise the first choice.
    """
    if not choices:
        return "All"
    prefers_circle = task == "maze" and "circle" in choices
    return "circle" if prefers_circle else choices[0]
def get_repo_paths(records: List[Dict[str, Optional[str]]], task: str, shape: str, split: str) -> List[str]:
    """Return the sorted repo paths that match *task*, *split* and *shape*.

    An empty *shape* or the value "All" disables the shape filter.
    """
    any_shape = not shape or shape == "All"
    return sorted(
        str(record["repo_path"])
        for record in records or []
        if record.get("task") == task
        and record.get("split") == split
        and (any_shape or record.get("shape") == shape)
    )
 # -------------------------
 def render_sample_view(df_filtered: pd.DataFrame, index: int):
     if len(df_filtered) == 0:
+        return empty_sample_view("No samples after filtering.")
     index = max(0, min(int(index), len(df_filtered) - 1))
     row = df_filtered.iloc[index]
+    sid = str(row.get("id", f"sample_{index}"))
     instruction = str(row.get("instruction", ""))
     original = decode_base64_image(row.get("original_img"))
         (mask, "Mask"),
         (cell_map, "Cell map"),
     ]
+    gallery_items = [(img, caption) for (img, caption) in gallery_items if img is not None]
+    status_md = f"**Sample** `{sid}`  \n**Index** `{index}` / `{len(df_filtered) - 1}`"
     return index, status_md, instruction, gallery_items, meta_json, meta_raw
 # -------------------------
 def init_app():
     try:
+        records = build_repo_index()
+        info_html = (
+            "<div id='badges'>"
+            f"<span class='badge'>Indexed <b>{DATASET_REPO_ID}</b></span>"
+            f"<span class='badge'>{len(records)} parquet files</span>"
+            "</div>"
+        )
+        return records, info_html
     except Exception as e:
+        return [], f"<div id='badges'><span class='badge'>Failed to index: {e}</span></div>"
def build_parquet_dropdown(records: List[Dict[str, Optional[str]]], task: str, shape: str, split: str):
    """Build the parquet dropdown update and its "Found N file(s)" badge.

    Returns a ``(dropdown_update, tip_html)`` pair. The dropdown lists the
    matching repo paths and preselects the first one (or None when there
    are no matches).
    """
    choices = get_repo_paths(records, task, shape, split)
    value = choices[0] if choices else None

    # The shape is left out of the badge when no shape filter is active.
    if shape == "All":
        scope = f"{task} / {split}"
    else:
        scope = f"{task} / {shape} / {split}"

    tip_html = "".join(
        [
            "<div id='badges'>",
            f"<span class='badge'>Found <b>{len(choices)}</b> parquet file(s) for <b>{scope}</b></span>",
            "</div>",
        ]
    )
    return gr.update(choices=choices, value=value), tip_html
def on_task_change(records: List[Dict[str, Optional[str]]], task: str, split: str):
    """Refresh the shape and parquet dropdowns after the task changes.

    Returns ``(shape_update, parquet_update, tip_html)``. The shape
    dropdown is shown for the maze task or whenever the task has shapes;
    when there are none its only choice is "All".
    """
    found_shapes = get_shape_choices(records, task)
    shape_visible = task == "maze" or bool(found_shapes)
    shape_choices = found_shapes or ["All"]
    shape_value = get_default_shape(task, shape_choices)

    parquet_update, tip_html = build_parquet_dropdown(records, task, shape_value, split)
    shape_update = gr.update(choices=shape_choices, value=shape_value, visible=shape_visible)
    return shape_update, parquet_update, tip_html
def on_task_shape_split_change(records: List[Dict[str, Optional[str]]], task: str, shape: str, split: str):
    """Rebuild the parquet dropdown; an empty *shape* is treated as "All"."""
    effective_shape = shape if shape else "All"
    return build_parquet_dropdown(records, task, effective_shape, split)
 def get_filtered_df(repo_path: str, size_str: str) -> Tuple[pd.DataFrame, str]:
     df = download_and_load_df(repo_path)
+    filtered = filter_df_by_size(df, size_str)
     summary = summarize_df(df, filtered_len=len(filtered))
     return filtered, summary
 def on_select_parquet(repo_path: str, size_str: str):
     if not repo_path:
+        return (
+            gr.update(value="<div id='badges'><span class='badge'>No parquet selected</span></div>"),
+            gr.update(maximum=0, value=0),
+            gr.update(choices=[DEFAULT_SIZE_CHOICE], value=DEFAULT_SIZE_CHOICE),
+        )
+    df = download_and_load_df(repo_path)
+    size_choices = get_size_choices(df)
+    size_value = size_str if size_str in size_choices else DEFAULT_SIZE_CHOICE
+    filtered = filter_df_by_size(df, size_value)
     max_idx = max(0, len(filtered) - 1)
+    summary_html = f"<div id='badges'><span class='badge'>{summarize_df(df, filtered_len=len(filtered))}</span></div>"
+    return (
+        gr.update(value=summary_html),
+        gr.update(maximum=max_idx, value=0),
+        gr.update(choices=size_choices, value=size_value),
+    )
 def on_prev(repo_path: str, index: int, size_str: str):
     if not repo_path:
+        return empty_sample_view()
     filtered, _ = get_filtered_df(repo_path, size_str)
     return render_sample_view(filtered, max(0, int(index) - 1))
 def on_next(repo_path: str, index: int, size_str: str):
     if not repo_path:
+        return empty_sample_view()
     filtered, _ = get_filtered_df(repo_path, size_str)
     return render_sample_view(filtered, min(len(filtered) - 1, int(index) + 1))
 def on_show(repo_path: str, index: int, size_str: str):
     if not repo_path:
+        return empty_sample_view()
     filtered, _ = get_filtered_df(repo_path, size_str)
     return render_sample_view(filtered, index)
 def on_random(repo_path: str, size_str: str):
     if not repo_path:
+        return empty_sample_view()
     filtered, _ = get_filtered_df(repo_path, size_str)
     if len(filtered) == 0:
         return render_sample_view(filtered, 0)
 def on_find_id(repo_path: str, query_id: str, size_str: str):
     if not repo_path:
+        return empty_sample_view()
     filtered, _ = get_filtered_df(repo_path, size_str)
     pos = find_index_by_id(filtered, query_id.strip() if isinstance(query_id, str) else "")
     if pos is None:
         out = list(render_sample_view(filtered, 0))
+        out[1] = out[1] + f"  \nID search `{query_id}` not found"
         return tuple(out)
     return render_sample_view(filtered, pos)
 # UI (styled)
 # -------------------------
 CSS = """
 .gradio-container { font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif !important; }
 .gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
 #topbar {
   padding: 12px 14px;
   border-radius: 16px;
 }
 #topbar .gr-row { flex-wrap: wrap; gap: 10px; }
 #topbar .gr-form { margin-bottom: 0 !important; }
 #topbar input, #topbar textarea, #topbar .wrap { border-radius: 12px !important; }
 #topbar button { height: 42px !important; border-radius: 12px !important; }
 #badges { display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
 .badge {
   padding: 6px 10px;
   line-height: 1.2;
 }
 #toolbar .gr-row { align-items: end; }
 #toolbar-btns { margin-top: 12px; }
 #toolbar-btns .gr-row { align-items: end; }
 #viewer { margin-top: 10px; }
 """
     text_size=gr.themes.sizes.text_md,
 )
 def build_ui():
     with gr.Blocks(title="Amaze Viewer", theme=THEME, css=CSS) as demo:
         gr.Markdown(
             f"""
             # Amaze
+            Dataset: https://huggingface.co/datasets/piekenius123/Amaze
+            Browse samples by **task / shape / split / size**, then inspect the images and metadata.
+            Maze and Queen share the same viewer so the visualization panel stays unchanged.
             """
         )
         records_state = gr.State([])
         with gr.Column(elem_id="topbar"):
             with gr.Row():
                 parquet_tip = gr.HTML(value="<div id='badges'></div>")
                 summary_badge = gr.HTML(value="<div id='badges'><span class='badge'>No parquet selected</span></div>")
+                scan_info = gr.HTML(value="<div id='badges'><span class='badge'>Indexing dataset repo...</span></div>")
             with gr.Row():
+                task_dd = gr.Dropdown(label="Task", choices=TASKS, value=DEFAULT_TASK, scale=1)
                 shape_dd = gr.Dropdown(label="Shape", choices=SHAPES, value="circle", scale=1)
                 split_dd = gr.Dropdown(label="Split", choices=SPLITS, value="test", scale=1)
+                size_dd = gr.Dropdown(label="Size", choices=[DEFAULT_SIZE_CHOICE], value=DEFAULT_SIZE_CHOICE, scale=1)
                 parquet_dd = gr.Dropdown(label="Parquet", choices=[], value=None, scale=2)
             with gr.Row(elem_id="toolbar"):
                 id_query = gr.Textbox(label="Find by id", placeholder="UUID or substring", scale=2)
                 idx_slider = gr.Slider(label="Index", minimum=0, maximum=0, value=0, step=1, scale=2)
             with gr.Row():
+                prev_btn = gr.Button("Prev", variant="secondary", scale=1)
+                next_btn = gr.Button("Next", variant="secondary", scale=1)
+                random_btn = gr.Button("Random", variant="primary", scale=1)
+                find_btn = gr.Button("Find", variant="secondary", scale=1)
                 show_btn = gr.Button("Show", variant="secondary", scale=1)
         with gr.Row(elem_id="viewer"):
             with gr.Column(scale=3):
                 status_md = gr.Markdown(elem_id="status")
                 with gr.Accordion("Metadata (raw)", open=False):
                     meta_raw = gr.Textbox(lines=10, interactive=False)
         demo.load(
             fn=init_app,
             inputs=None,
             outputs=[records_state, scan_info],
         ).then(
+            fn=on_task_change,
+            inputs=[records_state, task_dd, split_dd],
+            outputs=[shape_dd, parquet_dd, parquet_tip],
+        ).then(
+            fn=on_select_parquet,
+            inputs=[parquet_dd, size_dd],
+            outputs=[summary_badge, idx_slider, size_dd],
+        ).then(
+            fn=lambda p, s: on_show(p, 0, s) if p else empty_sample_view(),
+            inputs=[parquet_dd, size_dd],
+            outputs=[idx_slider, status_md, instruction, gallery, meta_json, meta_raw],
+        )
+        task_dd.change(
+            fn=on_task_change,
+            inputs=[records_state, task_dd, split_dd],
+            outputs=[shape_dd, parquet_dd, parquet_tip],
         ).then(
+            fn=on_select_parquet,
             inputs=[parquet_dd, size_dd],
+            outputs=[summary_badge, idx_slider, size_dd],
         ).then(
+            fn=lambda p, s: on_show(p, 0, s) if p else empty_sample_view(),
             inputs=[parquet_dd, size_dd],
             outputs=[idx_slider, status_md, instruction, gallery, meta_json, meta_raw],
         )
         shape_dd.change(
+            fn=on_task_shape_split_change,
+            inputs=[records_state, task_dd, shape_dd, split_dd],
             outputs=[parquet_dd, parquet_tip],
+        ).then(
+            fn=on_select_parquet,
+            inputs=[parquet_dd, size_dd],
+            outputs=[summary_badge, idx_slider, size_dd],
+        ).then(
+            fn=lambda p, s: on_show(p, 0, s) if p else empty_sample_view(),
+            inputs=[parquet_dd, size_dd],
+            outputs=[idx_slider, status_md, instruction, gallery, meta_json, meta_raw],
         )
         split_dd.change(
+            fn=on_task_shape_split_change,
+            inputs=[records_state, task_dd, shape_dd, split_dd],
             outputs=[parquet_dd, parquet_tip],
+        ).then(
+            fn=on_select_parquet,
+            inputs=[parquet_dd, size_dd],
+            outputs=[summary_badge, idx_slider, size_dd],
+        ).then(
+            fn=lambda p, s: on_show(p, 0, s) if p else empty_sample_view(),
+            inputs=[parquet_dd, size_dd],
+            outputs=[idx_slider, status_md, instruction, gallery, meta_json, meta_raw],
         )
         parquet_dd.change(
             fn=on_select_parquet,
             inputs=[parquet_dd, size_dd],
+            outputs=[summary_badge, idx_slider, size_dd],
         ).then(
+            fn=lambda p, s: on_show(p, 0, s) if p else empty_sample_view(),
             inputs=[parquet_dd, size_dd],
             outputs=[idx_slider, status_md, instruction, gallery, meta_json, meta_raw],
         )
         size_dd.change(
             fn=on_select_parquet,
             inputs=[parquet_dd, size_dd],
+            outputs=[summary_badge, idx_slider, size_dd],
         ).then(
+            fn=lambda p, s: on_show(p, 0, s) if p else empty_sample_view(),
             inputs=[parquet_dd, size_dd],
             outputs=[idx_slider, status_md, instruction, gallery, meta_json, meta_raw],
         )
 if __name__ == "__main__":
     demo = build_ui()
+    demo.launch()