# inkslop-viewer / app.py
# Hosting-page header captured with the file (not part of the program):
#   amaksay's picture | Sync from GitHub | 9832329 verified
"""HuggingFace Spaces entry point for InkSlop Benchmark Viewer.
This app downloads hard datasets and results from HuggingFace,
then serves the Gradio viewer.
"""
import logging
import os
import shutil
# Disable SSR mode - causes rendering issues with CheckboxGroup and
# dynamic @gr.render components in Gradio 6.x
os.environ["GRADIO_SSR_MODE"] = "False"
from pathlib import Path
from huggingface_hub import snapshot_download
# Configure root logging once: INFO and above, with a timestamped,
# logger-name-prefixed line format.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the download helpers in this file.
logger = logging.getLogger(__name__)
# Local cache layout: everything downloaded lives under one /tmp directory,
# with datasets and results kept in separate subdirectories.
CACHE_DIR = Path("/tmp/inkslop_cache")
DATASETS_DIR = CACHE_DIR.joinpath("datasets")
RESULTS_DIR = CACHE_DIR.joinpath("results")

# Task id -> HuggingFace dataset repo id. The task id doubles as the name of
# the directory each dataset is downloaded into under DATASETS_DIR.
HF_DATASETS = {
    "overlap_hard": "amaksay/inkslop-overlap-hard",
    "autocomplete_hard": "amaksay/inkslop-autocomplete-hard",
    "derender_hard": "amaksay/inkslop-derender-hard",
    "mazes_hard": "amaksay/inkslop-mazes-hard",
}

# HuggingFace dataset repo id holding the results.
HF_RESULTS = "amaksay/inkslop-results"
def restructure_dataset(task_dir: Path, task_id: str) -> None:
    """Restructure a downloaded dataset so ``samples/`` is at its root.

    HF datasets have structure:
        task_dir/source_data/samples/...  (sample records we need)
        task_dir/original/...             (intermediate files, not needed)

    ``source_data/samples/`` is moved to ``task_dir/samples/`` and the
    ``source_data`` and ``original`` directories are then removed.
    This ensures the dataset_name matches the task_id in results.

    The call does nothing when ``task_dir/samples`` already exists, and
    only logs a warning when ``task_dir/source_data/samples`` is missing.

    Args:
        task_dir: Directory the dataset was downloaded into.
        task_id: Task identifier; used only in log messages.
    """
    target = task_dir / "samples"
    if target.exists():
        logger.info(f"{task_id} already restructured")
        return

    # Look for source_data/samples/
    source_data_samples = task_dir / "source_data" / "samples"
    if not source_data_samples.exists():
        logger.warning(f"No source_data/samples/ found in {task_dir}, skipping restructure")
        return

    logger.info(f"Restructuring {task_id}: moving source_data/samples/ to root")
    # Move samples to task root
    shutil.move(str(source_data_samples), str(target))

    # Clean up the now-unneeded source_data and original directories.
    for subdir in ("source_data", "original"):
        path = task_dir / subdir
        if not path.exists():
            continue
        try:
            shutil.rmtree(path)
        except OSError as exc:
            # Cleanup is best-effort: samples/ is already in place, so a
            # leftover directory must not abort startup. Record the failure
            # instead of hiding it.
            logger.warning(f"Could not remove {path}: {exc}")
def download_hf_datasets() -> Path:
    """Make sure every dataset in ``HF_DATASETS`` is present locally.

    Each repo is downloaded into ``DATASETS_DIR / <task_id>`` and then passed
    to ``restructure_dataset``. A task whose directory already contains
    ``samples/`` is treated as cached and is not downloaded again.

    Returns:
        ``DATASETS_DIR``.

    Raises:
        Exception: Any error raised while downloading or restructuring is
            logged and then re-raised unchanged.
    """
    DATASETS_DIR.mkdir(parents=True, exist_ok=True)

    for task_id, repo_id in HF_DATASETS.items():
        dataset_root = DATASETS_DIR / task_id

        # samples/ only appears at this level after a successful restructure,
        # so its presence marks the task as ready to use.
        if (dataset_root / "samples").exists():
            logger.info(f"Using cached {task_id}")
        else:
            logger.info(f"Downloading {repo_id}...")
            try:
                fetched_to = snapshot_download(
                    repo_id=repo_id,
                    repo_type="dataset",
                    local_dir=dataset_root,
                )
                logger.info(f"Downloaded to {fetched_to}")
                # Restructure so samples/ is at task root
                restructure_dataset(dataset_root, task_id)
            except Exception as err:
                logger.error(f"Failed to download {repo_id}: {err}")
                raise

    return DATASETS_DIR
def download_results() -> Path:
    """Make sure the ``HF_RESULTS`` dataset repo is present locally.

    If ``RESULTS_DIR`` exists and holds at least one entry, it is used as-is;
    otherwise the repo is downloaded into ``RESULTS_DIR``.

    Returns:
        ``RESULTS_DIR``.

    Raises:
        Exception: Any download error is logged and then re-raised unchanged.
    """
    has_cached_results = RESULTS_DIR.exists() and any(RESULTS_DIR.iterdir())

    if not has_cached_results:
        logger.info(f"Downloading {HF_RESULTS}...")
        try:
            fetched_to = snapshot_download(
                repo_id=HF_RESULTS,
                repo_type="dataset",
                local_dir=RESULTS_DIR,
            )
            logger.info(f"Downloaded results to {fetched_to}")
        except Exception as err:
            logger.error(f"Failed to download results: {err}")
            raise
    else:
        logger.info("Using cached results")

    return RESULTS_DIR
def setup() -> tuple[Path, Path]:
    """Fetch the datasets, then the results, needed by the viewer.

    Returns:
        A ``(datasets_dir, results_dir)`` pair as returned by
        ``download_hf_datasets`` and ``download_results`` respectively.
    """
    logger.info("Setting up InkSlop viewer...")
    # Tuple elements are evaluated left to right: datasets first, then results.
    data_paths = (download_hf_datasets(), download_results())
    logger.info("Setup complete!")
    return data_paths
# Importing this module triggers the downloads; the returned directories are
# handed to the viewer below.
datasets_dir, results_dir = setup()

# The viewer is imported only after setup() has run (and after
# GRADIO_SSR_MODE was set near the top of the file).
from inkslop.visualization.gradio_viewer import create_app

# Module-level app object, built against the downloaded data.
app = create_app(records_root=datasets_dir, results_root=results_dir, hf_mode=True)

if __name__ == "__main__":
    app.launch()