Spaces:

amaksay
/

inkslop-viewer

Sleeping

App Files Files Community

inkslop-viewer / app.py

amaksay

Sync from GitHub

9832329 verified 3 months ago

raw

history blame contribute delete

4.3 kB

	"""HuggingFace Spaces entry point for InkSlop Benchmark Viewer.

	This app downloads hard datasets and results from HuggingFace,
	then serves the Gradio viewer.
	"""

	import logging
	import os
	import shutil

	# Disable SSR mode - causes rendering issues with CheckboxGroup and
	# dynamic @gr.render components in Gradio 6.x
	os.environ["GRADIO_SSR_MODE"] = "False"
	from pathlib import Path

	from huggingface_hub import snapshot_download

	# Process-wide logging: INFO and above, with timestamp and logger name.
	logging.basicConfig(
	    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	# Local cache layout: datasets and results live side by side under CACHE_DIR.
	CACHE_DIR = Path("/tmp/inkslop_cache")
	DATASETS_DIR = CACHE_DIR.joinpath("datasets")
	RESULTS_DIR = CACHE_DIR.joinpath("results")

	# Task id -> HuggingFace dataset repo holding that task's data.
	# The task id doubles as the sub-directory name under DATASETS_DIR.
	HF_DATASETS = {
	    "overlap_hard": "amaksay/inkslop-overlap-hard",
	    "autocomplete_hard": "amaksay/inkslop-autocomplete-hard",
	    "derender_hard": "amaksay/inkslop-derender-hard",
	    "mazes_hard": "amaksay/inkslop-mazes-hard",
	}

	# HuggingFace dataset repo downloaded into RESULTS_DIR.
	HF_RESULTS = "amaksay/inkslop-results"


	def restructure_dataset(task_dir: Path, task_id: str) -> None:
	    """Restructure a downloaded dataset so ``samples/`` sits at its root.

	    HF datasets have structure:
	        task_dir/source_data/samples/...  (sample records we need)
	        task_dir/original/...             (intermediate files, not needed)

	    ``source_data/samples/`` is moved to ``task_dir/samples/`` and the
	    leftover ``source_data/`` and ``original/`` directories are removed.
	    This ensures the dataset_name matches the task_id in results.

	    Args:
	        task_dir: Directory the dataset snapshot was downloaded into.
	        task_id: Task identifier; used only in log messages.

	    Returns:
	        None. The function is a no-op when ``task_dir/samples`` already
	        exists, and only logs a warning when ``source_data/samples`` is
	        missing.

	    Raises:
	        Whatever ``shutil.move`` raises if the move itself fails. Failures
	        while deleting the leftover directories are logged, not raised.
	    """
	    target = task_dir / "samples"
	    if target.exists():
	        logger.info(f"{task_id} already restructured")
	        return

	    # Look for source_data/samples/
	    source_data_samples = task_dir / "source_data" / "samples"
	    if not source_data_samples.exists():
	        logger.warning(f"No source_data/samples/ found in {task_dir}, skipping restructure")
	        return

	    logger.info(f"Restructuring {task_id}: moving source_data/samples/ to root")

	    # Move samples to task root
	    shutil.move(str(source_data_samples), str(target))

	    # Clean up source_data and original directories. This stays best-effort
	    # (the samples are already in place), but a failure is now reported
	    # instead of being silently discarded.
	    for subdir in ("source_data", "original"):
	        path = task_dir / subdir
	        if not path.exists():
	            continue
	        try:
	            shutil.rmtree(path)
	        except OSError as exc:
	            logger.warning(f"Could not remove {path}: {exc}")


	def download_hf_datasets() -> Path:
	    """Make sure every dataset in ``HF_DATASETS`` is present under ``DATASETS_DIR``.

	    A task whose ``<DATASETS_DIR>/<task_id>/samples`` directory already
	    exists is treated as cached and left alone. Every other task is fetched
	    with ``snapshot_download`` into ``<DATASETS_DIR>/<task_id>`` and then
	    handed to ``restructure_dataset``.

	    Returns:
	        ``DATASETS_DIR``.

	    Raises:
	        Exception: Anything raised while downloading or restructuring a
	            dataset is logged and then re-raised unchanged.
	    """
	    DATASETS_DIR.mkdir(parents=True, exist_ok=True)

	    for task_id, repo_id in HF_DATASETS.items():
	        destination = DATASETS_DIR / task_id
	        is_cached = destination.exists() and (destination / "samples").exists()

	        if not is_cached:
	            logger.info(f"Downloading {repo_id}...")
	            try:
	                local_path = snapshot_download(
	                    repo_id=repo_id,
	                    repo_type="dataset",
	                    local_dir=destination,
	                )
	                logger.info(f"Downloaded to {local_path}")

	                # Hoist samples/ to the task root right after the download.
	                restructure_dataset(destination, task_id)
	            except Exception as exc:
	                logger.error(f"Failed to download {repo_id}: {exc}")
	                raise
	        else:
	            logger.info(f"Using cached {task_id}")

	    return DATASETS_DIR


	def download_results() -> Path:
	    """Make sure the ``HF_RESULTS`` snapshot is present in ``RESULTS_DIR``.

	    If ``RESULTS_DIR`` exists and contains at least one entry it is reused
	    as-is; otherwise the repo is fetched with ``snapshot_download``.

	    Returns:
	        ``RESULTS_DIR``.

	    Raises:
	        Exception: Any download failure is logged and re-raised unchanged.
	    """
	    has_cached_results = RESULTS_DIR.exists() and any(RESULTS_DIR.iterdir())

	    if not has_cached_results:
	        logger.info(f"Downloading {HF_RESULTS}...")
	        try:
	            local_path = snapshot_download(
	                repo_id=HF_RESULTS,
	                repo_type="dataset",
	                local_dir=RESULTS_DIR,
	            )
	            logger.info(f"Downloaded results to {local_path}")
	        except Exception as exc:
	            logger.error(f"Failed to download results: {exc}")
	            raise
	    else:
	        logger.info("Using cached results")

	    return RESULTS_DIR


	def setup() -> tuple[Path, Path]:
	    """Fetch the datasets, then the results, logging start and completion.

	    Returns:
	        A ``(datasets_dir, results_dir)`` pair, as returned by
	        ``download_hf_datasets`` and ``download_results`` respectively.
	    """
	    logger.info("Setting up InkSlop viewer...")
	    # Tuple elements are evaluated left to right: datasets first, then results.
	    data_roots = (download_hf_datasets(), download_results())
	    logger.info("Setup complete!")
	    return data_roots


	# Fetch all data eagerly at import time, before the viewer is built.
	datasets_dir, results_dir = setup()

	# The viewer factory is imported only once the data directories are known.
	from inkslop.visualization.gradio_viewer import create_app

	app = create_app(records_root=datasets_dir, results_root=results_dir, hf_mode=True)

	# Launch directly only when executed as a script.
	if __name__ == "__main__":
	    app.launch()