import os
import shutil
import subprocess
import tempfile
import zipfile
from pathlib import Path


def _find_project_root(root: Path) -> Path:
    """Return the directory under *root* that contains ``meta.json``.

    Checks *root* itself first, then its immediate sub-directories.
    """
    if (root / "meta.json").exists():
        return root
    # Sorted so the choice is deterministic if several children qualify;
    # Path.iterdir() yields entries in arbitrary order.
    for child in sorted(root.iterdir()):
        if child.is_dir() and (child / "meta.json").exists():
            return child
    raise FileNotFoundError(
        f"Could not locate 'meta.json' inside extracted archive at {root}"
    )


def run_supervisely_parser(
    project_path: str,
    train_ratio: float,
    seed: int,
) -> str:
    """Extract a Supervisely project zip and run the parser script inside .venv-sly.

    The archive is extracted into a fresh temporary directory created next to
    the zip, and the parser writes into a second temporary directory created
    in the same place. If any step fails, both temporary directories are
    removed before the exception propagates.

    Parameters
    ----------
    project_path : str
        Path to the uploaded Supervisely project .zip.
    train_ratio : float
        Portion of data to allocate to training (remainder is validation).
    seed : int
        Random seed forwarded to the parser for reproducible splits.

    Returns
    -------
    str
        Path to the parsed dataset directory produced by the parser script.

    Raises
    ------
    FileNotFoundError
        If the zip, the parser script, the .venv-sly interpreter, or the
        project's ``meta.json`` cannot be found.
    ValueError
        If ``project_path`` does not have a ``.zip`` suffix.
    RuntimeError
        If the parser exits with a non-zero status, or does not leave exactly
        one directory in its output location.
    """
    project_zip = Path(project_path)
    if not project_zip.exists():
        raise FileNotFoundError(
            f"Provided project zip not found: {project_zip}"
        )
    if project_zip.suffix.lower() != ".zip":
        raise ValueError("Supervisely project must be a .zip archive")

    # Check the tooling before touching the disk, so a misconfigured
    # environment fails fast instead of extracting the archive first.
    repo_root = Path(__file__).resolve().parent.parent
    parser_script = repo_root / "scripts" / "supervisely_parser.py"
    # Virtual environments keep the interpreter in Scripts\python.exe on
    # Windows and in bin/python elsewhere.
    if os.name == "nt":
        venv_python = repo_root / ".venv-sly" / "Scripts" / "python.exe"
    else:
        venv_python = repo_root / ".venv-sly" / "bin" / "python"

    if not parser_script.exists():
        raise FileNotFoundError(
            f"Parser script not found: {parser_script}",
        )
    if not venv_python.exists():
        raise FileNotFoundError(
            "Expected .venv-sly Python interpreter at: "
            f"{venv_python}",
        )

    project_dir = project_zip.parent
    created_dirs = []
    try:
        extract_dir = Path(tempfile.mkdtemp(dir=project_dir))
        created_dirs.append(extract_dir)
        output_base_dir = Path(tempfile.mkdtemp(dir=project_dir))
        created_dirs.append(output_base_dir)

        with zipfile.ZipFile(project_zip, "r") as zf:
            zf.extractall(extract_dir)

        project_root = _find_project_root(extract_dir)

        cmd = [
            str(venv_python),
            str(parser_script),
            "--project_dir",
            str(project_root),
            "--output_base_dir",
            str(output_base_dir),
            "--train_ratio",
            str(train_ratio),
            "--seed",
            str(seed),
        ]
        # The child inherits the current environment by default.
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(
                "Supervisely parser failed.\n"
                f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
            )

        produced_dirs = [p for p in output_base_dir.iterdir() if p.is_dir()]
        if len(produced_dirs) != 1:
            raise RuntimeError(
                "Could not unambiguously determine parsed dataset directory in "
                f"{output_base_dir}. Found: {produced_dirs}"
            )
    except BaseException:
        # Nothing is handed back to the caller on failure, so remove the
        # temporary directories instead of leaking them next to the upload.
        for leftover in created_dirs:
            shutil.rmtree(leftover, ignore_errors=True)
        raise

    # NOTE(review): extract_dir is left in place on success because the
    # parser output may reference the extracted files - confirm, and remove
    # it here if the produced dataset is self-contained.
    return str(produced_dirs[0])