Spaces:
Sleeping
Sleeping
File size: 3,023 Bytes
e4aef33 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import os
import shutil
import subprocess
import tempfile
import zipfile
from pathlib import Path
def _find_project_root(root: Path) -> Path:
    """Return the directory at or directly under *root* holding ``meta.json``.

    *root* itself is checked first, then its immediate subdirectories in
    sorted order so the choice is deterministic if several of them qualify.

    Raises
    ------
    FileNotFoundError
        If neither *root* nor any immediate subdirectory contains
        ``meta.json``.
    """
    if (root / "meta.json").exists():
        return root
    for child in sorted(root.iterdir()):
        if child.is_dir() and (child / "meta.json").exists():
            return child
    raise FileNotFoundError(
        f"Could not locate 'meta.json' inside extracted archive at {root}"
    )


def run_supervisely_parser(
    project_path: str,
    train_ratio: float,
    seed: int,
) -> str:
    """Extract a Supervisely project zip and run the parser script inside .venv-sly.

    The archive is extracted into a fresh temporary directory created next to
    the zip, and the parser writes into a second temporary directory created
    in the same place. If anything fails after those directories are created,
    both are removed before the exception propagates.

    Parameters
    ----------
    project_path : str
        Path to the uploaded Supervisely project .zip.
    train_ratio : float
        Forwarded to the parser as ``--train_ratio``: portion of data to
        allocate to training (remainder is validation).
    seed : int
        Forwarded to the parser as ``--seed`` for reproducible splits.

    Returns
    -------
    str
        Path to the single directory the parser created inside its output
        base directory.

    Raises
    ------
    FileNotFoundError
        If the zip, the parser script, the ``.venv-sly`` interpreter, or the
        project's ``meta.json`` cannot be found.
    ValueError
        If ``project_path`` does not end in ``.zip``.
    RuntimeError
        If the parser exits with a non-zero status, or if it does not leave
        exactly one directory in the output base directory.
    """
    project_zip = Path(project_path)
    if not project_zip.exists():
        raise FileNotFoundError(
            f"Provided project zip not found: {project_zip}"
        )
    if project_zip.suffix.lower() != ".zip":
        raise ValueError("Supervisely project must be a .zip archive")

    # Check the tooling before touching the filesystem: a misconfigured
    # install should fail fast without extracting the archive or leaving
    # temporary directories behind.
    repo_root = Path(__file__).resolve().parent.parent
    parser_script = repo_root / "scripts" / "supervisely_parser.py"
    venv_python = repo_root / ".venv-sly" / "bin" / "python"
    if not parser_script.exists():
        raise FileNotFoundError(
            f"Parser script not found: {parser_script}",
        )
    if not venv_python.exists():
        raise FileNotFoundError(
            "Expected .venv-sly Python interpreter at: " f"{venv_python}",
        )

    project_dir = project_zip.parent
    created_dirs = []
    try:
        extract_dir = Path(tempfile.mkdtemp(dir=project_dir))
        created_dirs.append(extract_dir)
        output_base_dir = Path(tempfile.mkdtemp(dir=project_dir))
        created_dirs.append(output_base_dir)

        with zipfile.ZipFile(project_zip, "r") as zf:
            zf.extractall(extract_dir)

        project_root = _find_project_root(extract_dir)

        cmd = [
            str(venv_python),
            str(parser_script),
            "--project_dir",
            str(project_root),
            "--output_base_dir",
            str(output_base_dir),
            "--train_ratio",
            str(train_ratio),
            "--seed",
            str(seed),
        ]
        # The child inherits the current environment by default, so no
        # explicit ``env`` is needed.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            raise RuntimeError(
                "Supervisely parser failed.\n"
                f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
            )

        produced_dirs = [p for p in output_base_dir.iterdir() if p.is_dir()]
        if len(produced_dirs) != 1:
            raise RuntimeError(
                "Could not unambiguously determine parsed dataset directory in "
                f"{output_base_dir}. Found: {produced_dirs}"
            )
    except BaseException:
        # Nothing is returned on failure, so the caller has no handle on the
        # temporary directories; remove them here rather than orphaning them.
        for leftover in created_dirs:
            shutil.rmtree(leftover, ignore_errors=True)
        raise

    # NOTE(review): extract_dir is intentionally kept on success, as in the
    # original code; the parser output may reference the extracted files.
    # Confirm against the parser script and remove it here if it does not.
    return str(produced_dirs[0])
|