File size: 3,023 Bytes
e4aef33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import os
import shutil
import subprocess
import tempfile
import zipfile
from pathlib import Path


def _find_project_root(root: Path) -> Path:
    """Return the directory at or directly under *root* containing ``meta.json``.

    *root* itself is checked first, then its immediate subdirectories.
    Subdirectories are visited in sorted order so the choice is deterministic
    when more than one of them contains a ``meta.json``.

    Raises
    ------
    FileNotFoundError
        If neither *root* nor any immediate subdirectory has a ``meta.json``.
    """
    if (root / "meta.json").exists():
        return root
    for child in sorted(root.iterdir()):
        if child.is_dir() and (child / "meta.json").exists():
            return child
    raise FileNotFoundError(
        f"Could not locate 'meta.json' inside extracted archive at {root}"
    )


def run_supervisely_parser(
    project_path: str,
    train_ratio: float,
    seed: int,
) -> str:
    """Extract a Supervisely project zip and run the parser script inside .venv-sly.

    The archive is unpacked into a fresh temporary directory created next to
    the zip, and a second temporary directory next to the zip is handed to the
    parser as its output base. If any step fails, both temporary directories
    are removed before the exception propagates, so failed runs do not leave
    orphaned directories behind.

    Parameters
    ----------
    project_path : str
        Path to the uploaded Supervisely project .zip.
    train_ratio : float
        Portion of data to allocate to training (remainder is validation).
        Forwarded to the parser as ``--train_ratio`` without validation.
    seed : int
        Random seed forwarded to the parser for reproducible splits.

    Returns
    -------
    str
        Path to the parsed dataset directory produced by the parser script
        (the single subdirectory found in the output base directory).

    Raises
    ------
    FileNotFoundError
        If the zip, the project's ``meta.json``, the parser script, or the
        ``.venv-sly`` interpreter cannot be found.
    ValueError
        If ``project_path`` does not end in ``.zip``.
    zipfile.BadZipFile
        If the file is not a valid zip archive.
    RuntimeError
        If the parser exits with a non-zero status, or if the output base
        directory does not contain exactly one subdirectory afterwards.
    """
    project_zip = Path(project_path)
    if not project_zip.exists():
        raise FileNotFoundError(
            f"Provided project zip not found: {project_zip}"
        )
    if project_zip.suffix.lower() != ".zip":
        raise ValueError("Supervisely project must be a .zip archive")

    project_dir = project_zip.parent
    # Track every scratch directory we create so a failure at any later step
    # can remove exactly those directories and nothing else.
    scratch_dirs = []
    try:
        extract_dir = Path(tempfile.mkdtemp(dir=project_dir))
        scratch_dirs.append(extract_dir)
        output_base_dir = Path(tempfile.mkdtemp(dir=project_dir))
        scratch_dirs.append(output_base_dir)

        with zipfile.ZipFile(project_zip, "r") as zf:
            zf.extractall(extract_dir)

        project_root = _find_project_root(extract_dir)

        # The parser script and its dedicated virtualenv are located relative
        # to this file: two directory levels up is treated as the repo root.
        repo_root = Path(__file__).resolve().parent.parent
        parser_script = repo_root / "scripts" / "supervisely_parser.py"
        venv_dir = repo_root / ".venv-sly"
        venv_python = venv_dir / "bin" / "python"
        if not venv_python.exists() and os.name == "nt":
            # Virtual environments on Windows place the interpreter under
            # Scripts\ rather than bin/.
            venv_python = venv_dir / "Scripts" / "python.exe"

        if not parser_script.exists():
            raise FileNotFoundError(
                f"Parser script not found: {parser_script}",
            )
        if not venv_python.exists():
            raise FileNotFoundError(
                f"Expected .venv-sly Python interpreter at: {venv_python}",
            )

        cmd = [
            str(venv_python),
            str(parser_script),
            "--project_dir",
            str(project_root),
            "--output_base_dir",
            str(output_base_dir),
            "--train_ratio",
            str(train_ratio),
            "--seed",
            str(seed),
        ]

        # The child inherits the current environment by default; output is
        # captured so it can be included in the error message on failure.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            raise RuntimeError(
                "Supervisely parser failed.\n"
                f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
            )

        produced_dirs = [p for p in output_base_dir.iterdir() if p.is_dir()]
        if len(produced_dirs) != 1:
            raise RuntimeError(
                "Could not unambiguously determine parsed dataset directory in "
                f"{output_base_dir}. Found: {produced_dirs}"
            )
        # NOTE(review): extract_dir is intentionally left in place on success;
        # whether the parser output references files inside it cannot be
        # determined from this file - confirm before removing it here.
        return str(produced_dirs[0])
    except BaseException:
        # No caller ever learns these paths when we fail, so remove them
        # rather than leaking them next to the uploaded zip, then re-raise.
        for scratch in scratch_dirs:
            shutil.rmtree(scratch, ignore_errors=True)
        raise