File size: 1,676 Bytes
951f760
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/env python3
from __future__ import annotations

from pathlib import Path

from scripts.bootstrap_benchmark_env import build_bootstrap_report
from scripts.benchmark_checkpoint import choose_checkpoint_candidate


def build_readiness_report(
    *,
    cache_dir: Path,
    hf_token_present: bool,
    dependencies_present: bool = True,
    missing_dependencies: list[str] | None = None,
    output_repo: str | None = None,
    tokenizer_repo: str | None = None,
) -> dict[str, object]:
    """Assemble a readiness summary for running benchmarks from *cache_dir*.

    Args:
        cache_dir: Directory handed to ``choose_checkpoint_candidate`` and
            expected to hold a ``tokenizer`` subdirectory.
        hf_token_present: Whether the caller has a Hugging Face token; echoed
            back and used for ``hydration_possible``.
        dependencies_present: Caller-supplied flag, echoed back as given. It
            is not derived from ``missing_dependencies``.
        missing_dependencies: Names of missing packages; ``None`` is treated
            as an empty list.
        output_repo: Echoed back; must be truthy for ``hydration_possible``.
        tokenizer_repo: Echoed back; must be truthy for
            ``hydration_possible``.

    Returns:
        A dict with the checkpoint and tokenizer status, the dependency
        fields, the ``install_*`` entries copied from
        ``build_bootstrap_report``, the repo names, and the two summary
        flags ``hydration_possible`` and ``ready_for_hydra_benchmarks``.
    """
    candidate = choose_checkpoint_candidate(cache_dir)
    has_checkpoint = candidate is not None

    # The tokenizer counts as ready only when both artifacts are on disk.
    tokenizer_root = cache_dir / "tokenizer"
    has_tokenizer = all(
        (tokenizer_root / artifact).exists()
        for artifact in ("tokenizer.pkl", "token_bytes.pt")
    )

    # The helper receives its own copy of the missing-dependency list.
    bootstrap = build_bootstrap_report(
        missing_dependencies=list(missing_dependencies or []),
    )
    install_fields = {
        field: bootstrap[field]
        for field in ("install_hint", "install_command", "install_blockers")
    }

    can_hydrate = bool(hf_token_present and output_repo and tokenizer_repo)

    return {
        "cache_dir": str(cache_dir),
        "checkpoint_present": has_checkpoint,
        "checkpoint_path": None if candidate is None else str(candidate),
        "tokenizer_ready": has_tokenizer,
        "hf_token_present": hf_token_present,
        "dependencies_present": dependencies_present,
        "missing_dependencies": list(missing_dependencies or []),
        **install_fields,
        "output_repo": output_repo,
        "tokenizer_repo": tokenizer_repo,
        "hydration_possible": can_hydrate,
        "ready_for_hydra_benchmarks": has_checkpoint and has_tokenizer and dependencies_present,
    }