Spaces:

crazylemonade
/

openskill-ocr

Sleeping

File size: 28,203 Bytes

0ad3f89

#!/usr/bin/env python3
"""
Pre-flight validation script for MinerU OCR Service.

Run by entrypoint.sh BEFORE uvicorn starts.
Exits 0 if all checks pass.
Exits 1 if any CRITICAL check fails — this crashes the container loudly
so Hugging Face logs show an actionable error instead of a silent crash
or a healthy-looking service that fails on every request.

Usage:
    python validate.py           # run all checks, exit 0/1
    python validate.py --soft    # run all checks, always exit 0 (log only)

── FORENSIC NOTES (2025-06) ──────────────────────────────────────────────────

OCR engine:
  The pipeline (full) backend uses paddleocr2pytorch — a self-contained
  PyTorch reimplementation of PaddleOCR bundled inside the magic-pdf wheel.
  It uses: torch, cv2, numpy, pyclipper, shapely, yaml.
  paddlepaddle and paddleocr packages are NOT installed and NOT needed.

  pp_structure_v2.py (which imports paddleocr) is only loaded in 'lite' model
  mode. Pipeline backend always uses 'full' mode (CustomPEKModel). That file is
  never imported at runtime.

OCR model path resolution (from pytorch_paddle.py):
  ocr_models_dir = os.path.join(get_local_models_dir(), 'OCR', 'paddleocr_torch')
  det_model_path = os.path.join(ocr_models_dir, det_filename)
  where det_filename comes from models_config.yml keyed by language.

  Default CPU path: lang='ch' → forced to 'ch_lite' on CPU device.
  After Dockerfile Layer 3.5 patch:
    ch_lite.det = ch_PP-OCRv5_det_infer.pth  (was ch_PP-OCRv3 — not in HF repo)
    ch_lite.rec = ch_PP-OCRv5_rec_infer.pth  (unchanged — already in HF repo)

Arch config lookup (from pytorchocr_utility.py):
  get_arch_config(model_path) uses Path(model_path).stem as the key into
  arch_config.yaml (bundled in magic-pdf wheel). Both replacement filenames
  have entries in arch_config.yaml — verified before patch was written.

OpenCV conflict handling:
  doclayout-yolo, ultralytics, and rapid-table all declare opencv-python
  (non-headless) as a required dep. pip installs the full build in Layer 3.
  Layer 4 force-reinstalls opencv-python-headless to overwrite cv2. Both
  packages expose an identical cv2 API so all callers work correctly at
  runtime. pip-check shows warnings but they are harmless.

onnxruntime:
  rapid-table declares onnxruntime>1.17.0 as a required (non-optional) dep.
  pip resolves it automatically when magic-pdf[full] is installed in Layer 3.

slanet-plus.onnx (table model):
  Bundled inside the magic-pdf wheel at:
    magic_pdf/resources/slanet_plus/slanet-plus.onnx
  NOT downloaded from HF Hub — no separate download needed.
"""

import importlib
import json
import os
import shutil
import sys
import tempfile
import traceback

# Log-only switch: when "--soft" appears anywhere on the command line the
# script still prints every problem but never exits with status 1.
SOFT_MODE = any(arg == "--soft" for arg in sys.argv)

# Filesystem layout validated by the checks further down.
MODELS_DIR = "/app/models"
EXTRACT_KIT_MODELS = os.path.join(
    os.path.join(MODELS_DIR, "PDF-Extract-Kit-1.0"), "models"
)
# Canary directory inside the extract-kit model tree.
LAYOUT_MARKER = os.path.join(EXTRACT_KIT_MODELS, "Layout")
# MinerU config file, resolved relative to the current user's home directory.
CONFIG_PATH = os.path.expanduser("~/magic-pdf.json")


# ── helpers ────────────────────────────────────────────────────────────────────
def ok(label: str, detail: str = "") -> None:
    """Print a passed-check line for *label*.

    When *detail* is non-empty it is appended in parentheses after the label.
    Output is flushed immediately.
    """
    line = f"  ✓  {label}"
    if detail:
        line += f"  ({detail})"
    print(line, flush=True)


def fail(label: str, detail: str, critical: bool = True) -> None:
    """Print a failed-check line for *label* with its *detail*.

    The line is tagged ``CRITICAL`` when *critical* is truthy and ``WARNING``
    otherwise. This function only prints; it does not record anything.
    """
    severity = "WARNING"
    if critical:
        severity = "CRITICAL"
    print(f"  ✗  [{severity}] {label}: {detail}", flush=True)


def section(title: str) -> None:
    """Print *title* framed by two 60-character rules, preceded by a blank line."""
    rule = "─" * 60
    print(f"\n{rule}\n  {title}\n{rule}", flush=True)


# ── check registry ─────────────────────────────────────────────────────────────
# Each entry is a (label, detail) pair. The summary at the end of the script
# prints both lists and uses `failures` to decide whether to exit with 1.
failures: list[tuple[str, str]] = []
warnings: list[tuple[str, str]] = []


def record_fail(label: str, detail: str, critical: bool = True) -> None:
    """Print a failed check and remember it for the final summary.

    The failure is printed via ``fail`` and then appended to ``failures``
    when *critical* is truthy, or to ``warnings`` otherwise.
    """
    fail(label, detail, critical)
    bucket = failures if critical else warnings
    bucket.append((label, detail))


# ═══════════════════════════════════════════════════════════════════════════════
# Banner printed once before any check runs.
print(f"\n{'═' * 60}", flush=True)
print("  MinerU OCR Service — Pre-flight Validation", flush=True)
print(f"{'═' * 60}", flush=True)

# ── 1. Python version ──────────────────────────────────────────────────────────
section("1. Python runtime")
pv = sys.version_info
# An interpreter older than 3.10 is recorded as a critical failure.
if pv < (3, 10):
    record_fail(
        "Python version",
        f"{pv[0]}.{pv[1]} detected — magic-pdf requires >= 3.10",
    )
else:
    ok("Python version", ".".join(str(part) for part in pv[:3]))

# ── 2. cv2 ─────────────────────────────────────────────────────────────────────
section("2. OpenCV (cv2)")
try:
    cv2 = importlib.import_module("cv2")
    ok("cv2 import", f"version {cv2.__version__}")
    # A GUI toolkit name anywhere in the build report is treated as a sign
    # that the non-headless build is installed (warning only).
    build_report = cv2.getBuildInformation()
    gui_markers = ("GTK", "Qt")
    if not any(marker in build_report for marker in gui_markers):
        ok("cv2 headless", "no GUI backend detected")
    else:
        record_fail(
            "cv2 build",
            "GUI backend detected — use opencv-python-headless",
            critical=False,
        )
except ImportError as err:
    cv2_hint = (
        "Layer 4 force-reinstall of opencv-python-headless may have failed. "
        "Check Docker build log for the "
        "'pip install --force-reinstall opencv-python-headless' step."
    )
    record_fail("cv2 import", f"{err}. {cv2_hint}")
except Exception as err:
    # Anything else raised while importing or querying cv2 is also critical.
    record_fail("cv2 import", f"unexpected error: {err}")

# ── 3. PyTorch ─────────────────────────────────────────────────────────────────
section("3. PyTorch + TorchVision")
try:
    torch = importlib.import_module("torch")
    ok("torch import", f"version {torch.__version__}")
    # A visible CUDA device is only a warning: the space is expected to be CPU-only.
    if not torch.cuda.is_available():
        ok("torch device", "CPU-only (expected for free tier)")
    else:
        record_fail(
            "torch CUDA",
            "CUDA detected on CPU-only space — unexpected",
            critical=False,
        )
except ImportError as err:
    torch_hint = (
        "Install from PyTorch CPU index BEFORE magic-pdf in Dockerfile Layer 2: "
        "pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision"
    )
    record_fail("torch import", f"{err}. {torch_hint}")
except Exception as err:
    record_fail("torch import", f"unexpected: {err}")

# torchvision is checked independently so both problems show up in one run.
try:
    torchvision = importlib.import_module("torchvision")
    ok("torchvision import", f"version {torchvision.__version__}")
except ImportError as err:
    record_fail("torchvision import", str(err))
except Exception as err:
    record_fail("torchvision import", f"unexpected: {err}")

# ── 4. ultralytics ─────────────────────────────────────────────────────────────
section("4. ultralytics (YOLO — required by doclayout_yolo)")
try:
    ultralytics = importlib.import_module("ultralytics")
    ok("ultralytics import", f"version {ultralytics.__version__}")
except ImportError as err:
    # The hint records the install mistake the original author diagnosed.
    ultralytics_hint = (
        "Provided by magic-pdf[full]. "
        "ROOT CAUSE: [full-cpu] is NOT a valid extra in magic-pdf 1.3.12 — "
        "pip silently installed only the base package when given an unknown extra. "
        "Dockerfile Layer 3 must use magic-pdf[full]==1.3.12 (not [full-cpu])."
    )
    record_fail("ultralytics import", f"{err}. {ultralytics_hint}")
except Exception as err:
    record_fail("ultralytics import", f"unexpected: {err}")

# ── 5. doclayout_yolo ──────────────────────────────────────────────────────────
section("5. doclayout_yolo (layout detection model)")
try:
    doclayout_yolo = importlib.import_module("doclayout_yolo")
    # The package may not expose __version__, hence the 'unknown' fallback.
    dly_version = getattr(doclayout_yolo, '__version__', 'unknown')
    ok("doclayout_yolo import", f"version {dly_version}")
except ImportError as err:
    dly_hint = (
        "Provided by magic-pdf[full] (version 0.0.2b1). "
        "doclayout-yolo==0.0.2b1 is only on the myhloli custom wheel index — "
        "Dockerfile Layer 3 must include: "
        "--extra-index-url https://myhloli.github.io/wheels/"
    )
    record_fail("doclayout_yolo import", f"{err}. {dly_hint}")
except Exception as err:
    record_fail("doclayout_yolo import", f"unexpected: {err}")

# ── 6. rapid_table ─────────────────────────────────────────────────────────────
section("6. rapid_table (table extraction)")
try:
    rapid_table = importlib.import_module("rapid_table")
    # The package may not expose __version__, hence the 'unknown' fallback.
    rt_version = getattr(rapid_table, '__version__', 'unknown')
    ok("rapid_table import", f"version {rt_version}")
except ImportError as err:
    rt_hint = "Provided by magic-pdf[full]. Check Layer 3 install."
    record_fail("rapid_table import", f"{err}. {rt_hint}")
except Exception as err:
    record_fail("rapid_table import", f"unexpected: {err}")

# ── 7. onnxruntime ─────────────────────────────────────────────────────────────
section("7. onnxruntime (required by rapid-table for table model inference)")
# Per the author's notes, onnxruntime is a required (non-optional) dependency
# of rapid-table>=1.0.5 and is pulled in when magic-pdf[full] is installed in
# Layer 3, so a missing onnxruntime points at a failed rapid-table install.
try:
    onnxruntime = importlib.import_module("onnxruntime")
    ok("onnxruntime import", f"version {onnxruntime.__version__}")
except ImportError as err:
    ort_hint = (
        "onnxruntime is a required dep of rapid-table>=1.0.5. "
        "Its absence means rapid-table failed to install in Layer 3. "
        "Check Docker build log for rapid-table install errors."
    )
    record_fail("onnxruntime import", f"{err}. {ort_hint}")
except Exception as err:
    record_fail("onnxruntime import", f"unexpected: {err}")

# ── 8. magic_pdf core imports ──────────────────────────────────────────────────
section("8. magic_pdf core imports")

# (module path, names that module must expose)
REQUIRED_IMPORTS = [
    (
        "magic_pdf.data.dataset",
        ["PymuDocDataset", "ImageDataset"],
    ),
    (
        "magic_pdf.data.data_reader_writer",
        ["FileBasedDataReader", "FileBasedDataWriter"],
    ),
    (
        "magic_pdf.model.doc_analyze_by_custom_model",
        ["doc_analyze"],
    ),
    (
        "magic_pdf.config.enums",
        ["SupportedPdfParseMethod"],
    ),
]

for dotted_name, wanted in REQUIRED_IMPORTS:
    try:
        module = importlib.import_module(dotted_name)
        # Importing is not enough: every listed symbol must be an attribute too.
        absent = [name for name in wanted if not hasattr(module, name)]
        if not absent:
            ok(dotted_name, ", ".join(wanted))
        else:
            record_fail(dotted_name, f"missing symbols: {absent}")
    except ImportError as err:
        record_fail(dotted_name, str(err))
    except Exception as err:
        record_fail(dotted_name, f"unexpected: {err}")

# ── 8b. paddleocr2pytorch (OCR engine bundled inside magic-pdf wheel) ──────────
section("8b. paddleocr2pytorch (PyTorch OCR — bundled in magic-pdf wheel)")
# Per the author's notes this is the OCR engine used by the pipeline backend.
# It is not a separate pip package: it ships inside the magic-pdf wheel under
#   magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/
# so a failed import means magic-pdf itself did not install correctly.
_ocr_engine_label = "PytorchPaddleOCR import"
try:
    from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorch_paddle import PytorchPaddleOCR
    ok(
        "PytorchPaddleOCR (paddleocr2pytorch)",
        "bundled inside magic-pdf wheel — no paddlepaddle pkg needed",
    )
except ImportError as err:
    _ocr_engine_hint = (
        "This module is bundled inside magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/. "
        "If missing, magic-pdf itself did not install correctly."
    )
    record_fail(_ocr_engine_label, f"{err}. {_ocr_engine_hint}")
except Exception as err:
    record_fail(_ocr_engine_label, f"unexpected: {err}")

# ── 8c. Deprecated API check ───────────────────────────────────────────────────
section("8c. Deprecated API check (should NOT exist)")
# Module paths from the old API; each one is expected to fail to import.
OBSOLETE = [
    "magic_pdf.pipe.UNIPipe",
    "magic_pdf.rw.DiskReaderWriter",
]
for mod_path in OBSOLETE:
    try:
        importlib.import_module(mod_path)
    except ImportError:
        ok(f"{mod_path} (correctly absent)")
    except Exception as exc:
        # The module was found but raised while importing. Report it as a
        # warning, like the "still importable" case, instead of letting the
        # exception abort the whole validation run with a raw traceback.
        record_fail(
            mod_path,
            f"import raised unexpected error: {exc}",
            critical=False,
        )
    else:
        record_fail(mod_path, "still importable — code may use old API", critical=False)

# ── 9. End-to-end pipeline smoke test ─────────────────────────────────────────
section("9. End-to-end pipeline smoke test")
# Import-only check: the names below are imported together in one try block
# and nothing is executed with them.
_smoke_label = "Pipeline smoke test"
try:
    from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze  # noqa: F401
    import ultralytics  # noqa: F401
    from magic_pdf.data.dataset import ImageDataset  # noqa: F401
    from magic_pdf.data.data_reader_writer import FileBasedDataReader, FileBasedDataWriter  # noqa: F401
    ok(
        "Pipeline imports (doc_analyze + ultralytics + ImageDataset + readers)",
        "all OK",
    )
except ImportError as err:
    _smoke_consequence = "This means POST /extract will fail on every request."
    record_fail(
        _smoke_label,
        f"Full pipeline import chain failed: {err}. {_smoke_consequence}",
    )
except Exception as err:
    record_fail(_smoke_label, f"unexpected: {err}")

# ── 10. Config file ────────────────────────────────────────────────────────────
section("10. MinerU config (magic-pdf.json)")
# Parsed config; stays an empty dict unless the file holds a JSON object.
_cfg: dict = {}
if not os.path.exists(CONFIG_PATH):
    record_fail(
        "Config file",
        f"not found at {CONFIG_PATH}. "
        "Run download_models.py or check Docker build log.",
    )
else:
    try:
        # JSON is read as UTF-8 explicitly rather than relying on the locale.
        with open(CONFIG_PATH, encoding="utf-8") as f:
            _loaded = json.load(f)
    except json.JSONDecodeError as exc:
        record_fail("Config file", f"invalid JSON: {exc}")
    except Exception as exc:
        record_fail("Config file", str(exc))
    else:
        if not isinstance(_loaded, dict):
            # A list/string/number root would otherwise surface later as an
            # opaque "'list' object has no attribute 'get'" message.
            record_fail(
                "Config file",
                f"top-level JSON value must be an object, got {type(_loaded).__name__}",
            )
        else:
            _cfg = _loaded
            required_keys = ["models-dir", "device-mode"]
            missing_keys = [k for k in required_keys if k not in _cfg]
            if missing_keys:
                record_fail("Config keys", f"missing: {missing_keys}")
            else:
                ok("Config file", CONFIG_PATH)
                ok("device-mode", _cfg.get("device-mode", "?"))
                ok("models-dir",  _cfg.get("models-dir",  "?"))
                # The enable flags are informational; a missing or non-object
                # sub-section is shown as "?" instead of failing the check.
                for _flag_label, _section_key in (
                    ("formula-enable", "formula-config"),
                    ("table-enable", "table-config"),
                ):
                    _sub = _cfg.get(_section_key, {})
                    _enabled = _sub.get("enable", "?") if isinstance(_sub, dict) else "?"
                    ok(_flag_label, str(_enabled))

# ── 11. Model directory structure ─────────────────────────────────────────────
section("11. Model directory structure")

# (label, absolute directory) pairs; every one of these is critical.
model_dir_checks = [
    ("PDF-Extract-Kit-1.0 root",  os.path.join(MODELS_DIR, "PDF-Extract-Kit-1.0")),
    ("Layout models",             os.path.join(EXTRACT_KIT_MODELS, "Layout")),
    ("Layout/YOLO",               os.path.join(EXTRACT_KIT_MODELS, "Layout", "YOLO")),
    ("OCR models",                os.path.join(EXTRACT_KIT_MODELS, "OCR")),
    ("OCR/paddleocr_torch",       os.path.join(EXTRACT_KIT_MODELS, "OCR", "paddleocr_torch")),
    ("Table models (TabRec)",     os.path.join(EXTRACT_KIT_MODELS, "TabRec")),
]

for label, path in model_dir_checks:
    if not os.path.isdir(path):
        record_fail(label, f"directory not found: {path}")
        continue
    try:
        # The entry count is informational only.
        with os.scandir(path) as entries:
            n = len(list(entries))
        ok(label, f"{n} entries  [{path}]")
    except OSError:
        # The directory exists but could not be listed: still a pass.
        ok(label, path)

# layoutreader is optional, so its absence is only a warning.
lr_dir = os.path.join(MODELS_DIR, "layoutreader")
if not os.path.isdir(lr_dir):
    record_fail(
        "layoutreader (optional)",
        "not found — MinerU will use fallback ordering (non-critical)",
        critical=False,
    )
else:
    ok("layoutreader (optional)", lr_dir)

# ── 11b. Critical model weight files ──────────────────────────────────────────
section("11b. Critical model weight files")
#
# Per the author's notes, these are the exact files MinerU opens when it
# processes a document on a CPU deployment (default language = ch, which is
# forced to ch_lite on CPU).
#
# After the Dockerfile Layer 3.5 patch, models_config.yml references:
#   ch_lite.det = ch_PP-OCRv5_det_infer.pth  (patched from v3 — v3 NOT in repo)
#   ch_lite.rec = ch_PP-OCRv5_rec_infer.pth  (unchanged — always in repo)
#
# Layout uses doclayout_yolo (from magic-pdf.json layout-config).
# Table (rapid_table) uses slanet-plus.onnx BUNDLED IN THE WHEEL — not here.
# Formula is DISABLED — MFD/MFR files are not required.
#
# A CRITICAL failure here means the service would crash on its first document.

_ocr_rel = os.path.join("OCR", "paddleocr_torch")
_ocr_dir = os.path.join(EXTRACT_KIT_MODELS, _ocr_rel)

CRITICAL_WEIGHT_FILES: list[tuple[str, str, str]] = [
    # (label, path relative to EXTRACT_KIT_MODELS, reason shown on failure)
    (
        "OCR det weight (ch_lite, default CPU lang)",
        os.path.join(_ocr_rel, "ch_PP-OCRv5_det_infer.pth"),
        "Patched from ch_PP-OCRv3_det_infer.pth (absent in HF repo). "
        "Missing = all OCR will crash at model load time.",
    ),
    (
        "OCR rec weight (ch_lite)",
        os.path.join(_ocr_rel, "ch_PP-OCRv5_rec_infer.pth"),
        "Recognition model for ch_lite. "
        "Missing = OCR loads det but crashes at recognition.",
    ),
    (
        "OCR cls weight (angle classifier)",
        os.path.join(_ocr_rel, "ch_ptocr_mobile_v2.0_cls_infer.pth"),
        "Used when use_angle_cls=True. Default is False so non-critical, "
        "but its absence causes crash if angle classification is enabled.",
    ),
    (
        "Layout YOLO weight (doclayout_yolo)",
        os.path.join("Layout", "YOLO", "doclayout_yolo_docstructbench_imgsz1280_2501.pt"),
        "Layout detection model. Missing = layout detection crashes on every document.",
    ),
    (
        "Layout LayoutLMv3 weight",
        os.path.join("Layout", "LayoutLMv3", "model_final.pth"),
        "Alternative layout model. Required even when doclayout_yolo is primary "
        "because model_configs.yaml always lists it.",
    ),
    (
        "Multilingual OCR det (en/latin fallback)",
        os.path.join(_ocr_rel, "Multilingual_PP-OCRv3_det_infer.pth"),
        "Patched det for en and latin languages. Missing = crash if lang=en/latin.",
    ),
]

# Labels listed here are downgraded to warnings when the file is missing.
NON_CRITICAL_LABELS = {"OCR cls weight (angle classifier)"}

for label, rel_path, reason in CRITICAL_WEIGHT_FILES:
    full_path = os.path.join(EXTRACT_KIT_MODELS, rel_path)
    if not os.path.isfile(full_path):
        record_fail(
            label,
            f"FILE NOT FOUND: {full_path}\n    Reason: {reason}",
            critical=label not in NON_CRITICAL_LABELS,
        )
        continue
    # Present: report the size in MiB next to the resolved path.
    size_mb = os.path.getsize(full_path) / 2 ** 20
    ok(label, f"{size_mb:.1f} MB  [{full_path}]")

# ── 11c. models_config.yml consistency check ──────────────────────────────────
section("11c. models_config.yml consistency check")
#
# Reads the installed models_config.yml (inside the magic_pdf package) and
# verifies that every det/rec file it references for the checked languages
# actually exists on disk in the expected location.
#
# This catches version drift between the magic-pdf package and the HF repo
# BEFORE the service starts, rather than mid-request. A missing 'lang' mapping,
# language entry or det/rec field is reported as a warning, so the final
# "consistency" OK line is only printed when every file was really verified.

try:
    import magic_pdf
    import yaml as _yaml
    from pathlib import Path as _Path

    _pkg = _Path(magic_pdf.__file__).parent
    _mcfg = _pkg / 'model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml'

    if not _mcfg.exists():
        record_fail("models_config.yml", f"not found at expected path: {_mcfg}")
    else:
        with open(_mcfg, encoding="utf-8") as _f:
            _mc = _yaml.safe_load(_f)

        _ocr_torch = os.path.join(EXTRACT_KIT_MODELS, "OCR", "paddleocr_torch")

        # Check the two languages actually used on this CPU deployment
        _check_langs = ["ch_lite", "ch"]
        _mc_ok = True

        # safe_load returns None for an empty file and may return a
        # non-mapping, so validate the shape before calling .get on it.
        _lang_table = _mc.get("lang") if isinstance(_mc, dict) else None
        if not isinstance(_lang_table, dict):
            record_fail(
                "models_config.yml",
                f"no 'lang' mapping found in {_mcfg} — nothing could be verified",
                critical=False,
            )
            _mc_ok = False
            _langs_to_check = []
        else:
            _langs_to_check = _check_langs

        for _lang in _langs_to_check:
            _entry = _lang_table.get(_lang)
            if not isinstance(_entry, dict):
                record_fail(
                    f"models_config[{_lang}]",
                    "language entry missing — no det/rec files verified for it",
                    critical=False,
                )
                _mc_ok = False
                continue
            for _field in ("det", "rec"):
                _fname = _entry.get(_field)
                if not _fname:
                    record_fail(
                        f"models_config[{_lang}].{_field}",
                        "no filename configured — nothing to verify",
                        critical=False,
                    )
                    _mc_ok = False
                    continue
                _fpath = os.path.join(_ocr_torch, _fname)
                if os.path.isfile(_fpath):
                    ok(f"models_config[{_lang}].{_field}", _fname)
                else:
                    record_fail(
                        f"models_config[{_lang}].{_field}",
                        f"Config references '{_fname}' but file not found at:\n"
                        f"    {_fpath}\n"
                        f"    Dockerfile Layer 3.5 patch may not have run, "
                        f"or HF repo changed its file structure again.",
                        critical=True,
                    )
                    _mc_ok = False

        if _mc_ok:
            ok("models_config.yml consistency", "all referenced det/rec files exist on disk")

except Exception as _exc:
    # Failure of the check itself (import error, unreadable YAML, ...) is a warning.
    record_fail("models_config.yml consistency check", f"unexpected error: {_exc}", critical=False)

# ── 11d. Bundled wheel resources ──────────────────────────────────────────────
section("11d. Bundled wheel resources (inside magic_pdf package)")
#
# These files are looked up relative to the installed magic_pdf package, not
# the HF download directory. Per the author's notes they ship inside the
# wheel, so a missing file suggests a broken or corrupted install.

try:
    import magic_pdf as _mp
    from pathlib import Path as _P

    _pkg_root = _P(_mp.__file__).parent
    _resources = _pkg_root / "resources"
    _bundled = [
        ("slanet-plus.onnx (table model)",
         _resources / "slanet_plus" / "slanet-plus.onnx"),
        ("fasttext langdetect model",
         _resources / "fasttext-langdetect" / "lid.176.ftz"),
        ("YOLO langdetect model",
         _resources / "yolov11-langdetect" / "yolo_v11_ft.pt"),
        ("model_configs.yaml (weight path map)",
         _resources / "model_config" / "model_configs.yaml"),
    ]
    for _lbl, _p in _bundled:
        if not _p.exists():
            record_fail(_lbl, f"expected inside wheel at {_p} — magic-pdf install may be corrupted")
            continue
        # Present: report its size in MiB.
        _sz = _p.stat().st_size / 2 ** 20
        ok(_lbl, f"{_sz:.2f} MB")

except Exception as _err:
    # Failure of the check itself is only a warning.
    record_fail("Bundled wheel resources check", f"unexpected: {_err}", critical=False)

# ── 12. Temp storage ───────────────────────────────────────────────────────────
section("12. Temp storage")
# Writes a 4 KiB file into a fresh temp directory and checks its size.
try:
    # TemporaryDirectory removes the scratch directory even when the write or
    # the size check fails, so a failed run leaves nothing behind.
    with tempfile.TemporaryDirectory(prefix="mineru_validate_") as td:
        test_file = os.path.join(td, "write_test.bin")
        payload = b"x" * 4096
        with open(test_file, "wb") as f:
            f.write(payload)
        written = os.path.getsize(test_file)
        # Explicit check instead of `assert`: it still runs under `python -O`
        # and produces a non-empty failure detail.
        if written != len(payload):
            raise OSError(
                f"short write: expected {len(payload)} bytes, found {written}"
            )
    ok("Temp write + delete", tempfile.gettempdir())
except Exception as exc:
    record_fail("Temp storage", str(exc))

# ── 13. System memory (cgroups) ────────────────────────────────────────────────
section("13. System memory (cgroups)")
# Probe order: cgroups v2 → cgroups v1 → /proc/meminfo. Each probe is
# best-effort: any read or parse problem falls through to the next source.
mem_source = "unknown"
total_mb = used_mb = 0
_MIB = 1024 * 1024

try:
    with open("/sys/fs/cgroup/memory.max") as f:
        raw = f.read().strip()
    if raw != "max":  # "max" is skipped so the next probes run
        _limit_mb = int(raw) // _MIB
        with open("/sys/fs/cgroup/memory.current") as f:
            _current_mb = int(f.read().strip()) // _MIB
        # Commit only after BOTH files were read, so a failed second read
        # cannot leave a total with source "unknown".
        total_mb, used_mb = _limit_mb, _current_mb
        mem_source = "cgroups v2"
except (OSError, ValueError):
    # OSError also covers PermissionError, not just a missing file.
    pass

if total_mb == 0:
    try:
        with open("/sys/fs/cgroup/memory/memory.limit_in_bytes") as f:
            limit = int(f.read().strip())
        with open("/sys/fs/cgroup/memory/memory.usage_in_bytes") as f:
            used_bytes = int(f.read().strip())
        # Limits of 128 GiB or more are ignored so the /proc/meminfo probe runs.
        if limit < 128 * 1024 * 1024 * 1024:
            total_mb = limit // _MIB
            used_mb = used_bytes // _MIB
            mem_source = "cgroups v1"
    except (OSError, ValueError):
        pass

if total_mb == 0:
    try:
        info: dict[str, int] = {}
        with open("/proc/meminfo") as f:
            for line in f:
                parts = line.split()
                if len(parts) >= 2:
                    info[parts[0].rstrip(":")] = int(parts[1])
        # /proc/meminfo values are in kB.
        total_mb = info.get("MemTotal", 0) // 1024
        used_mb = (info.get("MemTotal", 0) - info.get("MemAvailable", 0)) // 1024
        mem_source = "/proc/meminfo (may show host RAM)"
    except Exception:
        pass

ok("Memory source", mem_source)
ok("Total memory",  f"{total_mb} MB")
ok("Used memory",   f"{used_mb} MB")
ok("Free memory",   f"{total_mb - used_mb} MB")

if total_mb == 0:
    # Without this warning the four ✓ lines above would silently show 0 MB.
    record_fail(
        "Memory detection",
        "could not determine total memory from cgroups or /proc/meminfo",
        critical=False,
    )

if total_mb > 32 * 1024:
    record_fail(
        "Memory total",
        f"{total_mb} MB seems too large — cgroups may not be available; "
        "/proc/meminfo showing host RAM. Memory guard in main.py will be conservative.",
        critical=False,
    )

# ── 14. /proc/meminfo sanity ───────────────────────────────────────────────────
section("14. /proc/meminfo (reference)")
# Echo the first five lines of /proc/meminfo, converted from kB to MB.
try:
    with open("/proc/meminfo") as meminfo:
        head = meminfo.readlines()[:5]
    for fields in map(str.split, head):
        if len(fields) < 2:
            continue
        ok(fields[0].rstrip(":"), f"{int(fields[1]) // 1024} MB")
except Exception as err:
    # Reference output only, so a failure here is a warning.
    record_fail("/proc/meminfo", str(err), critical=False)

# ═══════════════════════════════════════════════════════════════════════════════
# Summary
# ═══════════════════════════════════════════════════════════════════════════════
# Prints the collected warnings and failures, then exits: 1 when there are
# critical failures (unless --soft was given), 0 when all critical checks pass.
print("\n" + "═" * 60, flush=True)
print("  Validation Summary", flush=True)
print("═" * 60, flush=True)

if warnings:
    print(f"\n  ⚠  {len(warnings)} warning(s):", flush=True)
    for label, detail in warnings:
        print(f"     • {label}: {detail}", flush=True)

if failures:
    print(f"\n  ✗  {len(failures)} CRITICAL failure(s):", flush=True)
    for label, detail in failures:
        print(f"     • {label}: {detail}", flush=True)
    if SOFT_MODE:
        # In soft mode this script exits 0, so claiming the service
        # "will NOT start" would be wrong.
        print("\n  --soft mode: exiting 0 despite the failures above (log only).", flush=True)
    else:
        print("\n  Service will NOT start until these are resolved.", flush=True)
    print("  Check Dockerfile pip layers and Docker build log.", flush=True)
    print("═" * 60 + "\n", flush=True)
    if not SOFT_MODE:
        sys.exit(1)
else:
    print("\n  ✓  All critical checks passed", flush=True)
    if warnings:
        print(f"  ⚠  {len(warnings)} non-critical warning(s) — see above", flush=True)
    print("\n  Service is ready to start.", flush=True)
    print("═" * 60 + "\n", flush=True)
    sys.exit(0)