Spaces:

crazylemonade
/

openskill-ocr

Running

App Files Files Community

openskill-ocr / validate.py

crazylemonade

Upload 7 files

0ad3f89 verified 12 days ago

Raw

History Blame Contribute Delete

28.2 kB

	#!/usr/bin/env python3
	"""
	Pre-flight validation script for MinerU OCR Service.

	Run by entrypoint.sh BEFORE uvicorn starts.
	Exits 0 if all checks pass.
	Exits 1 if any CRITICAL check fails — this crashes the container loudly
	so Hugging Face logs show an actionable error instead of a silent crash
	or a healthy-looking service that fails on every request.

	Usage:
	python validate.py # run all checks, exit 0/1
	python validate.py --soft # run all checks, always exit 0 (log only)

	── FORENSIC NOTES (2025-06) ──────────────────────────────────────────────────

	OCR engine:
	The pipeline (full) backend uses paddleocr2pytorch — a self-contained
	PyTorch reimplementation of PaddleOCR bundled inside the magic-pdf wheel.
	It uses: torch, cv2, numpy, pyclipper, shapely, yaml.
	paddlepaddle and paddleocr packages are NOT installed and NOT needed.

	pp_structure_v2.py (which imports paddleocr) is only loaded in 'lite' model
	mode. Pipeline backend always uses 'full' mode (CustomPEKModel). That file is
	never imported at runtime.

	OCR model path resolution (from pytorch_paddle.py):
	ocr_models_dir = os.path.join(get_local_models_dir(), 'OCR', 'paddleocr_torch')
	det_model_path = os.path.join(ocr_models_dir, det_filename)
	where det_filename comes from models_config.yml keyed by language.

	Default CPU path: lang='ch' → forced to 'ch_lite' on CPU device.
	After Dockerfile Layer 3.5 patch:
	ch_lite.det = ch_PP-OCRv5_det_infer.pth (was ch_PP-OCRv3 — not in HF repo)
	ch_lite.rec = ch_PP-OCRv5_rec_infer.pth (unchanged — already in HF repo)

	Arch config lookup (from pytorchocr_utility.py):
	get_arch_config(model_path) uses Path(model_path).stem as the key into
	arch_config.yaml (bundled in magic-pdf wheel). Both replacement filenames
	have entries in arch_config.yaml — verified before patch was written.

	OpenCV conflict handling:
	doclayout-yolo, ultralytics, and rapid-table all declare opencv-python
	(non-headless) as a required dep. pip installs the full build in Layer 3.
	Layer 4 force-reinstalls opencv-python-headless to overwrite cv2. Both
	packages expose an identical cv2 API so all callers work correctly at
	runtime. pip-check shows warnings but they are harmless.

	onnxruntime:
	rapid-table declares onnxruntime>1.17.0 as a required (non-optional) dep.
	pip resolves it automatically when magic-pdf[full] is installed in Layer 3.

	slanet-plus.onnx (table model):
	Bundled inside the magic-pdf wheel at:
	magic_pdf/resources/slanet_plus/slanet-plus.onnx
	NOT downloaded from HF Hub — no separate download needed.
	"""

	import importlib
	import json
	import os
	import shutil
	import sys
	import tempfile
	import traceback

	# "--soft" anywhere on the command line: report problems but never exit 1.
	SOFT_MODE = any(arg == "--soft" for arg in sys.argv)

	# Filesystem layout the checks in this script are validated against.
	MODELS_DIR = "/app/models"
	EXTRACT_KIT_MODELS = os.path.join(
	    os.path.join(MODELS_DIR, "PDF-Extract-Kit-1.0"), "models"
	)
	LAYOUT_MARKER = os.path.join(EXTRACT_KIT_MODELS, "Layout")  # canary directory
	CONFIG_PATH = os.path.expanduser("~/magic-pdf.json")


	# ── helpers ────────────────────────────────────────────────────────────────────
	def ok(label: str, detail: str = "") -> None:
	    """Print a success line for *label*.

	    A non-empty *detail* is appended in parentheses. Output is flushed
	    immediately.
	    """
	    message = f" ✓ {label}"
	    if detail:
	        message += f" ({detail})"
	    print(message, flush=True)


	def fail(label: str, detail: str, critical: bool = True) -> None:
	    """Print a failure line tagged CRITICAL (default) or WARNING.

	    This only prints; it does not record the failure anywhere.
	    """
	    severity = "WARNING"
	    if critical:
	        severity = "CRITICAL"
	    print(f" ✗ [{severity}] {label}: {detail}", flush=True)


	def section(title: str) -> None:
	    """Print a section header: blank line, rule, indented title, rule."""
	    rule = "─" * 60
	    print(f"\n{rule}", flush=True)
	    print(f" {title}", flush=True)
	    print(rule, flush=True)


	# ── check registry ─────────────────────────────────────────────────────────────
	# (label, detail) pairs collected by record_fail() and read by the summary:
	# `failures` holds critical problems, `warnings` holds non-critical ones.
	failures: list[tuple[str, str]] = []
	warnings: list[tuple[str, str]] = []


	def record_fail(label: str, detail: str, critical: bool = True) -> None:
	    """Print the failure via fail() and remember it for the summary.

	    Critical entries are appended to `failures`, all others to `warnings`.
	    """
	    fail(label, detail, critical)
	    bucket = failures if critical else warnings
	    bucket.append((label, detail))


	# ═══════════════════════════════════════════════════════════════════════════════
	print(f"\n{'═' * 60}", flush=True)
	print(" MinerU OCR Service — Pre-flight Validation", flush=True)
	print("═" * 60, flush=True)

	# ── 1. Python version ──────────────────────────────────────────────────────────
	section("1. Python runtime")
	pv = sys.version_info
	if pv < (3, 10):
	    # Too old: report major.minor only, as in the requirement text.
	    record_fail(
	        "Python version",
	        f"{pv.major}.{pv.minor} detected — magic-pdf requires >= 3.10",
	    )
	else:
	    ok("Python version", f"{pv.major}.{pv.minor}.{pv.micro}")

	# ── 2. cv2 ─────────────────────────────────────────────────────────────────────
	section("2. OpenCV (cv2)")
	try:
	    import cv2

	    ok("cv2 import", f"version {cv2.__version__}")
	    # NOTE(review): this is a case-sensitive substring test on the build
	    # report; it cannot tell an enabled backend from a line that merely
	    # mentions one — confirm against real getBuildInformation() output.
	    build_info = cv2.getBuildInformation()
	    gui_detected = any(marker in build_info for marker in ("GTK", "Qt"))
	    if not gui_detected:
	        ok("cv2 headless", "no GUI backend detected")
	    else:
	        record_fail(
	            "cv2 build",
	            "GUI backend detected — use opencv-python-headless",
	            critical=False,
	        )
	except ImportError as err:
	    hint = (
	        "Layer 4 force-reinstall of opencv-python-headless may have failed. "
	        "Check Docker build log for the 'pip install --force-reinstall opencv-python-headless' step."
	    )
	    record_fail("cv2 import", f"{err}. {hint}")
	except Exception as err:
	    record_fail("cv2 import", f"unexpected error: {err}")

	# ── 3. PyTorch ─────────────────────────────────────────────────────────────────
	section("3. PyTorch + TorchVision")
	try:
	    import torch

	    ok("torch import", f"version {torch.__version__}")
	    if not torch.cuda.is_available():
	        ok("torch device", "CPU-only (expected for free tier)")
	    else:
	        # CUDA is only a warning: the service is expected to run on CPU.
	        record_fail(
	            "torch CUDA",
	            "CUDA detected on CPU-only space — unexpected",
	            critical=False,
	        )
	except ImportError as err:
	    install_hint = (
	        "Install from PyTorch CPU index BEFORE magic-pdf in Dockerfile Layer 2: "
	        "pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision"
	    )
	    record_fail("torch import", f"{err}. {install_hint}")
	except Exception as err:
	    record_fail("torch import", f"unexpected: {err}")

	# torchvision is checked separately so both problems are reported.
	try:
	    import torchvision

	    ok("torchvision import", f"version {torchvision.__version__}")
	except ImportError as err:
	    record_fail("torchvision import", f"{err}")
	except Exception as err:
	    record_fail("torchvision import", f"unexpected: {err}")

	# ── 4. ultralytics ─────────────────────────────────────────────────────────────
	section("4. ultralytics (YOLO — required by doclayout_yolo)")
	try:
	    import ultralytics

	    ok("ultralytics import", f"version {ultralytics.__version__}")
	except ImportError as err:
	    # The long hint records the diagnosed root cause for the next reader
	    # of the container logs.
	    root_cause = (
	        "Provided by magic-pdf[full]. "
	        "ROOT CAUSE: [full-cpu] is NOT a valid extra in magic-pdf 1.3.12 — "
	        "pip silently installed only the base package when given an unknown extra. "
	        "Dockerfile Layer 3 must use magic-pdf[full]==1.3.12 (not [full-cpu])."
	    )
	    record_fail("ultralytics import", f"{err}. {root_cause}")
	except Exception as err:
	    record_fail("ultralytics import", f"unexpected: {err}")

	# ── 5. doclayout_yolo ──────────────────────────────────────────────────────────
	section("5. doclayout_yolo (layout detection model)")
	try:
	    import doclayout_yolo

	    # The version is read defensively; "unknown" is printed if it is absent.
	    yolo_version = getattr(doclayout_yolo, "__version__", "unknown")
	    ok("doclayout_yolo import", f"version {yolo_version}")
	except ImportError as err:
	    index_hint = (
	        "Provided by magic-pdf[full] (version 0.0.2b1). "
	        "doclayout-yolo==0.0.2b1 is only on the myhloli custom wheel index — "
	        "Dockerfile Layer 3 must include: "
	        "--extra-index-url https://myhloli.github.io/wheels/"
	    )
	    record_fail("doclayout_yolo import", f"{err}. {index_hint}")
	except Exception as err:
	    record_fail("doclayout_yolo import", f"unexpected: {err}")

	# ── 6. rapid_table ─────────────────────────────────────────────────────────────
	section("6. rapid_table (table extraction)")
	try:
	    import rapid_table

	    # The version is read defensively; "unknown" is printed if it is absent.
	    table_version = getattr(rapid_table, "__version__", "unknown")
	    ok("rapid_table import", f"version {table_version}")
	except ImportError as err:
	    detail = f"{err}. Provided by magic-pdf[full]. Check Layer 3 install."
	    record_fail("rapid_table import", detail)
	except Exception as err:
	    record_fail("rapid_table import", f"unexpected: {err}")

	# ── 7. onnxruntime ─────────────────────────────────────────────────────────────
	section("7. onnxruntime (required by rapid-table for table model inference)")
	# onnxruntime is a required (non-optional) dependency of rapid-table>=1.0.5
	# and is resolved by pip when magic-pdf[full] is installed in Layer 3.
	# A missing onnxruntime therefore points at a failed rapid-table install.
	try:
	    import onnxruntime

	    ok("onnxruntime import", f"version {onnxruntime.__version__}")
	except ImportError as err:
	    explanation = (
	        "onnxruntime is a required dep of rapid-table>=1.0.5. "
	        "Its absence means rapid-table failed to install in Layer 3. "
	        "Check Docker build log for rapid-table install errors."
	    )
	    record_fail("onnxruntime import", f"{err}. {explanation}")
	except Exception as err:
	    record_fail("onnxruntime import", f"unexpected: {err}")

	# ── 8. magic_pdf core imports ──────────────────────────────────────────────────
	section("8. magic_pdf core imports")

	# (module path, symbols that module must expose)
	REQUIRED_IMPORTS = [
	    ("magic_pdf.data.dataset", ["PymuDocDataset", "ImageDataset"]),
	    ("magic_pdf.data.data_reader_writer", ["FileBasedDataReader", "FileBasedDataWriter"]),
	    ("magic_pdf.model.doc_analyze_by_custom_model", ["doc_analyze"]),
	    ("magic_pdf.config.enums", ["SupportedPdfParseMethod"]),
	]

	for module_path, symbols in REQUIRED_IMPORTS:
	    try:
	        module = importlib.import_module(module_path)
	        absent = []
	        for symbol in symbols:
	            if not hasattr(module, symbol):
	                absent.append(symbol)
	        if not absent:
	            ok(module_path, ", ".join(symbols))
	        else:
	            record_fail(module_path, f"missing symbols: {absent}")
	    except ImportError as err:
	        record_fail(module_path, f"{err}")
	    except Exception as err:
	        record_fail(module_path, f"unexpected: {err}")

	# ── 8b. paddleocr2pytorch (OCR engine bundled inside magic-pdf wheel) ──────────
	section("8b. paddleocr2pytorch (PyTorch OCR — bundled in magic-pdf wheel)")
	# The OCR engine of the pipeline backend. It is not a separate pip package:
	# it ships inside the magic-pdf wheel under
	#   magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/
	# so a failed import means magic-pdf itself is not installed correctly.
	try:
	    from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorch_paddle import (  # noqa: F401
	        PytorchPaddleOCR,
	    )

	    ok(
	        "PytorchPaddleOCR (paddleocr2pytorch)",
	        "bundled inside magic-pdf wheel — no paddlepaddle pkg needed",
	    )
	except ImportError as err:
	    location_hint = (
	        "This module is bundled inside magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/. "
	        "If missing, magic-pdf itself did not install correctly."
	    )
	    record_fail("PytorchPaddleOCR import", f"{err}. {location_hint}")
	except Exception as err:
	    record_fail("PytorchPaddleOCR import", f"unexpected: {err}")

	# ── 8c. Deprecated API check ───────────────────────────────────────────────────
	section("8c. Deprecated API check (should NOT exist)")
	# Module paths of the old API. Each one is expected to fail to import.
	OBSOLETE = [
	    "magic_pdf.pipe.UNIPipe",
	    "magic_pdf.rw.DiskReaderWriter",
	]
	for mod_path in OBSOLETE:
	    try:
	        importlib.import_module(mod_path)
	    except ImportError:
	        ok(f"{mod_path} (correctly absent)")
	    except Exception as exc:
	        # The module exists but raised while importing. Report it as a
	        # warning instead of letting the traceback abort validation before
	        # the summary is printed.
	        record_fail(mod_path, f"present but raised on import: {exc}", critical=False)
	    else:
	        record_fail(mod_path, "still importable — code may use old API", critical=False)

	# ── 9. End-to-end pipeline smoke test ─────────────────────────────────────────
	section("9. End-to-end pipeline smoke test")
	# Import-only check: the imports run in one try block, so the first one
	# that fails decides the reported error.
	try:
	    from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze  # noqa: F401
	    import ultralytics  # noqa: F401
	    from magic_pdf.data.dataset import ImageDataset  # noqa: F401
	    from magic_pdf.data.data_reader_writer import (  # noqa: F401
	        FileBasedDataReader,
	        FileBasedDataWriter,
	    )

	    ok(
	        "Pipeline imports (doc_analyze + ultralytics + ImageDataset + readers)",
	        "all OK",
	    )
	except ImportError as err:
	    consequence = "This means POST /extract will fail on every request."
	    record_fail(
	        "Pipeline smoke test",
	        f"Full pipeline import chain failed: {err}. {consequence}",
	    )
	except Exception as err:
	    record_fail("Pipeline smoke test", f"unexpected: {err}")

	# ── 10. Config file ────────────────────────────────────────────────────────────
	section("10. MinerU config (magic-pdf.json)")
	_cfg: dict = {}
	try:
	    # Open directly rather than testing os.path.exists() first, so there is
	    # no gap between the check and the read. JSON is decoded as UTF-8
	    # regardless of the container locale.
	    with open(CONFIG_PATH, encoding="utf-8") as f:
	        _loaded = json.load(f)

	    if not isinstance(_loaded, dict):
	        # A list or scalar root cannot carry the required keys.
	        record_fail(
	            "Config file",
	            f"top-level JSON value must be an object, got {type(_loaded).__name__}",
	        )
	    else:
	        _cfg = _loaded
	        required_keys = ["models-dir", "device-mode"]
	        missing_keys = [k for k in required_keys if k not in _cfg]
	        if missing_keys:
	            record_fail("Config keys", f"missing: {missing_keys}")
	        else:
	            ok("Config file", CONFIG_PATH)
	            ok("device-mode", _cfg.get("device-mode", "?"))
	            ok("models-dir", _cfg.get("models-dir", "?"))
	            # A non-mapping "formula-config"/"table-config" raises here and
	            # is reported by the generic handler below.
	            ok("formula-enable", str(_cfg.get("formula-config", {}).get("enable", "?")))
	            ok("table-enable", str(_cfg.get("table-config", {}).get("enable", "?")))
	except FileNotFoundError:
	    record_fail(
	        "Config file",
	        f"not found at {CONFIG_PATH}. "
	        "Run download_models.py or check Docker build log.",
	    )
	except json.JSONDecodeError as exc:
	    record_fail("Config file", f"invalid JSON: {exc}")
	except Exception as exc:
	    record_fail("Config file", str(exc))

	# ── 11. Model directory structure ─────────────────────────────────────────────
	section("11. Model directory structure")

	# (label, directory that must exist) — a missing one is a critical failure.
	model_dir_checks = [
	    ("PDF-Extract-Kit-1.0 root", os.path.join(MODELS_DIR, "PDF-Extract-Kit-1.0")),
	    ("Layout models", os.path.join(EXTRACT_KIT_MODELS, "Layout")),
	    ("Layout/YOLO", os.path.join(EXTRACT_KIT_MODELS, "Layout", "YOLO")),
	    ("OCR models", os.path.join(EXTRACT_KIT_MODELS, "OCR")),
	    ("OCR/paddleocr_torch", os.path.join(EXTRACT_KIT_MODELS, "OCR", "paddleocr_torch")),
	    ("Table models (TabRec)", os.path.join(EXTRACT_KIT_MODELS, "TabRec")),
	]

	for label, path in model_dir_checks:
	    if not os.path.isdir(path):
	        record_fail(label, f"directory not found: {path}")
	        continue
	    try:
	        entry_count = len(os.listdir(path))
	        ok(label, f"{entry_count} entries [{path}]")
	    except OSError:
	        # The directory exists but cannot be listed; report presence only.
	        ok(label, path)

	# layoutreader is optional, so its absence is only a warning.
	lr_dir = os.path.join(MODELS_DIR, "layoutreader")
	if not os.path.isdir(lr_dir):
	    record_fail(
	        "layoutreader (optional)",
	        "not found — MinerU will use fallback ordering (non-critical)",
	        critical=False,
	    )
	else:
	    ok("layoutreader (optional)", lr_dir)

	# ── 11b. Critical model weight files ──────────────────────────────────────────
	section("11b. Critical model weight files")
	#
	# These are the EXACT files MinerU will try to open when processing a document
	# on a CPU deployment (default language = ch → forced to ch_lite on CPU).
	#
	# After Dockerfile Layer 3.5 patch, models_config.yml now references:
	# ch_lite.det = ch_PP-OCRv5_det_infer.pth (patched from v3 — v3 NOT in repo)
	# ch_lite.rec = ch_PP-OCRv5_rec_infer.pth (unchanged — always in repo)
	#
	# Layout uses doclayout_yolo (from magic-pdf.json layout-config).
	# Table (rapid_table) uses slanet-plus.onnx BUNDLED IN THE WHEEL — not here.
	# Formula is DISABLED — MFD/MFR files not required.
	#
	# Any CRITICAL failure here = service boots but crashes on first document.

	# Kept for compatibility; the loop below builds paths from rel_path instead.
	_ocr_dir = os.path.join(EXTRACT_KIT_MODELS, "OCR", "paddleocr_torch")

	CRITICAL_WEIGHT_FILES: list[tuple[str, str, str]] = [
	    # (label, relative-to-EXTRACT_KIT_MODELS, reason)
	    (
	        "OCR det weight (ch_lite, default CPU lang)",
	        os.path.join("OCR", "paddleocr_torch", "ch_PP-OCRv5_det_infer.pth"),
	        "Patched from ch_PP-OCRv3_det_infer.pth (absent in HF repo). "
	        "Missing = all OCR will crash at model load time."
	    ),
	    (
	        "OCR rec weight (ch_lite)",
	        os.path.join("OCR", "paddleocr_torch", "ch_PP-OCRv5_rec_infer.pth"),
	        "Recognition model for ch_lite. "
	        "Missing = OCR loads det but crashes at recognition."
	    ),
	    (
	        "OCR cls weight (angle classifier)",
	        os.path.join("OCR", "paddleocr_torch", "ch_ptocr_mobile_v2.0_cls_infer.pth"),
	        "Used when use_angle_cls=True. Default is False so non-critical, "
	        "but its absence causes crash if angle classification is enabled."
	    ),
	    (
	        "Layout YOLO weight (doclayout_yolo)",
	        os.path.join("Layout", "YOLO", "doclayout_yolo_docstructbench_imgsz1280_2501.pt"),
	        "Layout detection model. Missing = layout detection crashes on every document."
	    ),
	    (
	        "Layout LayoutLMv3 weight",
	        os.path.join("Layout", "LayoutLMv3", "model_final.pth"),
	        "Alternative layout model. Required even when doclayout_yolo is primary "
	        "because model_configs.yaml always lists it."
	    ),
	    (
	        "Multilingual OCR det (en/latin fallback)",
	        os.path.join("OCR", "paddleocr_torch", "Multilingual_PP-OCRv3_det_infer.pth"),
	        "Patched det for en and latin languages. Missing = crash if lang=en/latin."
	    ),
	]

	# cls weight is only critical if use_angle_cls=True (default False)
	NON_CRITICAL_LABELS = {"OCR cls weight (angle classifier)"}

	for label, rel_path, reason in CRITICAL_WEIGHT_FILES:
	    full_path = os.path.join(EXTRACT_KIT_MODELS, rel_path)
	    is_critical = label not in NON_CRITICAL_LABELS

	    if not os.path.isfile(full_path):
	        record_fail(
	            label,
	            f"FILE NOT FOUND: {full_path}\n"
	            f" Reason: {reason}",
	            critical=is_critical,
	        )
	        continue

	    try:
	        size_bytes = os.path.getsize(full_path)
	    except OSError as exc:
	        # The file vanished or became unreadable between isfile() and stat.
	        record_fail(label, f"cannot stat {full_path}: {exc}", critical=is_critical)
	        continue

	    if size_bytes == 0:
	        # A 0-byte file (e.g. an interrupted download) cannot be a usable
	        # weight file, so it is reported like a missing one.
	        record_fail(
	            label,
	            f"FILE IS EMPTY (0 bytes): {full_path}\n"
	            f" Reason: {reason}",
	            critical=is_critical,
	        )
	    else:
	        size_mb = size_bytes / (1024 * 1024)
	        ok(label, f"{size_mb:.1f} MB [{full_path}]")

	# ── 11c. models_config.yml consistency check ──────────────────────────────────
	section("11c. models_config.yml consistency check")
	#
	# Reads the installed models_config.yml (inside the magic_pdf package) and
	# verifies that every det/rec file it references for the default CPU
	# languages actually exists on disk in the expected location.
	#
	# This catches version drift between the magic-pdf package and the HF repo
	# BEFORE the service starts, rather than mid-request.

	try:
	    import magic_pdf
	    import yaml as _yaml
	    from pathlib import Path as _Path

	    _pkg = _Path(magic_pdf.__file__).parent
	    _mcfg = _pkg / 'model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml'

	    if not _mcfg.exists():
	        record_fail("models_config.yml", f"not found at expected path: {_mcfg}")
	    else:
	        with open(_mcfg, encoding="utf-8") as _f:
	            _mc = _yaml.safe_load(_f)

	        # safe_load() returns None for an empty document; treat anything
	        # other than a mapping as "no language entries".
	        _lang_table = _mc.get("lang") if isinstance(_mc, dict) else None
	        if not isinstance(_lang_table, dict):
	            _lang_table = {}

	        _ocr_torch = os.path.join(EXTRACT_KIT_MODELS, "OCR", "paddleocr_torch")

	        # Check the two languages actually used on this CPU deployment
	        _check_langs = ["ch_lite", "ch"]
	        _mc_ok = True
	        _verified = 0  # number of referenced files that were looked up
	        for _lang in _check_langs:
	            _entry = _lang_table.get(_lang)
	            if not isinstance(_entry, dict):
	                continue
	            for _field in ("det", "rec"):
	                _fname = _entry.get(_field)
	                if not _fname:
	                    continue
	                _verified += 1
	                _fpath = os.path.join(_ocr_torch, _fname)
	                if os.path.isfile(_fpath):
	                    ok(f"models_config[{_lang}].{_field}", _fname)
	                else:
	                    record_fail(
	                        f"models_config[{_lang}].{_field}",
	                        f"Config references '{_fname}' but file not found at:\n"
	                        f" {_fpath}\n"
	                        f" Dockerfile Layer 3.5 patch may not have run, "
	                        f"or HF repo changed its file structure again.",
	                        critical=True,
	                    )
	                    _mc_ok = False

	        if _verified == 0:
	            # Nothing was looked up, so do not claim success: the file is
	            # empty or its schema no longer matches what this check reads.
	            record_fail(
	                "models_config.yml consistency",
	                f"no det/rec entries found for {_check_langs} — nothing was verified",
	                critical=False,
	            )
	        elif _mc_ok:
	            ok("models_config.yml consistency", "all referenced det/rec files exist on disk")

	except Exception as _exc:
	    record_fail("models_config.yml consistency check", f"unexpected error: {_exc}", critical=False)

	# ── 11d. Bundled wheel resources ──────────────────────────────────────────────
	section("11d. Bundled wheel resources (inside magic_pdf package)")
	#
	# These files ship inside the magic-pdf wheel itself, not in the HF download.
	# If one is absent the wheel installed incorrectly or was corrupted.

	try:
	    import magic_pdf as _mp
	    from pathlib import Path as _P

	    _pkg_root = _P(_mp.__file__).parent
	    _resources = _pkg_root / "resources"
	    _bundled = [
	        ("slanet-plus.onnx (table model)",
	         _resources / "slanet_plus" / "slanet-plus.onnx"),
	        ("fasttext langdetect model",
	         _resources / "fasttext-langdetect" / "lid.176.ftz"),
	        ("YOLO langdetect model",
	         _resources / "yolov11-langdetect" / "yolo_v11_ft.pt"),
	        ("model_configs.yaml (weight path map)",
	         _resources / "model_config" / "model_configs.yaml"),
	    ]
	    for _lbl, _p in _bundled:
	        if not _p.exists():
	            record_fail(_lbl, f"expected inside wheel at {_p} — magic-pdf install may be corrupted")
	            continue
	        _sz = _p.stat().st_size / (1024 * 1024)
	        ok(_lbl, f"{_sz:.2f} MB")

	except Exception as _exc:
	    # Anything unexpected (including magic_pdf not importing) is a warning.
	    record_fail("Bundled wheel resources check", f"unexpected: {_exc}", critical=False)

	# ── 12. Temp storage ───────────────────────────────────────────────────────────
	section("12. Temp storage")
	try:
	    # TemporaryDirectory removes the directory on exit even when the write
	    # or the size check below fails, so a failed run leaves nothing behind.
	    with tempfile.TemporaryDirectory(prefix="mineru_validate_") as td:
	        test_file = os.path.join(td, "write_test.bin")
	        payload = b"x" * 4096
	        with open(test_file, "wb") as f:
	            f.write(payload)
	        written = os.path.getsize(test_file)
	        # Explicit check instead of `assert`: asserts are stripped under
	        # `python -O`, and a bare AssertionError has an empty message.
	        if written != len(payload):
	            raise OSError(
	                f"short write: expected {len(payload)} bytes, found {written}"
	            )
	    ok("Temp write + delete", tempfile.gettempdir())
	except Exception as exc:
	    record_fail("Temp storage", str(exc))

	# ── 13. System memory (cgroups) ────────────────────────────────────────────────
	section("13. System memory (cgroups)")
	# Sources are tried in order: cgroups v2, cgroups v1, /proc/meminfo.
	# A later source is only consulted while total_mb is still 0.
	mem_source = "unknown"
	total_mb = used_mb = 0

	# cgroups v2: memory.max holds a byte count or the literal "max" (no limit).
	try:
	    with open("/sys/fs/cgroup/memory.max") as f:
	        raw = f.read().strip()
	    if raw != "max":
	        total_mb = int(raw) // (1024 * 1024)
	        # The limit is known from here on, even if reading usage fails.
	        mem_source = "cgroups v2"
	        with open("/sys/fs/cgroup/memory.current") as f:
	            used_mb = int(f.read().strip()) // (1024 * 1024)
	except (OSError, ValueError):
	    # OSError covers a missing file and an unreadable one (PermissionError),
	    # so a restricted cgroup mount falls through instead of crashing.
	    pass

	# cgroups v1
	if total_mb == 0:
	    try:
	        with open("/sys/fs/cgroup/memory/memory.limit_in_bytes") as f:
	            limit = int(f.read().strip())
	        with open("/sys/fs/cgroup/memory/memory.usage_in_bytes") as f:
	            used_bytes = int(f.read().strip())
	        # Limits of 128 GiB or more are ignored and the next source is tried.
	        if limit < 128 * 1024 * 1024 * 1024:
	            total_mb = limit // (1024 * 1024)
	            used_mb = used_bytes // (1024 * 1024)
	            mem_source = "cgroups v1"
	    except (OSError, ValueError):
	        pass

	# /proc/meminfo: the second field of each line is divided by 1024 to get MB.
	if total_mb == 0:
	    try:
	        info: dict[str, int] = {}
	        with open("/proc/meminfo") as f:
	            for line in f:
	                parts = line.split()
	                if len(parts) >= 2:
	                    info[parts[0].rstrip(":")] = int(parts[1])
	        total_mb = info.get("MemTotal", 0) // 1024
	        used_mb = (info.get("MemTotal", 0) - info.get("MemAvailable", 0)) // 1024
	        mem_source = "/proc/meminfo (may show host RAM)"
	    except Exception as exc:
	        # Best-effort source: report the problem, never abort validation.
	        record_fail("/proc/meminfo", f"could not read memory totals: {exc}", critical=False)

	ok("Memory source", mem_source)
	ok("Total memory", f"{total_mb} MB")
	ok("Used memory", f"{used_mb} MB")
	ok("Free memory", f"{total_mb - used_mb} MB")

	if total_mb == 0:
	    # No source produced a total; the figures above are placeholders.
	    record_fail(
	        "Memory total",
	        "could not determine a memory limit from cgroups or /proc/meminfo",
	        critical=False,
	    )
	elif total_mb > 32 * 1024:
	    record_fail(
	        "Memory total",
	        f"{total_mb} MB seems too large — cgroups may not be available; "
	        "/proc/meminfo showing host RAM. Memory guard in main.py will be conservative.",
	        critical=False,
	    )

	# ── 14. /proc/meminfo sanity ───────────────────────────────────────────────────
	section("14. /proc/meminfo (reference)")
	# Prints the first five /proc/meminfo entries, with the value divided by 1024.
	try:
	    with open("/proc/meminfo") as meminfo:
	        head = meminfo.readlines()[:5]
	    for raw_line in head:
	        fields = raw_line.split()
	        if len(fields) < 2:
	            continue
	        kib = int(fields[1])
	        ok(fields[0].rstrip(":"), f"{kib // 1024} MB")
	except Exception as err:
	    record_fail("/proc/meminfo", f"{err}", critical=False)

	# ═══════════════════════════════════════════════════════════════════════════════
	# Summary
	# ═══════════════════════════════════════════════════════════════════════════════
	print("\n" + "═" * 60, flush=True)
	print(" Validation Summary", flush=True)
	print("═" * 60, flush=True)

	if warnings:
	    print(f"\n ⚠ {len(warnings)} warning(s):", flush=True)
	    for label, detail in warnings:
	        print(f" • {label}: {detail}", flush=True)

	if failures:
	    print(f"\n ✗ {len(failures)} CRITICAL failure(s):", flush=True)
	    for label, detail in failures:
	        print(f" • {label}: {detail}", flush=True)
	    if SOFT_MODE:
	        # In --soft mode the script exits 0, so it must not claim that the
	        # service is blocked from starting.
	        print("\n --soft mode: exiting 0 despite the failures above.", flush=True)
	    else:
	        print("\n Service will NOT start until these are resolved.", flush=True)
	    print(" Check Dockerfile pip layers and Docker build log.", flush=True)
	    print("═" * 60 + "\n", flush=True)
	    if not SOFT_MODE:
	        sys.exit(1)
	else:
	    print("\n ✓ All critical checks passed", flush=True)
	    if warnings:
	        print(f" ⚠ {len(warnings)} non-critical warning(s) — see above", flush=True)
	    print("\n Service is ready to start.", flush=True)
	    print("═" * 60 + "\n", flush=True)

	# Reached when there are no critical failures, or in --soft mode.
	sys.exit(0)