#!/usr/bin/env python3
"""Launch the Pix2Text Streamlit UI or the Pix2Text API server.

All configuration comes from environment variables.  ``P2T_APP_MODE``
selects the mode (``ui`` or ``api``); the chosen command replaces the
current process via ``os.execvpe``.  Set ``P2T_DRY_RUN`` to a truthy value
to print the command instead of running it.
"""

import json
import os
import shlex
import sys
from pathlib import Path

# Text-OCR model selection passed to ``--text-ocr-config`` when
# ``P2T_TEXT_OCR_CONFIG`` is not set.
DEFAULT_TEXT_OCR_CONFIG = {
    "det_model_name": "db_shufflenet_v2_small",
    "det_model_backend": "pytorch",
    "rec_model_name": "doc-densenet_lite_136-gru",
    "rec_model_backend": "onnx",
}

# Value used for every thread-count variable below unless the variable is
# already set in the environment.
DEFAULT_CPU_THREADS = "2"
THREAD_ENV_DEFAULTS = {
    "OMP_NUM_THREADS": DEFAULT_CPU_THREADS,
    "OPENBLAS_NUM_THREADS": DEFAULT_CPU_THREADS,
    "MKL_NUM_THREADS": DEFAULT_CPU_THREADS,
    "NUMEXPR_NUM_THREADS": DEFAULT_CPU_THREADS,
    "VECLIB_MAXIMUM_THREADS": DEFAULT_CPU_THREADS,
}

# Fallbacks used when neither the generic nor the P2T_-prefixed variable
# provides a non-empty value.
DEFAULT_HOST = "0.0.0.0"
DEFAULT_PORT = "7860"
DEFAULT_OUTPUT_ROOT = "/tmp/pix2text-output"


def env_flag(name: str, default: bool = False) -> bool:
    """Interpret the environment variable *name* as a boolean.

    Returns *default* when the variable is unset.  Otherwise the value is
    stripped and lower-cased, and is true only for ``1``, ``true``, ``yes``
    or ``on``; any other value (including the empty string) is false.
    """
    value = os.environ.get(name)
    if value is None:
        return default
    return value.strip().lower() in {"1", "true", "yes", "on"}


def default_cache_root() -> Path:
    """Return the directory under which all runtime caches are placed.

    Order of preference:

    1. ``PIX2TEXT_CACHE_ROOT`` when set to a non-empty value.
    2. ``/data/pix2text-demo`` when ``/data`` exists and is writable.
    3. ``/tmp/pix2text-demo``.
    """
    explicit_root = os.environ.get("PIX2TEXT_CACHE_ROOT")
    if explicit_root:
        return Path(explicit_root)

    persistent_root = Path("/data/pix2text-demo")
    parent = persistent_root.parent
    if parent.exists() and os.access(parent, os.W_OK):
        return persistent_root

    return Path("/tmp/pix2text-demo")


def configure_runtime_dirs() -> None:
    """Point cache/config environment variables at the cache root.

    Each variable is only given a default (an existing value wins), and the
    directory it ends up naming is created.  The markdown output directory
    (``P2T_OUTPUT_MD_ROOT_DIR``) is created as well.
    """
    cache_root = default_cache_root()
    dir_map = {
        "HF_HOME": cache_root / "huggingface",
        "XDG_CACHE_HOME": cache_root / "xdg-cache",
        "MPLCONFIGDIR": cache_root / "matplotlib",
        "YOLO_CONFIG_DIR": cache_root / "ultralytics",
        "PIX2TEXT_HOME": cache_root / "pix2text",
        "CNOCR_HOME": cache_root / "cnocr",
        "CNSTD_HOME": cache_root / "cnstd",
    }
    for env_name, path in dir_map.items():
        # setdefault returns the effective value, whether pre-existing or
        # the default we just installed.
        effective = os.environ.setdefault(env_name, str(path))
        Path(effective).mkdir(parents=True, exist_ok=True)

    output_root = Path(
        os.environ.get("P2T_OUTPUT_MD_ROOT_DIR", DEFAULT_OUTPUT_ROOT)
    )
    output_root.mkdir(parents=True, exist_ok=True)


def configure_cpu_threads() -> None:
    """Install default thread-count variables without overriding existing ones."""
    for env_name, value in THREAD_ENV_DEFAULTS.items():
        os.environ.setdefault(env_name, value)


def resolve_bind_address() -> tuple[str, str]:
    """Return the ``(host, port)`` pair the server should bind to.

    ``HOST``/``PORT`` take precedence over ``P2T_HOST``/``P2T_PORT``.  A
    variable that is unset *or empty* is skipped, so an empty ``HOST`` falls
    through to ``P2T_HOST`` and then to the built-in default, exactly as
    the port lookup does.
    """
    host = (
        os.environ.get("HOST")
        or os.environ.get("P2T_HOST")
        or DEFAULT_HOST
    )
    port = (
        os.environ.get("PORT")
        or os.environ.get("P2T_PORT")
        or DEFAULT_PORT
    )
    return host, port


def build_ui_command(host: str, port: str) -> list[str]:
    """Build the argv that runs the Streamlit UI on *host*:*port*.

    Also installs defaults for three Streamlit environment variables
    (usage stats off, headless on, XSRF protection off).
    """
    os.environ.setdefault("STREAMLIT_BROWSER_GATHER_USAGE_STATS", "false")
    os.environ.setdefault("STREAMLIT_SERVER_HEADLESS", "true")
    os.environ.setdefault("STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION", "false")
    # NOTE(review): the XSRF flag below is passed unconditionally, so an
    # operator-supplied STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=true is
    # presumably overridden by it - confirm this is intended.
    return [
        sys.executable,
        "-m",
        "streamlit",
        "run",
        "pix2text/app.py",
        "--server.address",
        host,
        "--server.port",
        port,
        "--server.enableXsrfProtection=false",
    ]


def build_api_command(host: str, port: str) -> list[str]:
    """Build the argv that runs ``pix2text.cli serve`` on *host*:*port*.

    Languages, text-OCR config, device, output directory and log level come
    from ``P2T_*`` variables with defaults.  The layout, MFD and formula-OCR
    configs are only forwarded when their variable is set and non-empty.
    Formula and table recognition are disabled unless their flag variable
    is truthy; ``--reload`` is appended when ``P2T_RELOAD`` is truthy.
    """
    cmd = [
        sys.executable,
        "-m",
        "pix2text.cli",
        "serve",
        "-l",
        os.environ.get("P2T_LANGUAGES", "en"),
        "--text-ocr-config",
        os.environ.get(
            "P2T_TEXT_OCR_CONFIG",
            json.dumps(DEFAULT_TEXT_OCR_CONFIG, separators=(",", ":")),
        ),
        "--device",
        os.environ.get("P2T_DEVICE", "cpu"),
        "-o",
        os.environ.get("P2T_OUTPUT_MD_ROOT_DIR", DEFAULT_OUTPUT_ROOT),
        "-H",
        host,
        "-p",
        port,
        "--log-level",
        os.environ.get("P2T_LOG_LEVEL", "INFO"),
    ]

    optional_json_args = {
        "P2T_LAYOUT_CONFIG": "--layout-config",
        "P2T_MFD_CONFIG": "--mfd-config",
        "P2T_FORMULA_OCR_CONFIG": "--formula-ocr-config",
    }
    for env_name, cli_flag in optional_json_args.items():
        value = os.environ.get(env_name)
        if value:
            cmd.extend([cli_flag, value])

    cmd.append(
        "--enable-formula"
        if env_flag("P2T_ENABLE_FORMULA", False)
        else "--disable-formula"
    )
    cmd.append(
        "--enable-table"
        if env_flag("P2T_ENABLE_TABLE", False)
        else "--disable-table"
    )
    if env_flag("P2T_RELOAD", False):
        cmd.append("--reload")
    return cmd


def main() -> int:
    """Prepare the environment and exec the selected server.

    Returns 0 after a dry run.  In normal operation the process image is
    replaced and this function does not return.

    Raises:
        SystemExit: if ``P2T_APP_MODE`` is neither ``ui`` nor ``api``.
    """
    configure_runtime_dirs()
    configure_cpu_threads()

    app_mode = os.environ.get("P2T_APP_MODE", "ui").strip().lower()
    host, port = resolve_bind_address()

    if app_mode == "ui":
        cmd = build_ui_command(host, port)
    elif app_mode == "api":
        cmd = build_api_command(host, port)
    else:
        raise SystemExit(
            f"Unsupported P2T_APP_MODE={app_mode!r}. Use 'ui' or 'api'."
        )

    if env_flag("P2T_DRY_RUN", False):
        print(shlex.join(cmd))
        return 0

    # Replace this process so the server receives signals directly.
    os.execvpe(cmd[0], cmd, os.environ)
    # Not reached on success; kept so the declared return type holds.
    return 0


if __name__ == "__main__":
    raise SystemExit(main())