alyrapictotext / scripts /start_app.py
uday170120's picture
Deploy Pix2Text Space
0a695d4 verified
#!/usr/bin/env python3
import json
import os
import shlex
import sys
from pathlib import Path
# Text-OCR settings serialized to JSON and handed to the API server through
# --text-ocr-config whenever P2T_TEXT_OCR_CONFIG is not set.
DEFAULT_TEXT_OCR_CONFIG = {
    "det_model_name": "db_shufflenet_v2_small",
    "det_model_backend": "pytorch",
    "rec_model_name": "doc-densenet_lite_136-gru",
    "rec_model_backend": "onnx",
}

# Kept as a string because it is written straight into environment variables.
DEFAULT_CPU_THREADS = "2"

# Thread-count environment variables that all receive the same default value.
# They are only applied when the variable is not already set.
THREAD_ENV_DEFAULTS = dict.fromkeys(
    (
        "OMP_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "MKL_NUM_THREADS",
        "NUMEXPR_NUM_THREADS",
        "VECLIB_MAXIMUM_THREADS",
    ),
    DEFAULT_CPU_THREADS,
)
def env_flag(name: str, default: bool = False) -> bool:
    """Interpret the environment variable *name* as an on/off switch.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is not set at all.

    Returns:
        *default* if the variable is unset. Otherwise ``True`` only when the
        value, after trimming whitespace and lower-casing, is one of
        ``"1"``, ``"true"``, ``"yes"`` or ``"on"``; any other text
        (including an empty string) yields ``False``.
    """
    try:
        raw = os.environ[name]
    except KeyError:
        return default
    normalized = raw.strip().lower()
    return normalized in ("1", "true", "yes", "on")
def default_cache_root() -> Path:
    """Choose the directory under which all runtime caches are placed.

    Resolution order:
      1. ``PIX2TEXT_CACHE_ROOT`` when set to a non-empty value.
      2. ``/data/pix2text-demo`` when ``/data`` exists and is writable.
      3. ``/tmp/pix2text-demo`` otherwise.

    The directory itself is not created here.
    """
    override = os.environ.get("PIX2TEXT_CACHE_ROOT")
    if override:
        return Path(override)

    persistent = Path("/data/pix2text-demo")
    mount_point = persistent.parent
    # Only the parent is probed: the demo directory may not exist yet.
    usable = mount_point.exists() and os.access(mount_point, os.W_OK)
    return persistent if usable else Path("/tmp/pix2text-demo")
def configure_runtime_dirs() -> None:
    """Point cache-related environment variables at real directories.

    For each variable below, a default under the cache root is installed
    only if the variable is not already set; the directory the variable
    ends up naming (pre-existing value or default) is then created. The
    Markdown output directory (``P2T_OUTPUT_MD_ROOT_DIR``, defaulting to
    ``/tmp/pix2text-output``) is created as well, but that variable itself
    is not written.
    """
    root = default_cache_root()
    subdir_by_variable = (
        ("HF_HOME", "huggingface"),
        ("XDG_CACHE_HOME", "xdg-cache"),
        ("MPLCONFIGDIR", "matplotlib"),
        ("YOLO_CONFIG_DIR", "ultralytics"),
        ("PIX2TEXT_HOME", "pix2text"),
        ("CNOCR_HOME", "cnocr"),
        ("CNSTD_HOME", "cnstd"),
    )
    for variable, subdir in subdir_by_variable:
        if variable not in os.environ:
            os.environ[variable] = str(root / subdir)
        # Create whatever the variable now names, not necessarily our default.
        Path(os.environ[variable]).mkdir(parents=True, exist_ok=True)

    output_dir = os.environ.get("P2T_OUTPUT_MD_ROOT_DIR", "/tmp/pix2text-output")
    Path(output_dir).mkdir(parents=True, exist_ok=True)
def configure_cpu_threads() -> None:
    """Install the default thread-count environment variables.

    Every entry of ``THREAD_ENV_DEFAULTS`` is written to the environment
    unless a variable of that name is already present, so explicit settings
    always take precedence.
    """
    missing = {
        variable: threads
        for variable, threads in THREAD_ENV_DEFAULTS.items()
        if variable not in os.environ
    }
    os.environ.update(missing)
def build_ui_command(host: str, port: str) -> list[str]:
    """Build the argv that starts the Streamlit UI (``pix2text/app.py``).

    As a side effect, Streamlit environment defaults are installed for any
    of the three variables below that are not already set.

    Args:
        host: Address passed to ``--server.address``.
        port: Port passed to ``--server.port``.

    Returns:
        The argument vector, starting with the current Python interpreter.
    """
    # NOTE(review): XSRF protection is switched off both here (as an
    # overridable environment default) and by an explicit CLI flag below;
    # confirm that this is intended for the deployment.
    streamlit_env_defaults = {
        "STREAMLIT_BROWSER_GATHER_USAGE_STATS": "false",
        "STREAMLIT_SERVER_HEADLESS": "true",
        "STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION": "false",
    }
    for variable, fallback in streamlit_env_defaults.items():
        os.environ.setdefault(variable, fallback)

    launcher = [sys.executable, "-m", "streamlit", "run", "pix2text/app.py"]
    server_options = [
        "--server.address",
        host,
        "--server.port",
        port,
        "--server.enableXsrfProtection=false",
    ]
    return launcher + server_options
def build_api_command(host: str, port: str) -> list[str]:
    """Build the argv that starts the HTTP API via ``pix2text.cli serve``.

    Options are taken from ``P2T_*`` environment variables, with these
    defaults when a variable is unset: languages ``en``, device ``cpu``,
    output directory ``/tmp/pix2text-output``, log level ``INFO``, and the
    compact JSON form of ``DEFAULT_TEXT_OCR_CONFIG`` for the text-OCR config.
    Formula and table recognition are disabled unless their flags are on,
    and ``--reload`` is added only when ``P2T_RELOAD`` is on.

    Args:
        host: Address passed to ``-H``.
        port: Port passed to ``-p``.

    Returns:
        The argument vector, starting with the current Python interpreter.
    """
    env = os.environ
    text_ocr_config = env.get(
        "P2T_TEXT_OCR_CONFIG",
        json.dumps(DEFAULT_TEXT_OCR_CONFIG, separators=(",", ":")),
    )

    argv = [sys.executable, "-m", "pix2text.cli", "serve"]
    argv += ["-l", env.get("P2T_LANGUAGES", "en")]
    argv += ["--text-ocr-config", text_ocr_config]
    argv += ["--device", env.get("P2T_DEVICE", "cpu")]
    argv += ["-o", env.get("P2T_OUTPUT_MD_ROOT_DIR", "/tmp/pix2text-output")]
    argv += ["-H", host, "-p", port]
    argv += ["--log-level", env.get("P2T_LOG_LEVEL", "INFO")]

    # Optional configs are forwarded verbatim, and only when non-empty.
    for cli_flag, env_name in (
        ("--layout-config", "P2T_LAYOUT_CONFIG"),
        ("--mfd-config", "P2T_MFD_CONFIG"),
        ("--formula-ocr-config", "P2T_FORMULA_OCR_CONFIG"),
    ):
        raw = env.get(env_name)
        if raw:
            argv += [cli_flag, raw]

    # Each feature always gets an explicit enable/disable flag.
    if env_flag("P2T_ENABLE_FORMULA", False):
        argv.append("--enable-formula")
    else:
        argv.append("--disable-formula")

    if env_flag("P2T_ENABLE_TABLE", False):
        argv.append("--enable-table")
    else:
        argv.append("--disable-table")

    if env_flag("P2T_RELOAD", False):
        argv.append("--reload")
    return argv
def main() -> int:
    """Prepare the environment and launch the UI or the API server.

    Steps:
      1. Set up cache/output directories and thread-count defaults.
      2. Read ``P2T_APP_MODE`` (default ``ui``; trimmed, case-insensitive).
      3. Resolve the bind address from ``HOST``, then ``P2T_HOST``, then
         ``0.0.0.0``, and the port from ``PORT``, then ``P2T_PORT``, then
         ``7860``. Empty values are skipped in both chains.
      4. With ``P2T_DRY_RUN`` on, print the shell-quoted command and return;
         otherwise replace the current process with the command.

    Returns:
        ``0`` after a dry run. In a normal run the process is replaced by
        ``os.execvpe`` and this function does not return.

    Raises:
        SystemExit: If ``P2T_APP_MODE`` is neither ``ui`` nor ``api``.
    """
    configure_runtime_dirs()
    configure_cpu_threads()

    app_mode = os.environ.get("P2T_APP_MODE", "ui").strip().lower()
    # Resolve host the same way as port: a variable that is set but empty
    # must fall through to the next candidate instead of yielding "" as the
    # bind address.
    host = os.environ.get("HOST") or os.environ.get("P2T_HOST") or "0.0.0.0"
    port = os.environ.get("PORT") or os.environ.get("P2T_PORT") or "7860"

    if app_mode == "ui":
        cmd = build_ui_command(host, port)
    elif app_mode == "api":
        cmd = build_api_command(host, port)
    else:
        raise SystemExit(
            f"Unsupported P2T_APP_MODE={app_mode!r}. Use 'ui' or 'api'."
        )

    if env_flag("P2T_DRY_RUN", False):
        print(" ".join(shlex.quote(part) for part in cmd))
        return 0

    # Hand the process over to the server, passing along the environment
    # prepared above.
    os.execvpe(cmd[0], cmd, os.environ)
    # Not reached: a successful exec never returns and a failed one raises.
    return 0
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())