#!/bin/bash
#
# Start the Docling + Gemini hybrid parser API.
#
# Usage:
#   UVICORN_WORKERS=<n> ./<this script>
#
# Environment:
#   UVICORN_WORKERS  Number of uvicorn worker processes. Defaults to 2 when
#                    unset or empty.
#                    Each worker loads its own copy of the Docling model, so
#                    don't set this higher than RAM allows. On T4 Small
#                    (15GB RAM), 2 is a safe default.
#                    Set UVICORN_WORKERS=1 to revert to single-process mode.

# `exec` replaces this shell with uvicorn instead of running it as a child,
# so signals sent to the script's PID are delivered to the server itself.
exec uvicorn app:app \
  --host 0.0.0.0 \
  --port 7860 \
  --workers "${UVICORN_WORKERS:-2}"