# CoreReader / modal_app.py
# Last commit (91dd53b, shreyas-joshi):
#   Fix WS recv race + session recycle 20 with async overlap + Modal deploy
import os
import subprocess
import sys
from pathlib import Path
import modal
# Name passed to modal.App when the application object is created.
APP_NAME = "corereader-backend"

# Name of the Modal Volume used for the model files. Read from the
# MODEL_VOLUME_NAME environment variable, falling back to "corereader-models".
MODEL_VOLUME_NAME = os.environ.get("MODEL_VOLUME_NAME", "corereader-models")
# --- Image ---
# CPU-only on purpose: Kokoro ONNX on the onnxruntime CPU build should run
# fine on Modal CPU containers, so no GPU base image is used.

# Python packages installed into the image, in install order.
_PIP_REQUIREMENTS = (
    "fastapi>=0.128.0",
    "uvicorn[standard]>=0.30.0",
    "aiohttp>=3.9.5",
    "beautifulsoup4>=4.12.3",
    "lxml>=5.2.2",
    "numpy>=1.26.0",
    "onnxruntime>=1.20.0",
    "kokoro-onnx>=0.2.6",
    "requests>=2.32.0",
    "soundfile>=0.12.0",
)

# ORT threading variables and their defaults. Each value is taken from the
# environment of the process importing this module when set there; the
# intra-op default matches the reserved CPU cores.
_ORT_ENV_DEFAULTS = (
    ("ORT_INTRA_OP_THREADS", "3"),
    ("ORT_INTER_OP_THREADS", "1"),
    ("ORT_ALLOW_SPINNING", "1"),
)

image = (
    modal.Image.debian_slim(python_version="3.12")
    .apt_install("curl", "ca-certificates")
    .pip_install(*_PIP_REQUIREMENTS)
    .env({name: os.getenv(name, default) for name, default in _ORT_ENV_DEFAULTS})
    # Backend sources go in last (keep this as the final step of the chain).
    .add_local_dir("backend", remote_path="/app/backend")
)
# Modal application object; the web endpoint below is registered on it
# through the @app.function decorator.
app = modal.App(APP_NAME)
# Volume looked up by name (created if it does not exist yet); it is mounted
# at /app/backend/models by the function below so model files can be reused.
models_volume = modal.Volume.from_name(MODEL_VOLUME_NAME, create_if_missing=True)
def _is_nonempty_file(path: Path) -> bool:
    """Return True when *path* is a regular file containing at least one byte."""
    try:
        return path.is_file() and path.stat().st_size > 0
    except OSError:
        # The file vanished or is unreadable: treat it as not present.
        return False


def _ensure_models(backend_dir: Path = Path("/app/backend")) -> None:
    """Download the Kokoro model and voices files unless both are present.

    Looks for ``kokoro-v1.0.onnx`` and ``voices-v1.0.bin`` under
    ``<backend_dir>/models``. If either one is missing or is a zero-byte
    file (e.g. left behind by an interrupted download), the models directory
    is created when needed and ``<backend_dir>/download_models.py`` is run
    with the current interpreter, using ``backend_dir`` as working directory.

    Args:
        backend_dir: Directory containing ``download_models.py`` and the
            ``models`` subdirectory. Defaults to ``/app/backend``.

    Raises:
        subprocess.CalledProcessError: If the download script exits with a
            non-zero status.
    """
    backend_dir = Path(backend_dir)
    models_dir = backend_dir / "models"
    required_files = (
        models_dir / "kokoro-v1.0.onnx",
        models_dir / "voices-v1.0.bin",
    )

    # An empty file would pass a plain existence check and block every
    # later download attempt, so only non-empty files count as present.
    if all(_is_nonempty_file(path) for path in required_files):
        return

    models_dir.mkdir(parents=True, exist_ok=True)
    subprocess.run(
        [sys.executable, str(backend_dir / "download_models.py")],
        cwd=str(backend_dir),
        check=True,  # surface a failed download instead of continuing
    )
@app.function(
    image=image,
    cpu=3.0,
    memory=4096,
    timeout=3600,  # 60 * 60 seconds
    volumes={"/app/backend/models": models_volume},
)
@modal.concurrent(max_inputs=20)
@modal.asgi_app()
def fastapi_app():
    """Prepare the process and return the ASGI app defined in ``server``.

    Makes ``/app/backend`` importable and the working directory, ensures the
    model files are available, then imports and returns ``server.app``.
    """
    backend_root = "/app/backend"

    # Mirror the Docker setup: the backend directory is both importable and
    # the CWD, so relative paths and the download script behave the same.
    sys.path.insert(0, backend_root)
    os.chdir(backend_root)

    # Fetch model/voices on cold start when absent (cached in the Volume).
    _ensure_models()

    # Imported late: `server` only becomes importable after the path setup.
    from server import app as asgi_application  # type: ignore

    return asgi_application