# File size: 2,351 Bytes
# 91dd53b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import subprocess
import sys
from pathlib import Path

import modal

# Name passed to ``modal.App``.
APP_NAME = "corereader-backend"
# Name of the Modal Volume used for model files; overridable via the environment.
MODEL_VOLUME_NAME = os.environ.get("MODEL_VOLUME_NAME", "corereader-models")

# --- Image ---
# Deliberately CPU-only: Kokoro ONNX with the ONNX Runtime CPU build should run
# fine on Modal CPU containers.
_APT_PACKAGES = ("curl", "ca-certificates")
_PIP_PACKAGES = (
    "fastapi>=0.128.0",
    "uvicorn[standard]>=0.30.0",
    "aiohttp>=3.9.5",
    "beautifulsoup4>=4.12.3",
    "lxml>=5.2.2",
    "numpy>=1.26.0",
    "onnxruntime>=1.20.0",
    "kokoro-onnx>=0.2.6",
    "requests>=2.32.0",
    "soundfile>=0.12.0",
)
# ORT threading settings baked into the image. Each value is read from the
# environment when this module is evaluated, falling back to the default listed
# here; the intra-op default matches the reserved CPU cores.
_ORT_ENV = {
    variable: os.environ.get(variable, fallback)
    for variable, fallback in (
        ("ORT_INTRA_OP_THREADS", "3"),
        ("ORT_INTER_OP_THREADS", "1"),
        ("ORT_ALLOW_SPINNING", "1"),
    )
}

image = (
    modal.Image.debian_slim(python_version="3.12")
    .apt_install(*_APT_PACKAGES)
    .pip_install(*_PIP_PACKAGES)
    .env(_ORT_ENV)
    # The backend sources are copied in as the final step; keep this last.
    .add_local_dir("backend", remote_path="/app/backend")
)

# Volume that holds the downloaded model files (created on first use), and the
# Modal application object the functions below are registered on.
models_volume = modal.Volume.from_name(MODEL_VOLUME_NAME, create_if_missing=True)
app = modal.App(APP_NAME)


def _ensure_models() -> None:
    """Make sure the Kokoro model and voices files exist, downloading if needed.

    Checks for ``kokoro-v1.0.onnx`` and ``voices-v1.0.bin`` under
    ``/app/backend/models``. If either one is absent, the models directory is
    created (when necessary) and ``download_models.py`` is executed with the
    current interpreter, using ``/app/backend`` as the working directory.

    Raises:
        subprocess.CalledProcessError: If the download script exits with a
            non-zero status.
    """
    app_root = Path("/app/backend")
    weights_dir = app_root / "models"
    required_files = (
        weights_dir / "kokoro-v1.0.onnx",
        weights_dir / "voices-v1.0.bin",
    )

    # Only invoke the downloader when at least one required file is missing.
    if not all(artifact.exists() for artifact in required_files):
        weights_dir.mkdir(parents=True, exist_ok=True)
        downloader = app_root / "download_models.py"
        subprocess.run(
            [sys.executable, str(downloader)],
            cwd=str(app_root),
            check=True,
        )


@app.function(
    image=image,
    cpu=3.0,
    memory=4096,
    timeout=3600,  # 60 * 60
    volumes={"/app/backend/models": models_volume},
)
@modal.concurrent(max_inputs=20)
@modal.asgi_app()
def fastapi_app():
    """Prepare the container and return the backend's ASGI application.

    Puts ``/app/backend`` at the front of ``sys.path`` and makes it the working
    directory, ensures the model files are present, then imports ``app`` from
    the backend's ``server`` module and returns it.
    """
    backend_root = "/app/backend"

    # Mirror the Docker setup: the backend directory is both importable and the
    # CWD, so relative paths and the download script behave consistently.
    sys.path.insert(0, backend_root)
    os.chdir(backend_root)

    # Fetch the model/voices files if they are missing; the models directory is
    # the mount point of the volume declared in the decorator above.
    _ensure_models()

    # Imported late, once the path and working directory have been set up.
    from server import app as asgi_application  # type: ignore

    return asgi_application