File size: 2,121 Bytes
ed37502
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""Job routing service — decides whether to run generation locally or in the cloud.

For v1 (single GPU, no cloud accounts), everything routes locally.
Once cloud providers are configured, the router checks ComfyUI's availability,
queue depth, and free VRAM before dispatching.
"""

from __future__ import annotations

import logging
from enum import Enum

from content_engine.config import settings
from content_engine.services.comfyui_client import ComfyUIClient

logger = logging.getLogger(__name__)


class Backend(str, Enum):
    """Execution target chosen for a generation job.

    Mixing in ``str`` makes each member compare equal to its plain string
    value (``Backend.LOCAL == "local"``).
    """

    # Run the job on the local GPU.
    LOCAL = "local"
    # Offload the job to the cloud.
    CLOUD = "cloud"


class RouterService:
    """Routes generation jobs to local GPU or cloud APIs.

    The local queue-depth limit and the VRAM floor are read from
    ``settings.comfyui`` once, when the service is constructed.
    """

    def __init__(self, comfyui_client: ComfyUIClient):
        """Keep the ComfyUI client and snapshot the routing thresholds.

        Args:
            comfyui_client: Client used to query ComfyUI's availability,
                queue depth, and free VRAM.
        """
        self.comfyui = comfyui_client
        self.max_queue_depth = settings.comfyui.max_local_queue_depth
        self.min_vram_gb = settings.comfyui.min_vram_gb

    async def route(self, estimated_vram_gb: float = 4.0) -> Backend:
        """Decide where to run a generation job.

        Checks run in order and the first failing one routes to the cloud:
        ComfyUI reachability, local queue depth, then free VRAM.

        Args:
            estimated_vram_gb: VRAM the job is expected to need, in GB. The
                job stays local only if free VRAM covers both this estimate
                and the configured ``min_vram_gb`` floor. ``None`` falls back
                to the configured floor alone.

        Returns:
            Backend.LOCAL if the local GPU is available,
            Backend.CLOUD if it should be offloaded.
        """
        # v1: No cloud providers configured — always local, nothing to
        # offload to, so the ComfyUI probes are skipped entirely.
        if not settings.cloud_providers:
            return Backend.LOCAL

        # Check if ComfyUI is reachable
        if not await self.comfyui.is_available():
            logger.warning("ComfyUI unavailable, routing to cloud")
            return Backend.CLOUD

        # Check queue depth
        queue_depth = await self.comfyui.get_queue_depth()
        if queue_depth >= self.max_queue_depth:
            logger.info(
                "Local queue full (%d/%d), routing to cloud",
                queue_depth,
                self.max_queue_depth,
            )
            return Backend.CLOUD

        # Check VRAM: the job needs at least the configured floor, and more
        # if its own estimate is larger.
        if estimated_vram_gb is None:
            required_vram_gb = self.min_vram_gb
        else:
            required_vram_gb = max(self.min_vram_gb, estimated_vram_gb)

        vram_free = await self.comfyui.get_vram_free_gb()
        # A None reading means free VRAM is unknown; stay local in that case.
        if vram_free is not None and vram_free < required_vram_gb:
            logger.info(
                "Insufficient VRAM (%.1f GB free, need %.1f GB), routing to cloud",
                vram_free,
                required_vram_gb,
            )
            return Backend.CLOUD

        return Backend.LOCAL