File size: 7,126 Bytes
60d4850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""Background task queue for model inference offloading.

Phase 8: Provides async task queue for long-running inference tasks.
Uses ARQ (async Redis queue) when Redis is available, otherwise falls back
to asyncio.create_task for single-process operation.

Set ``TASK_QUEUE_ENABLED=true`` and ``TASK_QUEUE_BROKER_URL`` to enable.
"""

import asyncio
import logging
import time
import uuid
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Callable, Coroutine, Dict, Optional

from app.config import settings

logger = logging.getLogger(__name__)


class TaskStatus(str, Enum):
    """Lifecycle states of a background task.

    Subclasses ``str`` so the values serialize directly (e.g. in JSON
    API responses) without custom encoders.
    """

    PENDING = "pending"      # enqueued, not yet started
    RUNNING = "running"      # currently executing
    COMPLETED = "completed"  # finished successfully; result is populated
    FAILED = "failed"        # raised an exception; error is populated


@dataclass
class TaskResult:
    """Tracks the state and outcome of a single background task."""

    task_id: str                  # unique id returned to the caller by enqueue()
    status: TaskStatus            # current lifecycle state
    result: Any = None            # coroutine's return value once COMPLETED
    error: Optional[str] = None   # stringified exception when FAILED
    created_at: float = field(default_factory=time.time)  # enqueue timestamp (epoch seconds)
    completed_at: Optional[float] = None       # set when the task finishes (success or failure)
    duration_seconds: Optional[float] = None   # wall-clock execution time, excludes queue wait


class InMemoryTaskQueue:
    """Asyncio-based task queue for single-process deployments.

    Tracks results in an in-memory map bounded by ``max_queue_size``;
    finished entries are pruned on demand to make room, so callers should
    poll :meth:`get_result` promptly. Concurrency is capped by a semaphore.
    Not process-safe: state lives entirely in this process.
    """

    def __init__(self, max_concurrent: int = 2, max_queue_size: int = 20):
        # Caps how many submitted coroutines execute simultaneously.
        self._semaphore = asyncio.Semaphore(max_concurrent)
        self._results: Dict[str, TaskResult] = {}
        self._max_queue_size = max_queue_size
        self._active_count = 0
        # Strong references to in-flight tasks: the event loop keeps only a
        # weak reference to tasks, so without this set a running task could
        # be garbage-collected before it completes.
        self._tasks: set = set()

    async def enqueue(
        self,
        func: Callable[..., Coroutine],
        *args: Any,
        task_id: Optional[str] = None,
        **kwargs: Any,
    ) -> str:
        """Submit a coroutine for background execution.

        Args:
            func: Coroutine function to run.
            *args, **kwargs: Forwarded to ``func``.
            task_id: Optional explicit id; a UUID4 is generated otherwise.

        Returns:
            The task id usable with :meth:`get_result`.

        Raises:
            RuntimeError: If the queue is full even after pruning
                finished tasks.
        """
        task_id = task_id or str(uuid.uuid4())

        if len(self._results) >= self._max_queue_size:
            # Prune finished tasks to make room. Drop at least one so a
            # single finished entry still frees a slot (len // 2 alone
            # would prune nothing when only one task has finished).
            finished = [
                k for k, v in self._results.items()
                if v.status in (TaskStatus.COMPLETED, TaskStatus.FAILED)
            ]
            for k in finished[:max(1, len(finished) // 2)]:
                del self._results[k]

        if len(self._results) >= self._max_queue_size:
            raise RuntimeError("Task queue is full")

        self._results[task_id] = TaskResult(
            task_id=task_id, status=TaskStatus.PENDING
        )
        task = asyncio.create_task(self._run(task_id, func, *args, **kwargs))
        # Hold a strong reference until done; discard automatically after.
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)
        return task_id

    async def _run(
        self,
        task_id: str,
        func: Callable[..., Coroutine],
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Execute one task under the concurrency semaphore, recording outcome."""
        async with self._semaphore:
            self._active_count += 1
            result = self._results[task_id]
            result.status = TaskStatus.RUNNING
            start = time.time()
            try:
                result.result = await func(*args, **kwargs)
                result.status = TaskStatus.COMPLETED
            except Exception:
                result.status = TaskStatus.FAILED
                # logger.exception preserves the traceback, unlike error().
                logger.exception("Task %s failed", task_id)
                result.error = str(sys.exc_info()[1]) if sys.exc_info()[1] else "unknown"
            finally:
                result.completed_at = time.time()
                # Duration measured from RUNNING, so queue wait is excluded.
                result.duration_seconds = result.completed_at - start
                self._active_count -= 1

    def get_result(self, task_id: str) -> Optional[TaskResult]:
        """Return the tracked result for *task_id*, or None if unknown/pruned."""
        return self._results.get(task_id)

    @property
    def active_tasks(self) -> int:
        """Number of tasks currently executing (holding the semaphore)."""
        return self._active_count

    @property
    def pending_tasks(self) -> int:
        """Number of tasks enqueued but not yet started."""
        return sum(
            1 for r in self._results.values() if r.status == TaskStatus.PENDING
        )

    def stats(self) -> Dict[str, Any]:
        """Return a snapshot of queue health for monitoring endpoints."""
        return {
            "active": self._active_count,
            "pending": self.pending_tasks,
            "total_tracked": len(self._results),
            "backend": "asyncio",
        }


class ARQTaskQueue:
    """ARQ (async Redis queue) backend for distributed workers.

    Currently a transitional implementation: it connects to Redis for
    health/stats reporting but still executes tasks in-process (full ARQ
    worker dispatch would serialize the function name to a worker process).
    Falls back gracefully when Redis is unreachable.
    """

    def __init__(self, broker_url: str, max_concurrent: int = 2):
        self._broker_url = broker_url
        self._max_concurrent = max_concurrent
        # Enforce the concurrency cap on the in-process execution path;
        # previously max_concurrent was stored but never applied.
        self._semaphore = asyncio.Semaphore(max_concurrent)
        self._pool = None
        self._results: Dict[str, TaskResult] = {}
        # Strong refs to in-flight tasks: asyncio holds tasks weakly, so an
        # unreferenced running task could be garbage-collected.
        self._tasks: set = set()

    async def _connect(self):
        """Lazily open the Redis connection pool; no-op if already connected."""
        if self._pool is not None:
            return
        try:
            import redis.asyncio as aioredis
            self._pool = aioredis.from_url(self._broker_url)
            await self._pool.ping()
            # Log only the host part of the URL to avoid leaking credentials.
            logger.info("ARQ task queue connected: %s", self._broker_url.split("@")[-1])
        except Exception as e:
            logger.warning("ARQ connection failed, tasks will run in-process: %s", e)
            self._pool = None

    async def enqueue(
        self,
        func: Callable[..., Coroutine],
        *args: Any,
        task_id: Optional[str] = None,
        **kwargs: Any,
    ) -> str:
        """Enqueue task. Falls back to in-process if Redis unavailable.

        Returns the task id usable with :meth:`get_result`.
        """
        task_id = task_id or str(uuid.uuid4())
        await self._connect()

        # For now, run in-process with tracking (ARQ worker integration
        # would serialize func name and dispatch to worker process)
        self._results[task_id] = TaskResult(
            task_id=task_id, status=TaskStatus.PENDING
        )
        task = asyncio.create_task(self._run_local(task_id, func, *args, **kwargs))
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)
        return task_id

    async def _run_local(
        self,
        task_id: str,
        func: Callable[..., Coroutine],
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Execute a task in-process under the concurrency semaphore."""
        async with self._semaphore:
            result = self._results[task_id]
            result.status = TaskStatus.RUNNING
            start = time.time()
            try:
                result.result = await func(*args, **kwargs)
                result.status = TaskStatus.COMPLETED
            except Exception as e:
                result.status = TaskStatus.FAILED
                result.error = str(e)
                # Log with traceback (consistent with InMemoryTaskQueue).
                logger.exception("Task %s failed", task_id)
            finally:
                result.completed_at = time.time()
                result.duration_seconds = result.completed_at - start

    def get_result(self, task_id: str) -> Optional[TaskResult]:
        """Return the tracked result for *task_id*, or None if unknown."""
        return self._results.get(task_id)

    def stats(self) -> Dict[str, Any]:
        """Return a snapshot of queue health for monitoring endpoints."""
        return {
            "active": sum(1 for r in self._results.values() if r.status == TaskStatus.RUNNING),
            "pending": sum(1 for r in self._results.values() if r.status == TaskStatus.PENDING),
            "total_tracked": len(self._results),
            "backend": "arq" if self._pool else "asyncio-fallback",
        }


# ---------------------------------------------------------------------------
# Factory
# ---------------------------------------------------------------------------

# Process-wide singleton; created lazily by get_task_queue().
_task_queue = None


def get_task_queue() -> "InMemoryTaskQueue | ARQTaskQueue":
    """Get or create the global task queue.

    Returns an ``ARQTaskQueue`` when ``TASK_QUEUE_ENABLED`` and a broker
    URL are both configured, otherwise an ``InMemoryTaskQueue``. The
    instance is cached for the life of the process, so configuration
    changes after first call have no effect.
    """
    global _task_queue
    if _task_queue is None:
        if settings.task_queue_enabled and settings.task_queue_broker_url:
            _task_queue = ARQTaskQueue(
                broker_url=settings.task_queue_broker_url,
                max_concurrent=settings.queue_max_concurrent_inferences,
            )
        else:
            _task_queue = InMemoryTaskQueue(
                max_concurrent=settings.queue_max_concurrent_inferences,
                max_queue_size=settings.queue_max_size,
            )
    return _task_queue