"""
Task queue worker for ABSA processing.
Processes jobs from Redis queue asynchronously.
"""
import logging
import uuid
import threading
import time
from typing import Dict, Any, Optional
from datetime import datetime, timezone
from .redis_service import get_redis_service
from .mongodb_service import get_mongodb_service
logger = logging.getLogger(__name__)
class TaskQueue:
    """Task queue for asynchronous ABSA processing.

    Jobs are enqueued to a Redis-backed queue and executed by a single
    daemon worker thread; lifecycle events (request, queued, completed)
    are logged to MongoDB via the injected services.
    """

    def __init__(self, data_processor=None):
        """
        Initialize task queue.

        Args:
            data_processor: DataProcessor instance for ABSA processing.
                If None, _process_task falls back to a simulated run.
        """
        self.redis_service = get_redis_service()
        self.mongodb_service = get_mongodb_service()
        self.data_processor = data_processor
        self._worker_thread: Optional[threading.Thread] = None
        self._stop_worker = threading.Event()

    def generate_job_id(self) -> str:
        """Generate a unique job ID of the form 'job_' + 12 hex chars."""
        return f"job_{uuid.uuid4().hex[:12]}"

    def submit_job(
        self,
        data: Dict[str, Any],
        device_id: str,
        user_id: Optional[str] = None
    ) -> str:
        """
        Submit ABSA job to queue.

        Args:
            data: Job data (e.g., CSV data, parameters)
            device_id: Device identifier
            user_id: Optional user identifier

        Returns:
            Job ID. Note: the ID is returned even if enqueueing failed;
            callers can poll get_job_status to detect a job that never ran.
        """
        job_id = self.generate_job_id()
        # Log ANALYSIS_REQUEST before enqueueing so the request is recorded
        # even if Redis turns out to be unavailable.
        self.mongodb_service.log_event(
            event_type="ANALYSIS_REQUEST",
            device_id=device_id,
            user_id=user_id,
            metadata={"job_id": job_id}
        )
        task_data = {
            "device_id": device_id,
            "user_id": user_id,
            "data": data
        }
        if self.redis_service.enqueue_task(job_id, task_data):
            # Log TASK_QUEUED event
            self.mongodb_service.log_event(
                event_type="TASK_QUEUED",
                device_id=device_id,
                user_id=user_id,
                metadata={"job_id": job_id}
            )
            logger.info("Job %s submitted successfully", job_id)
        else:
            logger.error("Failed to submit job %s", job_id)
        return job_id

    def get_job_status(self, job_id: str) -> Optional[str]:
        """
        Get job status.

        Args:
            job_id: Job identifier

        Returns:
            Status string (PENDING, RUNNING, DONE, FAILED) or None
        """
        return self.redis_service.get_task_status(job_id)

    def get_job_result(self, job_id: str) -> Optional[Dict[str, Any]]:
        """
        Get job result if completed.

        Args:
            job_id: Job identifier

        Returns:
            Result data, or None when the job is not in DONE state.
        """
        status = self.get_job_status(job_id)
        if status == "DONE":
            return self.redis_service.get_task_result(job_id)
        return None

    def _process_task(self, task: Dict[str, Any]) -> bool:
        """
        Process a single task.

        Args:
            task: Task payload from queue (expects 'job_id' and 'data' keys)

        Returns:
            True if successful, False otherwise
        """
        job_id = task["job_id"]
        task_data = task["data"]
        device_id = task_data.get("device_id")
        user_id = task_data.get("user_id")
        logger.info("Processing task %s", job_id)
        try:
            self.redis_service.set_task_status(job_id, "RUNNING")
            if self.data_processor:
                # Delegate to the injected DataProcessor; the CSV payload
                # lives under task_data["data"] (the caller's `data` dict).
                csv_data = task_data["data"].get("csv_data")
                if csv_data:
                    result = self.data_processor.process_data(csv_data)
                else:
                    result = {"status": "error", "message": "No CSV data provided"}
            else:
                # No processor configured: simulate processing.
                time.sleep(2)
                result = {
                    "status": "success",
                    "message": "Processing completed",
                    "processed_at": datetime.now(timezone.utc).isoformat()
                }
            self.redis_service.set_task_result(job_id, result)
            self.redis_service.set_task_status(job_id, "DONE")
            # Log TASK_COMPLETED event
            self.mongodb_service.log_event(
                event_type="TASK_COMPLETED",
                device_id=device_id,
                user_id=user_id,
                metadata={"job_id": job_id, "success": True}
            )
            logger.info("Task %s completed successfully", job_id)
            return True
        except Exception as e:
            # logger.exception keeps the traceback; logger.error with str(e)
            # would discard it.
            logger.exception("Task %s failed: %s", job_id, e)
            self._record_failure(job_id, device_id, user_id, e)
            return False

    def _record_failure(self, job_id, device_id, user_id, exc) -> None:
        """Best-effort bookkeeping for a failed task.

        Each step is guarded individually so a Redis/Mongo outage during
        cleanup does not prevent the remaining failure records from being
        written (previously a raise here skipped the rest of the handler).
        """
        try:
            self.redis_service.set_task_status(job_id, "FAILED")
        except Exception:
            logger.exception("Could not mark job %s as FAILED", job_id)
        try:
            error_result = {
                "status": "error",
                "message": str(exc),
                "failed_at": datetime.now(timezone.utc).isoformat()
            }
            self.redis_service.set_task_result(job_id, error_result)
        except Exception:
            logger.exception("Could not store error result for job %s", job_id)
        try:
            # Log TASK_COMPLETED event with failure
            self.mongodb_service.log_event(
                event_type="TASK_COMPLETED",
                device_id=device_id,
                user_id=user_id,
                metadata={"job_id": job_id, "success": False, "error": str(exc)}
            )
        except Exception:
            logger.exception("Could not log failure event for job %s", job_id)

    def _worker_loop(self):
        """Worker loop: dequeue and process tasks until stop is signalled."""
        logger.info("Task worker started")
        while not self._stop_worker.is_set():
            try:
                # Blocking dequeue with a 1-second timeout so the stop event
                # is re-checked at least once per second.
                task = self.redis_service.dequeue_task(timeout=1)
                if task:
                    self._process_task(task)
            except Exception:
                logger.exception("Worker error")
                time.sleep(1)  # back off so a persistent error doesn't spin
        logger.info("Task worker stopped")

    def start_worker(self):
        """Start background worker thread (no-op if one is already running)."""
        if self._worker_thread and self._worker_thread.is_alive():
            logger.warning("Worker already running")
            return
        self._stop_worker.clear()
        # Daemon thread so a hung worker cannot block interpreter exit.
        self._worker_thread = threading.Thread(target=self._worker_loop, daemon=True)
        self._worker_thread.start()
        logger.info("Task worker thread started")

    def stop_worker(self):
        """Signal the worker to stop and wait up to 5 seconds for it to exit."""
        if self._worker_thread and self._worker_thread.is_alive():
            logger.info("Stopping task worker...")
            self._stop_worker.set()
            self._worker_thread.join(timeout=5)
            if self._worker_thread.is_alive():
                # join() timed out; previously we logged "stopped" regardless.
                logger.warning("Task worker did not stop within timeout")
            else:
                logger.info("Task worker stopped")
# Global task queue instance (lazily created singleton)
_task_queue = None
_task_queue_lock = threading.Lock()


def get_task_queue(data_processor=None) -> TaskQueue:
    """Get task queue singleton.

    Thread-safe: the check-then-create is guarded by a lock so concurrent
    first callers cannot construct two TaskQueue instances (the module is
    explicitly multi-threaded via the worker thread).

    NOTE: data_processor is only honored on the first call; later calls
    return the existing instance unchanged.
    """
    global _task_queue
    if _task_queue is None:
        with _task_queue_lock:
            # Re-check under the lock (double-checked locking).
            if _task_queue is None:
                _task_queue = TaskQueue(data_processor)
    return _task_queue