Spaces:
Sleeping
Sleeping
File size: 899 Bytes
"""Celery tasks for OCR-only worker queue (`ocr`)."""
from __future__ import annotations
import asyncio
import logging
from worker.celery_app import celery_app
logger = logging.getLogger(__name__)
@celery_app.task(name="worker.ocr_tasks.run_ocr_from_url")
def run_ocr_from_url(image_url: str) -> str:
    """
    Download image from public URL and run OCR models only (YOLO / PaddleOCR / Pix2Tex).

    LLM post-processing runs on the API via ``OCRAgent.refine_with_llm`` after the result is returned.

    Args:
        image_url: URL of the image handed to ``OcrVisionPipeline.process_url``.

    Returns:
        The text returned by the pipeline, or ``""`` when the pipeline returns ``None``.
    """
    from urllib.parse import urlsplit

    # Imported inside the task, so the pipeline module is loaded on first
    # execution rather than when this module is imported.
    from vision_ocr.pipeline import OcrVisionPipeline

    # Derive the host for the log line only. ``hostname`` omits any
    # ``user:pass@`` prefix and port, and a malformed or scheme-less URL must
    # never abort the task just because a diagnostic message could not be built.
    try:
        host = urlsplit(image_url).hostname or "?"
    except ValueError:
        # urlsplit rejects some malformed authorities (e.g. unbalanced IPv6 brackets).
        host = "?"

    pipeline = OcrVisionPipeline()
    logger.info("[run_ocr_from_url] starting OCR for url host=%s", host)

    # ``process_url`` is a coroutine; the task itself is synchronous, so drive
    # it to completion on a fresh event loop.
    text = asyncio.run(pipeline.process_url(image_url))

    logger.info("[run_ocr_from_url] done, text_len=%s", len(text or ""))
    return text if text is not None else ""
|