Spaces:

outcomelabs
/

docling-parser

Running on T4

App Files Files Community

sidoutcome commited on Mar 13

Commit

53b94dc

1 Parent(s): c67903b

feat: v3.1.0 - DPI 150, parallel rendering, VLM retry, quality fixes

Browse files

Files changed (1) hide show

app.py +385 -351

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Docling VLM Parser API v3.0.0
 A FastAPI service using a VLM-FIRST hybrid architecture for document parsing:
   Pass 1 (GPU):  Qwen3-VL via vLLM — concurrent OCR on ALL pages (fast)
@@ -7,17 +7,15 @@ A FastAPI service using a VLM-FIRST hybrid architecture for document parsing:
   Pass 2 (CPU):  Docling TableFormer ONLY on table pages (targeted, minimal)
   Merge:         VLM text for all pages + TableFormer tables where detected
-Key insight: the previous architecture ran Docling's full CPU pipeline (DocLayNet +
-TableFormer + RapidOCR) on ALL pages, taking 60-565s. Most of that time was wasted
-on non-table pages. Now we run the fast GPU VLM first, detect which pages have tables,
-and only send those pages (as a mini-PDF) to Docling for table structure extraction.
-Features:
-- VLM-first: GPU-accelerated OCR on all pages via Qwen3-VL (concurrent)
-- Targeted TableFormer: CPU pipeline runs only on pages with tables
-- pypdf mini-PDF extraction for page-level Docling targeting
-- OpenCV image preprocessing (denoise, CLAHE contrast enhancement)
-- Image extraction with configurable resolution
 """
 import asyncio
@@ -97,6 +95,9 @@ VLM_PORT = os.getenv("VLM_PORT", "8000")
 IMAGES_SCALE = float(os.getenv("IMAGES_SCALE", "2.0"))
 MAX_FILE_SIZE_MB = int(os.getenv("MAX_FILE_SIZE_MB", "1024"))
 MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
 # Blocked hostnames for SSRF protection
 BLOCKED_HOSTNAMES = {
@@ -203,7 +204,7 @@ class ParseResponse(BaseModel):
     success: bool
     markdown: Optional[str] = None
     json_content: Optional[Union[dict, list]] = None
-    images_zip: Optional[str] = None  # Base64-encoded zip file containing all images
     image_count: int = 0
     error: Optional[str] = None
     pages_processed: int = 0
@@ -229,29 +230,27 @@ class URLParseRequest(BaseModel):
     url: str
     output_format: str = "markdown"
     images_scale: Optional[float] = None
-    start_page: int = 0  # Starting page (0-indexed)
-    end_page: Optional[int] = None  # Ending page (None = all pages)
     include_images: bool = False
 # ---------------------------------------------------------------------------
-# OpenCV Image Preprocessing
 # ---------------------------------------------------------------------------
 def _preprocess_image_for_ocr(image_path: str) -> str:
     """Enhance image quality for better OCR accuracy.
-    Applies: deskew correction, denoising, CLAHE contrast enhancement.
-    Returns the path to the preprocessed image (same path, overwritten).
     """
     img = cv2.imread(image_path)
     if img is None:
         return image_path
-    # Denoise
-    img = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
     # CLAHE contrast enhancement on L channel
     lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
     l, a, b = cv2.split(lab)
@@ -265,243 +264,350 @@ def _preprocess_image_for_ocr(image_path: str) -> str:
 # ---------------------------------------------------------------------------
-# VLM OCR (Pass 2)
 # ---------------------------------------------------------------------------
-def _vlm_ocr_page(page_image_bytes: bytes) -> str:
-    """Send a page image to Qwen3-VL via vLLM for text extraction.
-    Args:
-        page_image_bytes: PNG image bytes of the page
-    Returns:
-        Extracted markdown text from the page
     """
     b64_image = base64.b64encode(page_image_bytes).decode("utf-8")
-    response = httpx.post(
-        f"http://{VLM_HOST}:{VLM_PORT}/v1/chat/completions",
-        json={
-            "model": VLM_MODEL,
-            "messages": [
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "image_url",
-                            "image_url": {"url": f"data:image/png;base64,{b64_image}"},
-                        },
-                        {
-                            "type": "text",
-                            "text": (
-                                "OCR this document page to markdown. "
-                                "Extract ALL text exactly as written, preserving headings, lists, and paragraphs. "
-                                "For tables, output them as markdown tables. "
-                                "For handwritten text, transcribe as accurately as possible. "
-                                "Return ONLY the extracted content, no explanations."
-                            ),
-                        },
-                    ],
-                }
-            ],
-            "max_tokens": 16384,
-            "temperature": 0.1,
-        },
-        timeout=120.0,
-    )
-    if response.status_code != 200:
-        try:
-            err = response.json()
-            msg = err.get("message", err.get("detail", str(err)[:300]))
-        except Exception:
-            msg = response.text[:300]
-        logger.error(f"vLLM error ({response.status_code}): {msg}")
-    response.raise_for_status()
-    result = response.json()
-    choices = result.get("choices")
-    if not choices:
-        raise ValueError(f"vLLM returned no choices")
-    content = choices[0].get("message", {}).get("content")
-    if content is None:
-        raise ValueError(f"vLLM response missing content")
-    return content
-# ---------------------------------------------------------------------------
-# Table Extraction Helper
-# ---------------------------------------------------------------------------
-def _extract_table_markdowns(doc) -> dict:
-    """Extract table markdown from Docling document, keyed by page number."""
-    tables_by_page: dict[int, list[str]] = {}
-    for element, _ in doc.iterate_items():
-        if isinstance(element, TableItem):
-            page_no = element.prov[0].page_no if element.prov else -1
-            table_md = element.export_to_markdown(doc=doc)
-            if page_no not in tables_by_page:
-                tables_by_page[page_no] = []
-            tables_by_page[page_no].append(table_md)
-    return tables_by_page
 # ---------------------------------------------------------------------------
-# Merge: VLM Text + TableFormer Tables
 # ---------------------------------------------------------------------------
-def _merge_vlm_with_tables(vlm_text: str, table_markdowns: list) -> str:
-    """Replace VLM's table sections with TableFormer's more accurate tables.
-    Detects markdown table patterns (lines with |...|) in VLM output
-    and replaces them with TableFormer output.
-    """
-    if not table_markdowns:
-        return vlm_text
-    # Pattern: consecutive lines that look like markdown tables
-    # A markdown table has lines starting and ending with |
-    table_pattern = re.compile(r"((?:^\|[^\n]+\|$\n?)+)", re.MULTILINE)
-    vlm_table_count = len(table_pattern.findall(vlm_text))
-    if vlm_table_count != len(table_markdowns):
-        logger.warning(
-            f"Table count mismatch: VLM={vlm_table_count}, TableFormer={len(table_markdowns)}. "
-            f"Positional replacement may be imprecise."
-        )
-    table_idx = 0
-    def replace_table(match):
-        nonlocal table_idx
-        if table_idx < len(table_markdowns):
-            replacement = table_markdowns[table_idx]
-            table_idx += 1
-            return replacement.strip() + "\n"
-        return match.group(0)
-    result = table_pattern.sub(replace_table, vlm_text)
-    # If there are remaining TableFormer tables not matched, append them
-    while table_idx < len(table_markdowns):
-        result += "\n\n" + table_markdowns[table_idx].strip() + "\n"
-        table_idx += 1
-    return result
-# ---------------------------------------------------------------------------
-# Table Detection from VLM Output
-# ---------------------------------------------------------------------------
 def _detect_table_pages(vlm_page_texts: dict[int, Optional[str]]) -> set[int]:
     """Detect pages containing tables from VLM markdown output.
-    Looks for markdown table separator rows (e.g., | --- | --- |) which are
-    a reliable signal of table content. Returns set of 0-indexed page numbers.
     """
-    # Markdown table separator: | --- | --- | (with optional colons for alignment)
-    separator_pattern = re.compile(r"^\|[\s\-:]+(?:\|[\s\-:]+)+\|?\s*$", re.MULTILINE)
     table_pages: set[int] = set()
     for page_no, text in vlm_page_texts.items():
-        if text and separator_pattern.search(text):
             table_pages.add(page_no)
     return table_pages
 def _extract_pages_to_pdf(
     input_path: Path, page_numbers: list[int], request_id: str
 ) -> tuple[Path, dict[int, int]]:
-    """Extract specific pages from a PDF into a mini-PDF.
     Args:
         input_path: Path to the original PDF
         page_numbers: 0-indexed page numbers to extract
-        request_id: For logging
     Returns:
-        (mini_pdf_path, page_map) where page_map maps Docling 1-indexed pages
-        in the mini-PDF back to 0-indexed original page numbers.
     """
     from pypdf import PdfReader, PdfWriter
     reader = PdfReader(str(input_path))
     writer = PdfWriter()
-    # page_map: {docling_1indexed_mini_page: original_0indexed_page}
     page_map: dict[int, int] = {}
-    for idx, orig_page in enumerate(page_numbers):
         if orig_page < len(reader.pages):
             writer.add_page(reader.pages[orig_page])
             page_map[idx + 1] = orig_page  # Docling uses 1-indexed pages
     mini_pdf_path = input_path.parent / f"table_pages_{request_id}.pdf"
     with open(mini_pdf_path, "wb") as f:
         writer.write(f)
-    logger.info(f"[{request_id}] Created mini-PDF: {len(page_map)} table pages from original")
     return mini_pdf_path, page_map
 # ---------------------------------------------------------------------------
-# PDF to Page Images
 # ---------------------------------------------------------------------------
-def _pdf_to_page_images(
-    input_path: Path, start_page: int = 0, end_page: Optional[int] = None
-) -> list:
-    """Convert PDF pages to PNG image bytes using pdf2image.
-    Processes one page at a time to avoid loading all pages into memory.
-    Returns list of (page_no, png_bytes) tuples.
     """
-    page_images: list[tuple[int, bytes]] = []
     try:
-        # Determine total page count first
         from pdf2image.pdf2image import pdfinfo_from_path
         info = pdfinfo_from_path(str(input_path))
         total_pages = info["Pages"]
         last_page = min(end_page + 1, total_pages) if end_page is not None else total_pages
-        for i in range(start_page, last_page):
-            # Convert one page at a time (pdf2image is 1-indexed)
-            images = convert_from_path(
-                str(input_path), dpi=300, first_page=i + 1, last_page=i + 1
-            )
-            if not images:
-                continue
-            img = images[0]
-            # Save to temp file for OpenCV preprocessing
-            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
-                tmp_path = tmp.name
-                img.save(tmp_path, format="PNG")
-            try:
-                _preprocess_image_for_ocr(tmp_path)
-                with open(tmp_path, "rb") as f:
-                    page_images.append((i, f.read()))
-            finally:
-                os.unlink(tmp_path)
     except Exception as e:
-        # Fallback: log warning — caller handles empty list
-        logger.warning(f"pdf2image failed, VLM OCR may be limited: {e}")
     return page_images
 # ---------------------------------------------------------------------------
-# Docling Converter (Pass 1)
 # ---------------------------------------------------------------------------
 def _create_converter(images_scale: float = 2.0) -> DocumentConverter:
     """Create a Docling converter with Standard Pipeline.
-    Uses DocLayNet (layout) + TableFormer ACCURATE (tables) + RapidOCR (baseline text).
     """
     device = _get_device()
     logger.info(f"Creating converter with device: {device}")
@@ -512,15 +618,11 @@ def _create_converter(images_scale: float = 2.0) -> DocumentConverter:
     pipeline_options.table_structure_options.mode = TableFormerMode.ACCURATE
     pipeline_options.table_structure_options.do_cell_matching = True
-    # Use RapidOCR as baseline (VLM will enhance text in pass 2)
     pipeline_options.ocr_options = RapidOcrOptions()
     pipeline_options.ocr_options.force_full_page_ocr = True
-    # Enable page image generation (needed for VLM pass)
     pipeline_options.generate_page_images = True
     pipeline_options.images_scale = images_scale
-    # Also enable picture image extraction
     pipeline_options.generate_picture_images = True
     pipeline_options.accelerator_options = AcceleratorOptions(
@@ -548,7 +650,7 @@ def _get_converter() -> DocumentConverter:
 # ---------------------------------------------------------------------------
-# Hybrid Conversion (Pass 1 + Pass 2 + Merge)
 # ---------------------------------------------------------------------------
@@ -562,51 +664,42 @@ def _convert_document(
     end_page: Optional[int] = None,
 ) -> tuple:
     """
-    VLM-first hybrid conversion: Qwen3-VL for text + targeted TableFormer for tables.
-    Pass 1 (GPU):  VLM OCR on ALL pages — fast concurrent processing
-    Detect:        Identify pages with tables from VLM markdown output
-    Pass 2 (CPU):  Docling TableFormer ONLY on table pages — minimal CPU work
-    Merge:         VLM text + TableFormer tables
     Returns: (markdown_content, json_content, pages_processed, image_count)
     """
-    total_start = time.time()
-    # --- RENDER: Convert PDF pages to images ---
-    render_start = time.time()
-    page_images = _pdf_to_page_images(input_path, start_page, end_page)
-    render_time = time.time() - render_start
-    logger.info(
-        f"[{request_id}] Rendered {len(page_images)} pages in {render_time:.2f}s"
-    )
     if not page_images:
-        logger.warning(
-            f"[{request_id}] No page images available, falling back to full Docling pipeline"
-        )
         return _convert_document_full_docling(
             input_path, output_dir, images_scale, include_images, request_id
         )
-    # --- PASS 1 (GPU): VLM OCR on all pages ---
     logger.info(f"[{request_id}] Pass 1: VLM OCR via Qwen3-VL ({VLM_MODEL})")
     vlm_page_texts: dict[int, Optional[str]] = {}
     vlm_start = time.time()
-    max_workers = min(2, len(page_images))
-    logger.info(
-        f"[{request_id}] Sending {len(page_images)} pages to VLM ({max_workers} concurrent)"
-    )
-    with ThreadPoolExecutor(max_workers=max_workers) as pool:
-        futures = {
-            pool.submit(_vlm_ocr_page, page_bytes): page_no
             for page_no, page_bytes in page_images
         }
-        for future in as_completed(futures):
-            page_no = futures[future]
             try:
                 vlm_text = future.result()
                 vlm_page_texts[page_no] = vlm_text
@@ -614,18 +707,15 @@ def _convert_document(
                     f"[{request_id}] VLM processed page {page_no + 1} ({len(vlm_text)} chars)"
                 )
             except Exception as e:
-                logger.warning(
-                    f"[{request_id}] VLM failed on page {page_no + 1}: {e}"
-                )
                 vlm_page_texts[page_no] = None
     vlm_time = time.time() - vlm_start
-    logger.info(
-        f"[{request_id}] Pass 1 completed in {vlm_time:.2f}s ({len(vlm_page_texts)} pages)"
-    )
-    # --- DETECT: Find pages with tables in VLM output ---
     table_pages = _detect_table_pages(vlm_page_texts)
     if table_pages:
         logger.info(
             f"[{request_id}] Tables detected on {len(table_pages)} pages: "
@@ -634,90 +724,57 @@ def _convert_document(
     else:
         logger.info(f"[{request_id}] No tables detected — skipping Docling entirely")
-    # --- PASS 2 (CPU): Docling TableFormer ONLY on table pages ---
     tables_by_page: dict[int, list[str]] = {}
-    pass2_time = 0.0
-    image_count = 0
-    image_dir = output_dir / "images"
     if table_pages:
-        pass2_start = time.time()
         logger.info(
             f"[{request_id}] Pass 2: Docling TableFormer on {len(table_pages)} table pages"
         )
         try:
-            # Create mini-PDF containing only table pages
             mini_pdf_path, page_map = _extract_pages_to_pdf(
                 input_path, sorted(table_pages), request_id
             )
-            # Run Docling on mini-PDF (full pipeline for accurate table cell text)
             converter = _get_converter()
             result = converter.convert(mini_pdf_path)
             doc = result.document
-            if doc:
-                # Extract tables, mapping mini-PDF pages back to original page numbers
-                for element, _ in doc.iterate_items():
-                    if isinstance(element, TableItem):
-                        mini_page = element.prov[0].page_no if element.prov else -1
-                        orig_page = page_map.get(mini_page, mini_page)
-                        table_md = element.export_to_markdown(doc=doc)
-                        tables_by_page.setdefault(orig_page, []).append(table_md)
-                # Extract images from Docling if requested
-                if include_images:
-                    image_dir.mkdir(parents=True, exist_ok=True)
-                    for element, _ in doc.iterate_items():
-                        if isinstance(element, PictureItem):
-                            if element.image and element.image.pil_image:
-                                pg = element.prov[0].page_no if element.prov else 0
-                                orig_pg = page_map.get(pg, pg)
-                                image_id = element.self_ref.split("/")[-1]
-                                image_name = f"page_{orig_pg + 1}_{image_id}.png"
-                                image_name = re.sub(r'[\\/*?:"<>|]', "", image_name)
-                                image_path = image_dir / image_name
-                                try:
-                                    element.image.pil_image.save(image_path, format="PNG")
-                                    image_count += 1
-                                except Exception as e:
-                                    logger.warning(
-                                        f"[{request_id}] Failed to save image: {e}"
-                                    )
             # Clean up mini-PDF
-            try:
-                os.unlink(mini_pdf_path)
-            except OSError:
-                pass
-            pass2_time = time.time() - pass2_start
-            total_tables = sum(len(v) for v in tables_by_page.values())
-            logger.info(
-                f"[{request_id}] Pass 2 completed in {pass2_time:.2f}s — "
-                f"{total_tables} TableFormer tables extracted"
-            )
         except Exception as e:
-            pass2_time = time.time() - pass2_start
-            logger.warning(
-                f"[{request_id}] TableFormer pass failed ({e}), using VLM tables only"
-            )
-    # --- MERGE: VLM text + TableFormer tables ---
     md_parts: list[str] = []
-    pages_seen: set[int] = set()
     for page_no in sorted(vlm_page_texts.keys()):
-        pages_seen.add(page_no)
         md_parts.append(f"\n\n<!-- Page {page_no + 1} -->\n\n")
         vlm_text = vlm_page_texts[page_no]
         if vlm_text is None:
-            md_parts.append(f"<!-- VLM failed on this page -->\n")
         else:
             page_tables = tables_by_page.get(page_no, [])
             if page_tables:
                 merged = _merge_vlm_with_tables(vlm_text, page_tables)
@@ -725,14 +782,39 @@ def _convert_document(
             else:
                 md_parts.append(vlm_text)
     markdown_content = "".join(md_parts)
-    pages_processed = len(pages_seen)
-    total_time = time.time() - total_start
     logger.info(
         f"[{request_id}] VLM-first conversion complete: {pages_processed} pages — "
         f"render {render_time:.1f}s + VLM {vlm_time:.1f}s + "
-        f"TableFormer {pass2_time:.1f}s = {total_time:.2f}s total"
     )
     if pages_processed > 0:
         logger.info(f"[{request_id}] Speed: {pages_processed / total_time:.2f} pages/sec")
@@ -747,19 +829,18 @@ def _convert_document_full_docling(
     include_images: bool,
     request_id: str,
 ) -> tuple:
-    """Fallback: Full Docling pipeline when page images are unavailable."""
-    logger.info(f"[{request_id}] Running full Docling pipeline (fallback mode)")
     converter = _get_converter()
     start_time = time.time()
     result = converter.convert(input_path)
     doc = result.document
     if doc is None:
         raise ValueError("Docling failed to parse document")
     elapsed = time.time() - start_time
-    logger.info(f"[{request_id}] Docling completed in {elapsed:.2f}s")
     markdown_content = doc.export_to_markdown()
     pages_processed = len(
@@ -773,9 +854,9 @@ def _convert_document_full_docling(
         for element, _ in doc.iterate_items():
             if isinstance(element, PictureItem):
                 if element.image and element.image.pil_image:
-                    page_no = element.prov[0].page_no if element.prov else 0
                     image_id = element.self_ref.split("/")[-1]
-                    image_name = f"page_{page_no + 1}_{image_id}.png"
                     image_name = re.sub(r'[\\/*?:"<>|]', "", image_name)
                     image_path = image_dir / image_name
                     try:
@@ -826,7 +907,7 @@ def _create_images_zip(output_dir: Path) -> tuple[Optional[str], int]:
 async def lifespan(app: FastAPI):
     """Startup: initialize Docling converter and check vLLM."""
     logger.info("=" * 60)
-    logger.info("Starting Docling VLM Parser API v3.0.0...")
     device = _get_device()
     logger.info(f"Device: {device}")
@@ -835,11 +916,13 @@ async def lifespan(app: FastAPI):
         logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
         logger.info(f"CUDA Version: {torch.version.cuda}")
         logger.info(
-            f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB"
         )
     logger.info(f"VLM Model: {VLM_MODEL}")
     logger.info(f"VLM Endpoint: http://{VLM_HOST}:{VLM_PORT}")
     logger.info(f"Images scale: {IMAGES_SCALE}")
     logger.info(f"Max file size: {MAX_FILE_SIZE_MB}MB")
@@ -875,8 +958,8 @@ async def lifespan(app: FastAPI):
 app = FastAPI(
     title="Docling VLM Parser API",
-    description="VLM-first hybrid parser: Qwen3-VL OCR (GPU) + targeted TableFormer (CPU)",
-    version="3.0.0",
     lifespan=lifespan,
 )
@@ -890,11 +973,7 @@ app = FastAPI(
 async def health_check() -> HealthResponse:
     """Health check endpoint."""
     device = _get_device()
-    gpu_name = None
-    if device == "cuda":
-        gpu_name = torch.cuda.get_device_name(0)
-    # Check vLLM status (async to avoid blocking event loop)
     vlm_status = "unknown"
     try:
         async with httpx.AsyncClient(timeout=5) as client:
@@ -905,10 +984,10 @@ async def health_check() -> HealthResponse:
     return HealthResponse(
         status="healthy",
-        version="3.0.0",
         device=device,
-        gpu_name=None,  # Don't leak GPU details on unauthenticated endpoint
-        vlm_model="active",  # Confirm VLM is configured without leaking model name
         vlm_status=vlm_status,
         images_scale=IMAGES_SCALE,
     )
@@ -918,25 +997,13 @@ async def health_check() -> HealthResponse:
 async def parse_document(
     file: UploadFile = File(..., description="PDF or image file to parse"),
     output_format: str = Form(default="markdown", description="Output format: markdown or json"),
-    images_scale: Optional[float] = Form(default=None, description="Image resolution scale (default: 2.0)"),
     start_page: int = Form(default=0, description="Starting page (0-indexed)"),
     end_page: Optional[int] = Form(default=None, description="Ending page (None = all pages)"),
-    include_images: bool = Form(default=False, description="Include extracted images in response"),
     _token: str = Depends(verify_token),
 ) -> ParseResponse:
-    """
-    Parse a document file (PDF or image) and return extracted content.
-    Uses a VLM-first hybrid approach:
-      Pass 1 (GPU): Qwen3-VL via vLLM for OCR on all pages (concurrent)
-      Detect: Identify pages with tables from VLM output
-      Pass 2 (CPU): Docling TableFormer only on table pages
-      Merge: VLM text + TableFormer tables
-    Supports:
-    - PDF files (.pdf)
-    - Images (.png, .jpg, .jpeg, .tiff, .bmp)
-    """
     request_id = str(uuid4())[:8]
     start_time = time.time()
@@ -949,7 +1016,7 @@ async def parse_document(
     if output_format not in ("markdown",):
         raise HTTPException(
             status_code=400,
-            detail="Only 'markdown' output_format is supported in v3.0.0",
         )
     # Validate file size
@@ -961,7 +1028,6 @@ async def parse_document(
     logger.info(f"[{request_id}] File size: {file_size_mb:.2f} MB")
     if file_size > MAX_FILE_SIZE_BYTES:
-        logger.error(f"[{request_id}] File too large: {file_size_mb:.2f} MB > {MAX_FILE_SIZE_MB} MB")
         raise HTTPException(
             status_code=413,
             detail=f"File size exceeds maximum allowed size of {MAX_FILE_SIZE_MB}MB",
@@ -971,32 +1037,25 @@ async def parse_document(
     allowed_extensions = {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp"}
     file_ext = Path(file.filename).suffix.lower() if file.filename else ""
     if file_ext not in allowed_extensions:
-        logger.error(f"[{request_id}] Unsupported file type: {file_ext}")
         raise HTTPException(
             status_code=400,
             detail=f"Unsupported file type. Allowed: {', '.join(allowed_extensions)}",
         )
-    # Use defaults if not specified
     use_images_scale = images_scale if images_scale is not None else IMAGES_SCALE
     logger.info(f"[{request_id}] Images scale: {use_images_scale}, VLM: {VLM_MODEL}")
     logger.info(f"[{request_id}] Page range: {start_page} to {end_page or 'end'}")
     temp_dir = tempfile.mkdtemp()
-    logger.debug(f"[{request_id}] Created temp directory: {temp_dir}")
     try:
-        # Save uploaded file
         input_path = Path(temp_dir) / f"input{file_ext}"
         await asyncio.to_thread(_save_uploaded_file, input_path, file.file)
-        logger.debug(f"[{request_id}] Saved file to: {input_path}")
-        # Create output directory
         output_dir = Path(temp_dir) / "output"
         output_dir.mkdir(exist_ok=True)
-        # Convert document (hybrid two-pass)
         markdown_content, json_content, pages_processed, image_count = await asyncio.to_thread(
             _convert_document,
             input_path,
@@ -1008,11 +1067,9 @@ async def parse_document(
             end_page,
         )
-        # Create images zip if requested
         images_zip = None
         if include_images and image_count > 0:
             images_zip, image_count = _create_images_zip(output_dir)
-            logger.info(f"[{request_id}] Created images zip with {image_count} images")
         total_duration = time.time() - start_time
         logger.info(f"[{request_id}] {'='*50}")
@@ -1046,7 +1103,6 @@ async def parse_document(
         )
     finally:
         shutil.rmtree(temp_dir, ignore_errors=True)
-        logger.debug(f"[{request_id}] Cleaned up temp directory")
 @app.post("/parse/url", response_model=ParseResponse)
@@ -1054,14 +1110,7 @@ async def parse_document_from_url(
     request: URLParseRequest,
     _token: str = Depends(verify_token),
 ) -> ParseResponse:
-    """
-    Parse a document from a URL.
-    Downloads the file and processes it through the hybrid two-pass pipeline:
-      Pass 1: Docling Standard Pipeline (DocLayNet + TableFormer + RapidOCR)
-      Pass 2: Qwen3-VL via vLLM for enhanced text recognition
-      Merge: TableFormer tables preserved, VLM text replaces RapidOCR text
-    """
     request_id = str(uuid4())[:8]
     start_time = time.time()
@@ -1073,16 +1122,12 @@ async def parse_document_from_url(
     if request.output_format not in ("markdown",):
         raise HTTPException(
             status_code=400,
-            detail="Only 'markdown' output_format is supported in v3.0.0",
         )
-    # Validate URL
-    logger.info(f"[{request_id}] Validating URL...")
     _validate_url(request.url)
-    logger.info(f"[{request_id}] URL validation passed")
     temp_dir = tempfile.mkdtemp()
-    logger.debug(f"[{request_id}] Created temp directory: {temp_dir}")
     try:
         # Download file
@@ -1091,19 +1136,18 @@ async def parse_document_from_url(
         async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
             response = await client.get(request.url)
             response.raise_for_status()
-        download_duration = time.time() - download_start
         file_size_mb = len(response.content) / (1024 * 1024)
-        logger.info(f"[{request_id}] Download completed in {download_duration:.2f}s")
-        logger.info(f"[{request_id}] File size: {file_size_mb:.2f} MB")
-        # Determine file extension from URL path, Content-Type header, or default to .pdf
-        allowed_extensions = {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp"}
         url_path = Path(request.url.split("?")[0])
         file_ext = url_path.suffix.lower()
-        if file_ext not in allowed_extensions:
-            # Try Content-Type header
             content_type = response.headers.get("content-type", "").lower()
             ct_map = {
                 "application/pdf": ".pdf",
@@ -1113,33 +1157,26 @@ async def parse_document_from_url(
                 "image/bmp": ".bmp",
             }
             file_ext = next((v for k, v in ct_map.items() if k in content_type), ".pdf")
-            logger.info(f"[{request_id}] URL suffix not recognized, using: {file_ext} (from content-type: {content_type})")
         if len(response.content) > MAX_FILE_SIZE_BYTES:
-            logger.error(
-                f"[{request_id}] File too large: {file_size_mb:.2f} MB > {MAX_FILE_SIZE_MB} MB"
-            )
             raise HTTPException(
                 status_code=413,
                 detail=f"File size exceeds maximum allowed size of {MAX_FILE_SIZE_MB}MB",
             )
-        # Save downloaded file
         input_path = Path(temp_dir) / f"input{file_ext}"
         await asyncio.to_thread(_save_downloaded_content, input_path, response.content)
-        logger.debug(f"[{request_id}] Saved file to: {input_path}")
-        # Create output directory
         output_dir = Path(temp_dir) / "output"
         output_dir.mkdir(exist_ok=True)
-        # Use defaults if not specified
         use_images_scale = request.images_scale if request.images_scale is not None else IMAGES_SCALE
         logger.info(f"[{request_id}] Images scale: {use_images_scale}, VLM: {VLM_MODEL}")
-        logger.info(f"[{request_id}] Page range: {request.start_page} to {request.end_page or 'end'}")
-        # Convert document (hybrid two-pass)
         markdown_content, json_content, pages_processed, image_count = await asyncio.to_thread(
             _convert_document,
             input_path,
@@ -1151,11 +1188,9 @@ async def parse_document_from_url(
             request.end_page,
         )
-        # Create images zip if requested
         images_zip = None
         if request.include_images and image_count > 0:
             images_zip, image_count = _create_images_zip(output_dir)
-            logger.info(f"[{request_id}] Created images zip with {image_count} images")
         total_duration = time.time() - start_time
         logger.info(f"[{request_id}] {'='*50}")
@@ -1196,7 +1231,6 @@ async def parse_document_from_url(
         )
     finally:
         shutil.rmtree(temp_dir, ignore_errors=True)
-        logger.debug(f"[{request_id}] Cleaned up temp directory")
 if __name__ == "__main__":

 """
+Docling VLM Parser API v3.1.0
 A FastAPI service using a VLM-FIRST hybrid architecture for document parsing:
   Pass 1 (GPU):  Qwen3-VL via vLLM — concurrent OCR on ALL pages (fast)
   Pass 2 (CPU):  Docling TableFormer ONLY on table pages (targeted, minimal)
   Merge:         VLM text for all pages + TableFormer tables where detected
+v3.1.0 fixes over v3.0.0:
+  - Quality: VLM prompt enforces markdown tables (no LaTeX), strips <think> tokens
+  - Quality: VLM retry on timeout/failure (1 retry with longer timeout)
+  - Quality: Table detection catches both markdown and LaTeX table patterns
+  - Quality: Proper page_map translation for mini-PDF → original page numbers
+  - Speed: DPI 200 (from 300) — sufficient for VLM, 55% fewer pixels
+  - Speed: Dropped fastNlMeansDenoisingColored (saves ~10s/page), kept only CLAHE
+  - Speed: Parallel page rendering via ThreadPoolExecutor
+  - Speed: Increased VLM concurrency from 2 to 4 workers
 """
 import asyncio
 IMAGES_SCALE = float(os.getenv("IMAGES_SCALE", "2.0"))
 MAX_FILE_SIZE_MB = int(os.getenv("MAX_FILE_SIZE_MB", "1024"))
 MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
+VLM_TIMEOUT = float(os.getenv("VLM_TIMEOUT", "300"))
+VLM_CONCURRENCY = int(os.getenv("VLM_CONCURRENCY", "4"))
+RENDER_DPI = int(os.getenv("RENDER_DPI", "150"))
 # Blocked hostnames for SSRF protection
 BLOCKED_HOSTNAMES = {
     success: bool
     markdown: Optional[str] = None
     json_content: Optional[Union[dict, list]] = None
+    images_zip: Optional[str] = None
     image_count: int = 0
     error: Optional[str] = None
     pages_processed: int = 0
     url: str
     output_format: str = "markdown"
     images_scale: Optional[float] = None
+    start_page: int = 0
+    end_page: Optional[int] = None
     include_images: bool = False
 # ---------------------------------------------------------------------------
+# OpenCV Image Preprocessing (CLAHE only — fast)
 # ---------------------------------------------------------------------------
 def _preprocess_image_for_ocr(image_path: str) -> str:
     """Enhance image quality for better OCR accuracy.
+    Applies CLAHE contrast enhancement only (fast).
+    Denoising was removed in v3.1.0 — it added ~10s/page with minimal
+    benefit for VLM-based OCR which handles noise well.
     """
     img = cv2.imread(image_path)
     if img is None:
         return image_path
     # CLAHE contrast enhancement on L channel
     lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
     l, a, b = cv2.split(lab)
 # ---------------------------------------------------------------------------
+# VLM OCR with retry
 # ---------------------------------------------------------------------------
+# Strip Qwen3 <think>...</think> reasoning blocks
+_THINK_PATTERN = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
+def _vlm_ocr_page(page_image_bytes: bytes, request_id: str = "", page_no: int = 0) -> str:
+    """Send a page image to Qwen3-VL via vLLM for text extraction.
+    Includes retry logic: on timeout/failure, retries once with longer timeout.
+    Strips <think> reasoning tokens from Qwen3 output.
     """
     b64_image = base64.b64encode(page_image_bytes).decode("utf-8")
+    payload = {
+        "model": VLM_MODEL,
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/png;base64,{b64_image}"},
+                    },
+                    {
+                        "type": "text",
+                        "text": (
+                            "OCR this document page to markdown. "
+                            "Extract ALL text exactly as written, preserving headings, lists, and paragraphs. "
+                            "For tables, output them as MARKDOWN tables using | delimiters and --- separator rows. "
+                            "NEVER use LaTeX tabular format. ALWAYS use markdown pipe tables. "
+                            "For handwritten text, transcribe as accurately as possible. "
+                            "Return ONLY the extracted content, no explanations or commentary."
+                        ),
+                    },
+                ],
+            }
+        ],
+        "max_tokens": 16384,
+        "temperature": 0.1,
+    }
+    url = f"http://{VLM_HOST}:{VLM_PORT}/v1/chat/completions"
+    # Try with primary timeout, then retry once with extended timeout
+    for attempt, timeout in enumerate([VLM_TIMEOUT, VLM_TIMEOUT * 1.5], start=1):
+        try:
+            response = httpx.post(url, json=payload, timeout=timeout)
+            if response.status_code != 200:
+                try:
+                    err = response.json()
+                    msg = err.get("message", err.get("detail", str(err)[:300]))
+                except Exception:
+                    msg = response.text[:300]
+                logger.error(f"[{request_id}] vLLM error ({response.status_code}) page {page_no}: {msg}")
+                if attempt == 1:
+                    logger.info(f"[{request_id}] Retrying page {page_no}...")
+                    continue
+                response.raise_for_status()
+            result = response.json()
+            choices = result.get("choices")
+            if not choices:
+                raise ValueError("vLLM returned no choices")
+            content = choices[0].get("message", {}).get("content")
+            if content is None:
+                raise ValueError("vLLM response missing content")
+            # Strip <think>...</think> reasoning blocks from Qwen3
+            content = _THINK_PATTERN.sub("", content).strip()
+            return content
+        except (httpx.TimeoutException, httpx.ConnectError) as e:
+            if attempt == 1:
+                logger.warning(
+                    f"[{request_id}] VLM attempt {attempt} failed on page {page_no}: {e}. Retrying..."
+                )
+                continue
+            raise
+    raise RuntimeError(f"VLM failed after 2 attempts on page {page_no}")
 # ---------------------------------------------------------------------------
+# Table Detection from VLM Output
 # ---------------------------------------------------------------------------
+# Markdown table separator: | --- | --- | or |:---:|---:|
+_MD_TABLE_SEPARATOR = re.compile(
+    r"^\|[\s\-:]+(?:\|[\s\-:]+)+\|?\s*$", re.MULTILINE
+)
+# LaTeX table markers (fallback if VLM ignores markdown instruction)
+_LATEX_TABLE_PATTERN = re.compile(r"\\begin\{tabular\}")
 def _detect_table_pages(vlm_page_texts: dict[int, Optional[str]]) -> set[int]:
     """Detect pages containing tables from VLM markdown output.
+    Checks for both markdown table separators and LaTeX tabular markers.
     """
     table_pages: set[int] = set()
     for page_no, text in vlm_page_texts.items():
+        if text and (
+            _MD_TABLE_SEPARATOR.search(text) or _LATEX_TABLE_PATTERN.search(text)
+        ):
             table_pages.add(page_no)
     return table_pages
+# ---------------------------------------------------------------------------
+# Mini-PDF Extraction (pypdf)
+# ---------------------------------------------------------------------------
 def _extract_pages_to_pdf(
     input_path: Path, page_numbers: list[int], request_id: str
 ) -> tuple[Path, dict[int, int]]:
+    """Extract specific pages from a PDF into a mini-PDF using pypdf.
     Args:
         input_path: Path to the original PDF
         page_numbers: 0-indexed page numbers to extract
+        request_id: Request ID for logging
     Returns:
+        (mini_pdf_path, page_map) where page_map maps Docling 1-indexed
+        page numbers in the mini-PDF back to 0-indexed original page numbers.
     """
     from pypdf import PdfReader, PdfWriter
     reader = PdfReader(str(input_path))
     writer = PdfWriter()
+    # page_map: {docling_page_no (1-indexed in mini-PDF) → original_page_no (0-indexed)}
     page_map: dict[int, int] = {}
+    for idx, orig_page in enumerate(sorted(page_numbers)):
         if orig_page < len(reader.pages):
             writer.add_page(reader.pages[orig_page])
             page_map[idx + 1] = orig_page  # Docling uses 1-indexed pages
+        else:
+            logger.warning(
+                f"[{request_id}] Page {orig_page} out of range (total: {len(reader.pages)})"
+            )
     mini_pdf_path = input_path.parent / f"table_pages_{request_id}.pdf"
     with open(mini_pdf_path, "wb") as f:
         writer.write(f)
+    logger.info(
+        f"[{request_id}] Created mini-PDF: {len(page_map)} table pages from original"
+    )
     return mini_pdf_path, page_map
 # ---------------------------------------------------------------------------
+# Table Extraction from Docling
 # ---------------------------------------------------------------------------
+def _extract_table_markdowns(doc, page_map: dict[int, int]) -> dict[int, list[str]]:
+    """Extract table markdown from Docling document, keyed by ORIGINAL page number.
+    Uses page_map to translate from Docling's 1-indexed mini-PDF pages
+    back to the original 0-indexed page numbers.
     """
+    tables_by_page: dict[int, list[str]] = {}
+    for element, _ in doc.iterate_items():
+        if isinstance(element, TableItem):
+            docling_page = element.prov[0].page_no if element.prov else -1
+            # Translate mini-PDF page → original page
+            orig_page = page_map.get(docling_page, docling_page - 1)
+            table_md = element.export_to_markdown(doc=doc)
+            if orig_page not in tables_by_page:
+                tables_by_page[orig_page] = []
+            tables_by_page[orig_page].append(table_md)
+    return tables_by_page
+# ---------------------------------------------------------------------------
+# Merge: VLM Text + TableFormer Tables
+# ---------------------------------------------------------------------------
+# Consecutive lines with | delimiters (markdown tables)
+_VLM_TABLE_BLOCK = re.compile(r"((?:^\|[^\n]+\|$\n?)+)", re.MULTILINE)
+# LaTeX table blocks
+_VLM_LATEX_BLOCK = re.compile(
+    r"(\\begin\{tabular\}.*?\\end\{tabular\})", re.DOTALL
+)
+def _merge_vlm_with_tables(vlm_text: str, table_markdowns: list[str]) -> str:
+    """Replace VLM's table sections with TableFormer's more accurate tables.
+    Handles both markdown pipe tables and LaTeX tabular blocks in VLM output.
+    """
+    if not table_markdowns:
+        return vlm_text
+    # Find all table blocks (markdown first, then LaTeX)
+    md_tables = list(_VLM_TABLE_BLOCK.finditer(vlm_text))
+    latex_tables = list(_VLM_LATEX_BLOCK.finditer(vlm_text))
+    # Combine and sort all table positions
+    all_tables = [(m.start(), m.end(), "md") for m in md_tables]
+    all_tables += [(m.start(), m.end(), "latex") for m in latex_tables]
+    all_tables.sort(key=lambda x: x[0])
+    # Remove overlapping matches (prefer earlier match)
+    filtered: list[tuple[int, int, str]] = []
+    last_end = -1
+    for start, end, kind in all_tables:
+        if start >= last_end:
+            filtered.append((start, end, kind))
+            last_end = end
+    vlm_table_count = len(filtered)
+    tf_table_count = len(table_markdowns)
+    if vlm_table_count != tf_table_count:
+        logger.warning(
+            f"Table count mismatch: VLM={vlm_table_count}, TableFormer={tf_table_count}. "
+            f"Using positional replacement for min({vlm_table_count}, {tf_table_count}) tables."
+        )
+    # Replace VLM tables with TableFormer tables (positional)
+    result_parts: list[str] = []
+    prev_end = 0
+    table_idx = 0
+    for start, end, kind in filtered:
+        result_parts.append(vlm_text[prev_end:start])
+        if table_idx < tf_table_count:
+            result_parts.append(table_markdowns[table_idx].strip() + "\n")
+            table_idx += 1
+        else:
+            # More VLM tables than TableFormer — keep VLM version
+            result_parts.append(vlm_text[start:end])
+        prev_end = end
+    result_parts.append(vlm_text[prev_end:])
+    # If there are remaining TableFormer tables not matched, append them
+    while table_idx < tf_table_count:
+        result_parts.append("\n\n" + table_markdowns[table_idx].strip() + "\n")
+        table_idx += 1
+    return "".join(result_parts)
+# ---------------------------------------------------------------------------
+# PDF to Page Images (parallel, optimized)
+# ---------------------------------------------------------------------------
+def _render_single_page(
+    input_path: Path, page_idx: int, dpi: int
+) -> tuple[int, Optional[bytes]]:
+    """Render a single PDF page to PNG bytes with CLAHE preprocessing.
+    Returns (page_idx, png_bytes) or (page_idx, None) on failure.
+    """
+    try:
+        images = convert_from_path(
+            str(input_path), dpi=dpi, first_page=page_idx + 1, last_page=page_idx + 1
+        )
+        if not images:
+            return page_idx, None
+        img = images[0]
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+            tmp_path = tmp.name
+            img.save(tmp_path, format="PNG")
+        try:
+            _preprocess_image_for_ocr(tmp_path)
+            with open(tmp_path, "rb") as f:
+                return page_idx, f.read()
+        finally:
+            os.unlink(tmp_path)
+    except Exception as e:
+        logger.warning(f"Failed to render page {page_idx + 1}: {e}")
+        return page_idx, None
+def _pdf_to_page_images(
+    input_path: Path,
+    request_id: str,
+    start_page: int = 0,
+    end_page: Optional[int] = None,
+) -> list[tuple[int, bytes]]:
+    """Convert PDF pages to PNG image bytes using parallel rendering.
+    Uses ThreadPoolExecutor for concurrent page rendering.
+    Returns list of (page_no, png_bytes) tuples, sorted by page number.
+    """
     try:
         from pdf2image.pdf2image import pdfinfo_from_path
         info = pdfinfo_from_path(str(input_path))
         total_pages = info["Pages"]
         last_page = min(end_page + 1, total_pages) if end_page is not None else total_pages
     except Exception as e:
+        logger.warning(f"[{request_id}] Could not get PDF info: {e}")
+        return []
+    page_indices = list(range(start_page, last_page))
+    start_time = time.time()
+    page_images: list[tuple[int, bytes]] = []
+    # Render pages in parallel (4 threads — I/O bound, not CPU bound for poppler)
+    with ThreadPoolExecutor(max_workers=4) as executor:
+        futures = {
+            executor.submit(_render_single_page, input_path, idx, RENDER_DPI): idx
+            for idx in page_indices
+        }
+        for future in as_completed(futures):
+            page_idx, png_bytes = future.result()
+            if png_bytes is not None:
+                page_images.append((page_idx, png_bytes))
+    page_images.sort(key=lambda x: x[0])
+    render_time = time.time() - start_time
+    logger.info(
+        f"[{request_id}] Rendered {len(page_images)} pages in {render_time:.2f}s "
+        f"({render_time / max(len(page_images), 1):.1f}s/page, DPI={RENDER_DPI})"
+    )
     return page_images
 # ---------------------------------------------------------------------------
+# Docling Converter (for TableFormer only)
 # ---------------------------------------------------------------------------
 def _create_converter(images_scale: float = 2.0) -> DocumentConverter:
     """Create a Docling converter with Standard Pipeline.
+    Used ONLY for TableFormer on table pages (not for full document OCR).
     """
     device = _get_device()
     logger.info(f"Creating converter with device: {device}")
     pipeline_options.table_structure_options.mode = TableFormerMode.ACCURATE
     pipeline_options.table_structure_options.do_cell_matching = True
     pipeline_options.ocr_options = RapidOcrOptions()
     pipeline_options.ocr_options.force_full_page_ocr = True
     pipeline_options.generate_page_images = True
     pipeline_options.images_scale = images_scale
     pipeline_options.generate_picture_images = True
     pipeline_options.accelerator_options = AcceleratorOptions(
 # ---------------------------------------------------------------------------
+# VLM-First Conversion (Pass 1: VLM, Pass 2: TableFormer, Merge)
 # ---------------------------------------------------------------------------
     end_page: Optional[int] = None,
 ) -> tuple:
     """
+    VLM-first hybrid conversion.
+    Pass 1 (GPU): VLM OCR on ALL pages (fast, concurrent)
+    Detect: Find table pages from VLM markdown output
+    Pass 2 (CPU): Docling TableFormer ONLY on table pages (mini-PDF)
+    Merge: VLM text for all pages + TableFormer tables
     Returns: (markdown_content, json_content, pages_processed, image_count)
     """
+    overall_start = time.time()
+    # ---- RENDER ALL PAGES ----
+    page_images = _pdf_to_page_images(input_path, request_id, start_page, end_page)
     if not page_images:
+        logger.warning(f"[{request_id}] No page images — falling back to full Docling pipeline")
         return _convert_document_full_docling(
             input_path, output_dir, images_scale, include_images, request_id
         )
+    render_time = time.time() - overall_start
+    # ---- PASS 1: VLM OCR ALL PAGES (GPU, concurrent) ----
     logger.info(f"[{request_id}] Pass 1: VLM OCR via Qwen3-VL ({VLM_MODEL})")
+    logger.info(f"[{request_id}] Sending {len(page_images)} pages to VLM ({VLM_CONCURRENCY} concurrent)")
     vlm_page_texts: dict[int, Optional[str]] = {}
     vlm_start = time.time()
+    with ThreadPoolExecutor(max_workers=VLM_CONCURRENCY) as executor:
+        future_to_page = {
+            executor.submit(_vlm_ocr_page, page_bytes, request_id, page_no + 1): page_no
             for page_no, page_bytes in page_images
         }
+        for future in as_completed(future_to_page):
+            page_no = future_to_page[future]
             try:
                 vlm_text = future.result()
                 vlm_page_texts[page_no] = vlm_text
                     f"[{request_id}] VLM processed page {page_no + 1} ({len(vlm_text)} chars)"
                 )
             except Exception as e:
+                logger.warning(f"[{request_id}] VLM failed on page {page_no + 1}: {e}")
                 vlm_page_texts[page_no] = None
     vlm_time = time.time() - vlm_start
+    logger.info(f"[{request_id}] Pass 1 completed in {vlm_time:.2f}s ({len(vlm_page_texts)} pages)")
+    # ---- DETECT TABLE PAGES ----
     table_pages = _detect_table_pages(vlm_page_texts)
     if table_pages:
         logger.info(
             f"[{request_id}] Tables detected on {len(table_pages)} pages: "
     else:
         logger.info(f"[{request_id}] No tables detected — skipping Docling entirely")
+    # ---- PASS 2: DOCLING TABLEFORMER ON TABLE PAGES ONLY ----
     tables_by_page: dict[int, list[str]] = {}
+    tableformer_time = 0.0
     if table_pages:
         logger.info(
             f"[{request_id}] Pass 2: Docling TableFormer on {len(table_pages)} table pages"
         )
+        tf_start = time.time()
         try:
             mini_pdf_path, page_map = _extract_pages_to_pdf(
                 input_path, sorted(table_pages), request_id
             )
             converter = _get_converter()
             result = converter.convert(mini_pdf_path)
             doc = result.document
+            if doc is not None:
+                tables_by_page = _extract_table_markdowns(doc, page_map)
+                total_tables = sum(len(v) for v in tables_by_page.values())
+                logger.info(
+                    f"[{request_id}] Pass 2 completed in {time.time() - tf_start:.2f}s — "
+                    f"{total_tables} TableFormer tables extracted"
+                )
+            else:
+                logger.warning(f"[{request_id}] Docling returned None document for table pages")
             # Clean up mini-PDF
+            mini_pdf_path.unlink(missing_ok=True)
         except Exception as e:
+            logger.error(f"[{request_id}] TableFormer pass failed: {e}")
+        tableformer_time = time.time() - tf_start
+    # ---- MERGE: VLM TEXT + TABLEFORMER TABLES ----
     md_parts: list[str] = []
+    image_count = 0
     for page_no in sorted(vlm_page_texts.keys()):
         md_parts.append(f"\n\n<!-- Page {page_no + 1} -->\n\n")
         vlm_text = vlm_page_texts[page_no]
         if vlm_text is None:
+            # VLM failed — note the gap
+            md_parts.append(f"[Page {page_no + 1}: VLM extraction failed]\n\n")
         else:
+            # Merge VLM text with TableFormer tables for this page (if any)
             page_tables = tables_by_page.get(page_no, [])
             if page_tables:
                 merged = _merge_vlm_with_tables(vlm_text, page_tables)
             else:
                 md_parts.append(vlm_text)
+    # ---- IMAGES (from Docling if requested and tables were processed) ----
+    if include_images and table_pages:
+        image_dir = output_dir / "images"
+        image_dir.mkdir(parents=True, exist_ok=True)
+        try:
+            converter = _get_converter()
+            result = converter.convert(input_path)
+            doc = result.document
+            if doc:
+                for element, _ in doc.iterate_items():
+                    if isinstance(element, PictureItem):
+                        if element.image and element.image.pil_image:
+                            pg = element.prov[0].page_no if element.prov else 0
+                            image_id = element.self_ref.split("/")[-1]
+                            image_name = f"page_{pg}_{image_id}.png"
+                            image_name = re.sub(r'[\\/*?:"<>|]', "", image_name)
+                            image_path = image_dir / image_name
+                            try:
+                                element.image.pil_image.save(image_path, format="PNG")
+                                image_count += 1
+                            except Exception as e:
+                                logger.warning(f"[{request_id}] Failed to save image: {e}")
+        except Exception as e:
+            logger.warning(f"[{request_id}] Image extraction failed: {e}")
     markdown_content = "".join(md_parts)
+    pages_processed = len(vlm_page_texts)
+    total_time = time.time() - overall_start
     logger.info(
         f"[{request_id}] VLM-first conversion complete: {pages_processed} pages — "
         f"render {render_time:.1f}s + VLM {vlm_time:.1f}s + "
+        f"TableFormer {tableformer_time:.1f}s = {total_time:.2f}s total"
     )
     if pages_processed > 0:
         logger.info(f"[{request_id}] Speed: {pages_processed / total_time:.2f} pages/sec")
     include_images: bool,
     request_id: str,
 ) -> tuple:
+    """Fallback: full Docling pipeline when page images are unavailable."""
+    logger.info(f"[{request_id}] Fallback: running full Docling pipeline")
     converter = _get_converter()
     start_time = time.time()
     result = converter.convert(input_path)
     doc = result.document
     if doc is None:
         raise ValueError("Docling failed to parse document")
     elapsed = time.time() - start_time
+    logger.info(f"[{request_id}] Full Docling pipeline completed in {elapsed:.2f}s")
     markdown_content = doc.export_to_markdown()
     pages_processed = len(
         for element, _ in doc.iterate_items():
             if isinstance(element, PictureItem):
                 if element.image and element.image.pil_image:
+                    pg = element.prov[0].page_no if element.prov else 0
                     image_id = element.self_ref.split("/")[-1]
+                    image_name = f"page_{pg}_{image_id}.png"
                     image_name = re.sub(r'[\\/*?:"<>|]', "", image_name)
                     image_path = image_dir / image_name
                     try:
 async def lifespan(app: FastAPI):
     """Startup: initialize Docling converter and check vLLM."""
     logger.info("=" * 60)
+    logger.info("Starting Docling VLM Parser API v3.1.0...")
     device = _get_device()
     logger.info(f"Device: {device}")
         logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
         logger.info(f"CUDA Version: {torch.version.cuda}")
         logger.info(
+            f"GPU Memory: {torch.cuda.get_device_properties(0).total_mem / 1024**3:.1f} GB"
         )
     logger.info(f"VLM Model: {VLM_MODEL}")
     logger.info(f"VLM Endpoint: http://{VLM_HOST}:{VLM_PORT}")
+    logger.info(f"VLM Timeout: {VLM_TIMEOUT}s, Concurrency: {VLM_CONCURRENCY}")
+    logger.info(f"Render DPI: {RENDER_DPI}")
     logger.info(f"Images scale: {IMAGES_SCALE}")
     logger.info(f"Max file size: {MAX_FILE_SIZE_MB}MB")
 app = FastAPI(
     title="Docling VLM Parser API",
+    description="VLM-first hybrid parser: Qwen3-VL OCR + targeted TableFormer tables",
+    version="3.1.0",
     lifespan=lifespan,
 )
 async def health_check() -> HealthResponse:
     """Health check endpoint."""
     device = _get_device()
     vlm_status = "unknown"
     try:
         async with httpx.AsyncClient(timeout=5) as client:
     return HealthResponse(
         status="healthy",
+        version="3.1.0",
         device=device,
+        gpu_name=None,
+        vlm_model="active",
         vlm_status=vlm_status,
         images_scale=IMAGES_SCALE,
     )
 async def parse_document(
     file: UploadFile = File(..., description="PDF or image file to parse"),
     output_format: str = Form(default="markdown", description="Output format: markdown or json"),
+    images_scale: Optional[float] = Form(default=None, description="Image resolution scale"),
     start_page: int = Form(default=0, description="Starting page (0-indexed)"),
     end_page: Optional[int] = Form(default=None, description="Ending page (None = all pages)"),
+    include_images: bool = Form(default=False, description="Include extracted images"),
     _token: str = Depends(verify_token),
 ) -> ParseResponse:
+    """Parse a document file using VLM-first hybrid pipeline."""
     request_id = str(uuid4())[:8]
     start_time = time.time()
     if output_format not in ("markdown",):
         raise HTTPException(
             status_code=400,
+            detail="Only 'markdown' output_format is supported",
         )
     # Validate file size
     logger.info(f"[{request_id}] File size: {file_size_mb:.2f} MB")
     if file_size > MAX_FILE_SIZE_BYTES:
         raise HTTPException(
             status_code=413,
             detail=f"File size exceeds maximum allowed size of {MAX_FILE_SIZE_MB}MB",
     allowed_extensions = {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp"}
     file_ext = Path(file.filename).suffix.lower() if file.filename else ""
     if file_ext not in allowed_extensions:
         raise HTTPException(
             status_code=400,
             detail=f"Unsupported file type. Allowed: {', '.join(allowed_extensions)}",
         )
     use_images_scale = images_scale if images_scale is not None else IMAGES_SCALE
     logger.info(f"[{request_id}] Images scale: {use_images_scale}, VLM: {VLM_MODEL}")
     logger.info(f"[{request_id}] Page range: {start_page} to {end_page or 'end'}")
     temp_dir = tempfile.mkdtemp()
     try:
         input_path = Path(temp_dir) / f"input{file_ext}"
         await asyncio.to_thread(_save_uploaded_file, input_path, file.file)
         output_dir = Path(temp_dir) / "output"
         output_dir.mkdir(exist_ok=True)
         markdown_content, json_content, pages_processed, image_count = await asyncio.to_thread(
             _convert_document,
             input_path,
             end_page,
         )
         images_zip = None
         if include_images and image_count > 0:
             images_zip, image_count = _create_images_zip(output_dir)
         total_duration = time.time() - start_time
         logger.info(f"[{request_id}] {'='*50}")
         )
     finally:
         shutil.rmtree(temp_dir, ignore_errors=True)
 @app.post("/parse/url", response_model=ParseResponse)
     request: URLParseRequest,
     _token: str = Depends(verify_token),
 ) -> ParseResponse:
+    """Parse a document from a URL using VLM-first hybrid pipeline."""
     request_id = str(uuid4())[:8]
     start_time = time.time()
     if request.output_format not in ("markdown",):
         raise HTTPException(
             status_code=400,
+            detail="Only 'markdown' output_format is supported",
         )
     _validate_url(request.url)
     temp_dir = tempfile.mkdtemp()
     try:
         # Download file
         async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
             response = await client.get(request.url)
             response.raise_for_status()
         file_size_mb = len(response.content) / (1024 * 1024)
+        logger.info(
+            f"[{request_id}] Download completed in {time.time() - download_start:.2f}s "
+            f"({file_size_mb:.2f} MB)"
+        )
+        # Determine file extension (with Content-Type fallback)
         url_path = Path(request.url.split("?")[0])
         file_ext = url_path.suffix.lower()
+        if not file_ext or file_ext not in {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp"}:
             content_type = response.headers.get("content-type", "").lower()
             ct_map = {
                 "application/pdf": ".pdf",
                 "image/bmp": ".bmp",
             }
             file_ext = next((v for k, v in ct_map.items() if k in content_type), ".pdf")
         if len(response.content) > MAX_FILE_SIZE_BYTES:
             raise HTTPException(
                 status_code=413,
                 detail=f"File size exceeds maximum allowed size of {MAX_FILE_SIZE_MB}MB",
             )
         input_path = Path(temp_dir) / f"input{file_ext}"
         await asyncio.to_thread(_save_downloaded_content, input_path, response.content)
         output_dir = Path(temp_dir) / "output"
         output_dir.mkdir(exist_ok=True)
         use_images_scale = request.images_scale if request.images_scale is not None else IMAGES_SCALE
         logger.info(f"[{request_id}] Images scale: {use_images_scale}, VLM: {VLM_MODEL}")
+        logger.info(
+            f"[{request_id}] Page range: {request.start_page} to {request.end_page or 'end'}"
+        )
         markdown_content, json_content, pages_processed, image_count = await asyncio.to_thread(
             _convert_document,
             input_path,
             request.end_page,
         )
         images_zip = None
         if request.include_images and image_count > 0:
             images_zip, image_count = _create_images_zip(output_dir)
         total_duration = time.time() - start_time
         logger.info(f"[{request_id}] {'='*50}")
         )
     finally:
         shutil.rmtree(temp_dir, ignore_errors=True)
 if __name__ == "__main__":