Spaces:
Running on T4
Running on T4
Ibad ur Rehman committed on
Commit ·
dd23733
1
Parent(s): b586eeb
feat: switch to unsloth gguf runtime
Browse files
- Dockerfile +16 -11
- app.py +70 -124
- config.py +12 -11
- pipeline.py +45 -154
- requirements.txt +1 -6
- start.sh +12 -4
Dockerfile
CHANGED
|
@@ -1,40 +1,45 @@
|
|
| 1 |
-
# Hugging Face Spaces Dockerfile for Qwen3-VL parser API
|
| 2 |
-
# v5.
|
| 3 |
|
| 4 |
-
FROM
|
| 5 |
|
| 6 |
USER root
|
| 7 |
|
| 8 |
-
# Install fonts and PDF utilities for document parsing
|
| 9 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
|
|
|
|
|
| 10 |
fonts-noto-core fonts-noto-cjk fontconfig \
|
| 11 |
-
libgl1 libglib2.0-0 poppler-utils curl
|
| 12 |
&& fc-cache -fv && rm -rf /var/lib/apt/lists/*
|
| 13 |
|
| 14 |
-
# Create non-root user for HF Spaces
|
| 15 |
RUN useradd -m -u 1000 user
|
| 16 |
|
| 17 |
ENV PYTHONUNBUFFERED=1 \
|
| 18 |
PYTHONDONTWRITEBYTECODE=1 \
|
| 19 |
IMAGES_SCALE=2.0 \
|
| 20 |
MAX_FILE_SIZE_MB=1024 \
|
|
|
|
| 21 |
HF_HOME=/home/user/.cache/huggingface \
|
| 22 |
XDG_CACHE_HOME=/home/user/.cache \
|
| 23 |
HOME=/home/user \
|
| 24 |
PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:$PATH
|
| 25 |
|
| 26 |
-
RUN mkdir -p /home/user/.cache/huggingface /home/user/
|
| 27 |
&& chown -R user:user /home/user
|
| 28 |
|
| 29 |
USER user
|
| 30 |
-
WORKDIR /home/user
|
| 31 |
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
|
|
|
|
|
|
| 35 |
|
| 36 |
COPY --chown=user:user . .
|
| 37 |
-
|
| 38 |
RUN chmod +x start.sh
|
| 39 |
|
| 40 |
EXPOSE 7860
|
|
|
|
| 1 |
+
# Hugging Face Spaces Dockerfile for Unsloth GGUF Qwen3-VL parser API
|
| 2 |
+
# v5.2.0 - llama.cpp + Unsloth Qwen3-VL-8B-Instruct GGUF
|
| 3 |
|
| 4 |
+
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04
|
| 5 |
|
| 6 |
USER root
|
| 7 |
|
|
|
|
| 8 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 9 |
+
python3 python3-pip python3-venv \
|
| 10 |
+
build-essential cmake git \
|
| 11 |
fonts-noto-core fonts-noto-cjk fontconfig \
|
| 12 |
+
libgl1 libglib2.0-0 poppler-utils curl \
|
| 13 |
&& fc-cache -fv && rm -rf /var/lib/apt/lists/*
|
| 14 |
|
|
|
|
| 15 |
RUN useradd -m -u 1000 user
|
| 16 |
|
| 17 |
ENV PYTHONUNBUFFERED=1 \
|
| 18 |
PYTHONDONTWRITEBYTECODE=1 \
|
| 19 |
IMAGES_SCALE=2.0 \
|
| 20 |
MAX_FILE_SIZE_MB=1024 \
|
| 21 |
+
LLAMA_SERVER_URL=http://127.0.0.1:8080 \
|
| 22 |
HF_HOME=/home/user/.cache/huggingface \
|
| 23 |
XDG_CACHE_HOME=/home/user/.cache \
|
| 24 |
HOME=/home/user \
|
| 25 |
PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:$PATH
|
| 26 |
|
| 27 |
+
RUN mkdir -p /home/user/.cache/huggingface /home/user/app \
|
| 28 |
&& chown -R user:user /home/user
|
| 29 |
|
| 30 |
USER user
|
| 31 |
+
WORKDIR /home/user
|
| 32 |
|
| 33 |
+
RUN git clone --depth 1 https://github.com/ggml-org/llama.cpp /home/user/llama.cpp
|
| 34 |
+
WORKDIR /home/user/llama.cpp
|
| 35 |
+
RUN cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON
|
| 36 |
+
RUN cmake --build build -j
|
| 37 |
|
| 38 |
+
WORKDIR /home/user/app
|
| 39 |
+
COPY --chown=user:user requirements.txt .
|
| 40 |
+
RUN python3 -m pip install --user --upgrade pip && python3 -m pip install --user -r requirements.txt
|
| 41 |
|
| 42 |
COPY --chown=user:user . .
|
|
|
|
| 43 |
RUN chmod +x start.sh
|
| 44 |
|
| 45 |
EXPOSE 7860
|
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Qwen3-VL parser API."""
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
import re
|
|
@@ -16,14 +16,19 @@ from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile
|
|
| 16 |
from auth import _validate_url, verify_token
|
| 17 |
from config import (
|
| 18 |
IMAGES_SCALE,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
MAX_FILE_SIZE_BYTES,
|
| 20 |
MAX_FILE_SIZE_MB,
|
| 21 |
-
QWEN_ATTN_IMPLEMENTATION,
|
| 22 |
-
QWEN_BATCH_SIZE,
|
| 23 |
-
QWEN_IMAGE_MAX_SIDE,
|
| 24 |
-
QWEN_MAX_NEW_TOKENS,
|
| 25 |
QWEN_MODEL,
|
| 26 |
-
QWEN_TORCH_DTYPE,
|
| 27 |
RENDER_DPI,
|
| 28 |
logger,
|
| 29 |
)
|
|
@@ -37,61 +42,52 @@ from pipeline import (
|
|
| 37 |
)
|
| 38 |
|
| 39 |
|
| 40 |
-
# ---------------------------------------------------------------------------
|
| 41 |
-
# Application Lifespan
|
| 42 |
-
# ---------------------------------------------------------------------------
|
| 43 |
-
|
| 44 |
-
|
| 45 |
@asynccontextmanager
|
| 46 |
async def lifespan(app: FastAPI):
|
| 47 |
-
"""Startup: initialize
|
| 48 |
logger.info("=" * 60)
|
| 49 |
-
logger.info("Starting Docling
|
| 50 |
-
logger.info("Initializing
|
| 51 |
_get_pipeline()
|
| 52 |
-
logger.info("
|
| 53 |
|
| 54 |
logger.info(f"Render DPI: {RENDER_DPI}")
|
| 55 |
logger.info(f"Images scale: {IMAGES_SCALE}")
|
| 56 |
logger.info(f"Max file size: {MAX_FILE_SIZE_MB}MB")
|
| 57 |
logger.info(f"Qwen Model: {QWEN_MODEL}")
|
| 58 |
-
logger.info(f"
|
| 59 |
-
logger.info(f"
|
| 60 |
-
logger.info(f"
|
| 61 |
-
logger.info(f"
|
| 62 |
-
logger.info(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
logger.info("=" * 60)
|
| 65 |
-
logger.info("Docling
|
| 66 |
logger.info("=" * 60)
|
| 67 |
yield
|
| 68 |
-
logger.info("Shutting down Docling
|
| 69 |
-
|
| 70 |
|
| 71 |
-
# ---------------------------------------------------------------------------
|
| 72 |
-
# FastAPI App
|
| 73 |
-
# ---------------------------------------------------------------------------
|
| 74 |
|
| 75 |
app = FastAPI(
|
| 76 |
-
title="Docling
|
| 77 |
-
description="Qwen3-VL local parser",
|
| 78 |
-
version="5.
|
| 79 |
lifespan=lifespan,
|
| 80 |
)
|
| 81 |
|
| 82 |
|
| 83 |
-
# ---------------------------------------------------------------------------
|
| 84 |
-
# Endpoints
|
| 85 |
-
# ---------------------------------------------------------------------------
|
| 86 |
-
|
| 87 |
-
|
| 88 |
@app.get("/", response_model=HealthResponse)
|
| 89 |
async def health_check() -> HealthResponse:
|
| 90 |
"""Health check endpoint."""
|
| 91 |
return HealthResponse(
|
| 92 |
status="healthy",
|
| 93 |
-
version="5.
|
| 94 |
-
model="Qwen3-VL-8B-Instruct",
|
| 95 |
gemini_status="not used",
|
| 96 |
images_scale=IMAGES_SCALE,
|
| 97 |
)
|
|
@@ -100,34 +96,33 @@ async def health_check() -> HealthResponse:
|
|
| 100 |
@app.post("/parse", response_model=ParseResponse)
|
| 101 |
async def parse_document(
|
| 102 |
file: UploadFile = File(..., description="PDF or image file to parse"),
|
| 103 |
-
output_format: str = Form(default="markdown", description="Output format: markdown
|
| 104 |
-
images_scale: Optional[float] = Form(default=None, description="
|
| 105 |
start_page: int = Form(default=0, description="Starting page (0-indexed)"),
|
| 106 |
end_page: Optional[int] = Form(default=None, description="Ending page (None = all pages)"),
|
| 107 |
include_images: bool = Form(default=False, description="Include extracted images"),
|
| 108 |
_token: str = Depends(verify_token),
|
| 109 |
) -> ParseResponse:
|
| 110 |
-
"""Parse a document file using
|
| 111 |
request_id = str(uuid4())[:8]
|
| 112 |
start_time = time.time()
|
| 113 |
|
| 114 |
-
logger.info(f"[{request_id}] {'='*50}")
|
| 115 |
logger.info(f"[{request_id}] New parse request received")
|
| 116 |
-
safe_filename = re.sub(r
|
| 117 |
logger.info(f"[{request_id}] Filename: {safe_filename}")
|
| 118 |
logger.info(f"[{request_id}] Output format: {output_format}")
|
| 119 |
|
| 120 |
-
if output_format
|
| 121 |
-
raise HTTPException(
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
| 125 |
|
| 126 |
-
# Validate file size
|
| 127 |
file.file.seek(0, 2)
|
| 128 |
file_size = file.file.tell()
|
| 129 |
file.file.seek(0)
|
| 130 |
-
|
| 131 |
file_size_mb = file_size / (1024 * 1024)
|
| 132 |
logger.info(f"[{request_id}] File size: {file_size_mb:.2f} MB")
|
| 133 |
|
|
@@ -137,20 +132,18 @@ async def parse_document(
|
|
| 137 |
detail=f"File size exceeds maximum allowed size of {MAX_FILE_SIZE_MB}MB",
|
| 138 |
)
|
| 139 |
|
| 140 |
-
# Validate file type
|
| 141 |
allowed_extensions = {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp"}
|
| 142 |
file_ext = Path(file.filename).suffix.lower() if file.filename else ""
|
| 143 |
if file_ext not in allowed_extensions:
|
| 144 |
raise HTTPException(
|
| 145 |
status_code=400,
|
| 146 |
-
detail=f"Unsupported file type. Allowed: {', '.join(allowed_extensions)}",
|
| 147 |
)
|
| 148 |
|
| 149 |
logger.info(f"[{request_id}] Model: {QWEN_MODEL}")
|
| 150 |
-
logger.info(f"[{request_id}] Page range: {start_page} to {end_page
|
| 151 |
|
| 152 |
temp_dir = tempfile.mkdtemp()
|
| 153 |
-
|
| 154 |
try:
|
| 155 |
input_path = Path(temp_dir) / f"input{file_ext}"
|
| 156 |
await asyncio.to_thread(_save_uploaded_file, input_path, file.file)
|
|
@@ -173,35 +166,22 @@ async def parse_document(
|
|
| 173 |
images_zip, image_count = _create_images_zip(output_dir)
|
| 174 |
|
| 175 |
total_duration = time.time() - start_time
|
| 176 |
-
logger.info(f"[{request_id}] {
|
| 177 |
-
logger.info(f"[{request_id}] Request completed successfully")
|
| 178 |
-
logger.info(f"[{request_id}] Pages processed: {pages_processed}")
|
| 179 |
-
logger.info(f"[{request_id}] Total time: {total_duration:.2f}s")
|
| 180 |
-
if pages_processed > 0:
|
| 181 |
-
logger.info(f"[{request_id}] Speed: {pages_processed / total_duration:.2f} pages/sec")
|
| 182 |
-
logger.info(f"[{request_id}] {'='*50}")
|
| 183 |
|
| 184 |
return ParseResponse(
|
| 185 |
success=True,
|
| 186 |
-
markdown=markdown_content
|
| 187 |
-
json_content=json_content
|
| 188 |
images_zip=images_zip,
|
| 189 |
image_count=image_count,
|
| 190 |
pages_processed=pages_processed,
|
| 191 |
device_used="gpu",
|
| 192 |
vlm_model=QWEN_MODEL,
|
| 193 |
)
|
| 194 |
-
|
| 195 |
except Exception as e:
|
| 196 |
total_duration = time.time() - start_time
|
| 197 |
-
logger.error(f"[{request_id}] {
|
| 198 |
-
|
| 199 |
-
logger.error(f"[{request_id}] Error: {type(e).__name__}: {str(e)}", exc_info=True)
|
| 200 |
-
logger.error(f"[{request_id}] {'='*50}")
|
| 201 |
-
return ParseResponse(
|
| 202 |
-
success=False,
|
| 203 |
-
error=f"Processing failed (ref: {request_id})",
|
| 204 |
-
)
|
| 205 |
finally:
|
| 206 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 207 |
|
|
@@ -211,43 +191,37 @@ async def parse_document_from_url(
|
|
| 211 |
request: URLParseRequest,
|
| 212 |
_token: str = Depends(verify_token),
|
| 213 |
) -> ParseResponse:
|
| 214 |
-
"""Parse a document from
|
| 215 |
request_id = str(uuid4())[:8]
|
| 216 |
start_time = time.time()
|
| 217 |
|
| 218 |
-
logger.info(f"[{request_id}] {'='*50}")
|
| 219 |
logger.info(f"[{request_id}] New URL parse request received")
|
| 220 |
logger.info(f"[{request_id}] URL: {request.url}")
|
| 221 |
-
logger.info(f"[{request_id}] Output format: {request.output_format}")
|
| 222 |
|
| 223 |
-
if request.output_format
|
| 224 |
-
raise HTTPException(
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
|
|
|
| 228 |
|
| 229 |
_validate_url(request.url)
|
| 230 |
|
| 231 |
temp_dir = tempfile.mkdtemp()
|
| 232 |
-
|
| 233 |
try:
|
| 234 |
-
# Download file
|
| 235 |
-
logger.info(f"[{request_id}] Downloading file from URL...")
|
| 236 |
-
download_start = time.time()
|
| 237 |
async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
|
| 238 |
response = await client.get(request.url)
|
| 239 |
response.raise_for_status()
|
| 240 |
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
|
| 247 |
-
# Determine file extension (with Content-Type fallback)
|
| 248 |
url_path = Path(request.url.split("?")[0])
|
| 249 |
file_ext = url_path.suffix.lower()
|
| 250 |
-
|
| 251 |
if not file_ext or file_ext not in {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp"}:
|
| 252 |
content_type = response.headers.get("content-type", "").lower()
|
| 253 |
ct_map = {
|
|
@@ -259,23 +233,12 @@ async def parse_document_from_url(
|
|
| 259 |
}
|
| 260 |
file_ext = next((v for k, v in ct_map.items() if k in content_type), ".pdf")
|
| 261 |
|
| 262 |
-
if len(response.content) > MAX_FILE_SIZE_BYTES:
|
| 263 |
-
raise HTTPException(
|
| 264 |
-
status_code=413,
|
| 265 |
-
detail=f"File size exceeds maximum allowed size of {MAX_FILE_SIZE_MB}MB",
|
| 266 |
-
)
|
| 267 |
-
|
| 268 |
input_path = Path(temp_dir) / f"input{file_ext}"
|
| 269 |
await asyncio.to_thread(_save_downloaded_content, input_path, response.content)
|
| 270 |
|
| 271 |
output_dir = Path(temp_dir) / "output"
|
| 272 |
output_dir.mkdir(exist_ok=True)
|
| 273 |
|
| 274 |
-
logger.info(f"[{request_id}] Model: {QWEN_MODEL}")
|
| 275 |
-
logger.info(
|
| 276 |
-
f"[{request_id}] Page range: {request.start_page} to {request.end_page or 'end'}"
|
| 277 |
-
)
|
| 278 |
-
|
| 279 |
markdown_content, json_content, pages_processed, image_count = await asyncio.to_thread(
|
| 280 |
_convert_document,
|
| 281 |
input_path,
|
|
@@ -291,42 +254,25 @@ async def parse_document_from_url(
|
|
| 291 |
images_zip, image_count = _create_images_zip(output_dir)
|
| 292 |
|
| 293 |
total_duration = time.time() - start_time
|
| 294 |
-
logger.info(f"[{request_id}] {
|
| 295 |
-
logger.info(f"[{request_id}] Request completed successfully")
|
| 296 |
-
logger.info(f"[{request_id}] Pages processed: {pages_processed}")
|
| 297 |
-
logger.info(f"[{request_id}] Total time: {total_duration:.2f}s")
|
| 298 |
-
if pages_processed > 0:
|
| 299 |
-
logger.info(f"[{request_id}] Speed: {pages_processed / total_duration:.2f} pages/sec")
|
| 300 |
-
logger.info(f"[{request_id}] {'='*50}")
|
| 301 |
|
| 302 |
return ParseResponse(
|
| 303 |
success=True,
|
| 304 |
-
markdown=markdown_content
|
| 305 |
-
json_content=json_content
|
| 306 |
images_zip=images_zip,
|
| 307 |
image_count=image_count,
|
| 308 |
pages_processed=pages_processed,
|
| 309 |
device_used="gpu",
|
| 310 |
vlm_model=QWEN_MODEL,
|
| 311 |
)
|
| 312 |
-
|
| 313 |
except httpx.HTTPError as e:
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
return ParseResponse(
|
| 317 |
-
success=False,
|
| 318 |
-
error=f"Failed to download file from URL (ref: {request_id})",
|
| 319 |
-
)
|
| 320 |
except Exception as e:
|
| 321 |
total_duration = time.time() - start_time
|
| 322 |
-
logger.error(f"[{request_id}] {
|
| 323 |
-
|
| 324 |
-
logger.error(f"[{request_id}] Error: {type(e).__name__}: {str(e)}", exc_info=True)
|
| 325 |
-
logger.error(f"[{request_id}] {'='*50}")
|
| 326 |
-
return ParseResponse(
|
| 327 |
-
success=False,
|
| 328 |
-
error=f"Processing failed (ref: {request_id})",
|
| 329 |
-
)
|
| 330 |
finally:
|
| 331 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 332 |
|
|
|
|
| 1 |
+
"""Unsloth Qwen3-VL GGUF parser API."""
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
import re
|
|
|
|
| 16 |
from auth import _validate_url, verify_token
|
| 17 |
from config import (
|
| 18 |
IMAGES_SCALE,
|
| 19 |
+
LLAMA_CTX_SIZE,
|
| 20 |
+
LLAMA_FLASH_ATTN,
|
| 21 |
+
LLAMA_GPU_LAYERS,
|
| 22 |
+
LLAMA_HF_FILE,
|
| 23 |
+
LLAMA_HF_REPO,
|
| 24 |
+
LLAMA_MAX_TOKENS,
|
| 25 |
+
LLAMA_MMPROJ_FILE,
|
| 26 |
+
LLAMA_SERVER_TIMEOUT,
|
| 27 |
+
LLAMA_SERVER_URL,
|
| 28 |
+
LLAMA_THREADS,
|
| 29 |
MAX_FILE_SIZE_BYTES,
|
| 30 |
MAX_FILE_SIZE_MB,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
QWEN_MODEL,
|
|
|
|
| 32 |
RENDER_DPI,
|
| 33 |
logger,
|
| 34 |
)
|
|
|
|
| 42 |
)
|
| 43 |
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
@asynccontextmanager
|
| 46 |
async def lifespan(app: FastAPI):
|
| 47 |
+
"""Startup: initialize local llama.cpp client."""
|
| 48 |
logger.info("=" * 60)
|
| 49 |
+
logger.info("Starting Docling Parser API v5.2.0...")
|
| 50 |
+
logger.info("Initializing local llama.cpp client...")
|
| 51 |
_get_pipeline()
|
| 52 |
+
logger.info("llama.cpp client ready")
|
| 53 |
|
| 54 |
logger.info(f"Render DPI: {RENDER_DPI}")
|
| 55 |
logger.info(f"Images scale: {IMAGES_SCALE}")
|
| 56 |
logger.info(f"Max file size: {MAX_FILE_SIZE_MB}MB")
|
| 57 |
logger.info(f"Qwen Model: {QWEN_MODEL}")
|
| 58 |
+
logger.info(f"llama-server URL: {LLAMA_SERVER_URL}")
|
| 59 |
+
logger.info(f"llama-server timeout: {LLAMA_SERVER_TIMEOUT}s")
|
| 60 |
+
logger.info(f"llama HF repo: {LLAMA_HF_REPO}")
|
| 61 |
+
logger.info(f"llama model file: {LLAMA_HF_FILE}")
|
| 62 |
+
logger.info(f"llama mmproj file: {LLAMA_MMPROJ_FILE}")
|
| 63 |
+
logger.info(f"llama max tokens: {LLAMA_MAX_TOKENS}")
|
| 64 |
+
logger.info(f"llama ctx size: {LLAMA_CTX_SIZE}")
|
| 65 |
+
logger.info(f"llama gpu layers: {LLAMA_GPU_LAYERS}")
|
| 66 |
+
logger.info(f"llama threads: {LLAMA_THREADS}")
|
| 67 |
+
logger.info(f"llama flash attention: {LLAMA_FLASH_ATTN}")
|
| 68 |
|
| 69 |
logger.info("=" * 60)
|
| 70 |
+
logger.info("Docling Parser API ready (Unsloth GGUF via llama.cpp)")
|
| 71 |
logger.info("=" * 60)
|
| 72 |
yield
|
| 73 |
+
logger.info("Shutting down Docling Parser API...")
|
|
|
|
| 74 |
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
app = FastAPI(
|
| 77 |
+
title="Docling Parser API",
|
| 78 |
+
description="Unsloth Qwen3-VL GGUF local parser",
|
| 79 |
+
version="5.2.0",
|
| 80 |
lifespan=lifespan,
|
| 81 |
)
|
| 82 |
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
@app.get("/", response_model=HealthResponse)
|
| 85 |
async def health_check() -> HealthResponse:
|
| 86 |
"""Health check endpoint."""
|
| 87 |
return HealthResponse(
|
| 88 |
status="healthy",
|
| 89 |
+
version="5.2.0",
|
| 90 |
+
model="Qwen3-VL-8B-Instruct GGUF",
|
| 91 |
gemini_status="not used",
|
| 92 |
images_scale=IMAGES_SCALE,
|
| 93 |
)
|
|
|
|
| 96 |
@app.post("/parse", response_model=ParseResponse)
|
| 97 |
async def parse_document(
|
| 98 |
file: UploadFile = File(..., description="PDF or image file to parse"),
|
| 99 |
+
output_format: str = Form(default="markdown", description="Output format: markdown only"),
|
| 100 |
+
images_scale: Optional[float] = Form(default=None, description="Reserved for compatibility"),
|
| 101 |
start_page: int = Form(default=0, description="Starting page (0-indexed)"),
|
| 102 |
end_page: Optional[int] = Form(default=None, description="Ending page (None = all pages)"),
|
| 103 |
include_images: bool = Form(default=False, description="Include extracted images"),
|
| 104 |
_token: str = Depends(verify_token),
|
| 105 |
) -> ParseResponse:
|
| 106 |
+
"""Parse a document file using local llama.cpp + Unsloth GGUF."""
|
| 107 |
request_id = str(uuid4())[:8]
|
| 108 |
start_time = time.time()
|
| 109 |
|
| 110 |
+
logger.info(f"[{request_id}] {'=' * 50}")
|
| 111 |
logger.info(f"[{request_id}] New parse request received")
|
| 112 |
+
safe_filename = re.sub(r"[\r\n\t\x00-\x1f\x7f]", "_", file.filename or "")[:255]
|
| 113 |
logger.info(f"[{request_id}] Filename: {safe_filename}")
|
| 114 |
logger.info(f"[{request_id}] Output format: {output_format}")
|
| 115 |
|
| 116 |
+
if output_format != "markdown":
|
| 117 |
+
raise HTTPException(status_code=400, detail="Only 'markdown' output_format is supported")
|
| 118 |
+
if start_page < 0:
|
| 119 |
+
raise HTTPException(status_code=400, detail="start_page must be >= 0")
|
| 120 |
+
if end_page is not None and end_page < start_page:
|
| 121 |
+
raise HTTPException(status_code=400, detail="end_page must be >= start_page")
|
| 122 |
|
|
|
|
| 123 |
file.file.seek(0, 2)
|
| 124 |
file_size = file.file.tell()
|
| 125 |
file.file.seek(0)
|
|
|
|
| 126 |
file_size_mb = file_size / (1024 * 1024)
|
| 127 |
logger.info(f"[{request_id}] File size: {file_size_mb:.2f} MB")
|
| 128 |
|
|
|
|
| 132 |
detail=f"File size exceeds maximum allowed size of {MAX_FILE_SIZE_MB}MB",
|
| 133 |
)
|
| 134 |
|
|
|
|
| 135 |
allowed_extensions = {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp"}
|
| 136 |
file_ext = Path(file.filename).suffix.lower() if file.filename else ""
|
| 137 |
if file_ext not in allowed_extensions:
|
| 138 |
raise HTTPException(
|
| 139 |
status_code=400,
|
| 140 |
+
detail=f"Unsupported file type. Allowed: {', '.join(sorted(allowed_extensions))}",
|
| 141 |
)
|
| 142 |
|
| 143 |
logger.info(f"[{request_id}] Model: {QWEN_MODEL}")
|
| 144 |
+
logger.info(f"[{request_id}] Page range: {start_page} to {end_page if end_page is not None else 'end'}")
|
| 145 |
|
| 146 |
temp_dir = tempfile.mkdtemp()
|
|
|
|
| 147 |
try:
|
| 148 |
input_path = Path(temp_dir) / f"input{file_ext}"
|
| 149 |
await asyncio.to_thread(_save_uploaded_file, input_path, file.file)
|
|
|
|
| 166 |
images_zip, image_count = _create_images_zip(output_dir)
|
| 167 |
|
| 168 |
total_duration = time.time() - start_time
|
| 169 |
+
logger.info(f"[{request_id}] Request completed successfully in {total_duration:.2f}s")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
return ParseResponse(
|
| 172 |
success=True,
|
| 173 |
+
markdown=markdown_content,
|
| 174 |
+
json_content=json_content,
|
| 175 |
images_zip=images_zip,
|
| 176 |
image_count=image_count,
|
| 177 |
pages_processed=pages_processed,
|
| 178 |
device_used="gpu",
|
| 179 |
vlm_model=QWEN_MODEL,
|
| 180 |
)
|
|
|
|
| 181 |
except Exception as e:
|
| 182 |
total_duration = time.time() - start_time
|
| 183 |
+
logger.error(f"[{request_id}] Request failed after {total_duration:.2f}s: {type(e).__name__}: {e}", exc_info=True)
|
| 184 |
+
return ParseResponse(success=False, error=f"Processing failed (ref: {request_id})")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
finally:
|
| 186 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 187 |
|
|
|
|
| 191 |
request: URLParseRequest,
|
| 192 |
_token: str = Depends(verify_token),
|
| 193 |
) -> ParseResponse:
|
| 194 |
+
"""Parse a document from URL using local llama.cpp + Unsloth GGUF."""
|
| 195 |
request_id = str(uuid4())[:8]
|
| 196 |
start_time = time.time()
|
| 197 |
|
| 198 |
+
logger.info(f"[{request_id}] {'=' * 50}")
|
| 199 |
logger.info(f"[{request_id}] New URL parse request received")
|
| 200 |
logger.info(f"[{request_id}] URL: {request.url}")
|
|
|
|
| 201 |
|
| 202 |
+
if request.output_format != "markdown":
|
| 203 |
+
raise HTTPException(status_code=400, detail="Only 'markdown' output_format is supported")
|
| 204 |
+
if request.start_page < 0:
|
| 205 |
+
raise HTTPException(status_code=400, detail="start_page must be >= 0")
|
| 206 |
+
if request.end_page is not None and request.end_page < request.start_page:
|
| 207 |
+
raise HTTPException(status_code=400, detail="end_page must be >= start_page")
|
| 208 |
|
| 209 |
_validate_url(request.url)
|
| 210 |
|
| 211 |
temp_dir = tempfile.mkdtemp()
|
|
|
|
| 212 |
try:
|
|
|
|
|
|
|
|
|
|
| 213 |
async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
|
| 214 |
response = await client.get(request.url)
|
| 215 |
response.raise_for_status()
|
| 216 |
|
| 217 |
+
if len(response.content) > MAX_FILE_SIZE_BYTES:
|
| 218 |
+
raise HTTPException(
|
| 219 |
+
status_code=413,
|
| 220 |
+
detail=f"File size exceeds maximum allowed size of {MAX_FILE_SIZE_MB}MB",
|
| 221 |
+
)
|
| 222 |
|
|
|
|
| 223 |
url_path = Path(request.url.split("?")[0])
|
| 224 |
file_ext = url_path.suffix.lower()
|
|
|
|
| 225 |
if not file_ext or file_ext not in {".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp"}:
|
| 226 |
content_type = response.headers.get("content-type", "").lower()
|
| 227 |
ct_map = {
|
|
|
|
| 233 |
}
|
| 234 |
file_ext = next((v for k, v in ct_map.items() if k in content_type), ".pdf")
|
| 235 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
input_path = Path(temp_dir) / f"input{file_ext}"
|
| 237 |
await asyncio.to_thread(_save_downloaded_content, input_path, response.content)
|
| 238 |
|
| 239 |
output_dir = Path(temp_dir) / "output"
|
| 240 |
output_dir.mkdir(exist_ok=True)
|
| 241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
markdown_content, json_content, pages_processed, image_count = await asyncio.to_thread(
|
| 243 |
_convert_document,
|
| 244 |
input_path,
|
|
|
|
| 254 |
images_zip, image_count = _create_images_zip(output_dir)
|
| 255 |
|
| 256 |
total_duration = time.time() - start_time
|
| 257 |
+
logger.info(f"[{request_id}] URL request completed successfully in {total_duration:.2f}s")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
return ParseResponse(
|
| 260 |
success=True,
|
| 261 |
+
markdown=markdown_content,
|
| 262 |
+
json_content=json_content,
|
| 263 |
images_zip=images_zip,
|
| 264 |
image_count=image_count,
|
| 265 |
pages_processed=pages_processed,
|
| 266 |
device_used="gpu",
|
| 267 |
vlm_model=QWEN_MODEL,
|
| 268 |
)
|
|
|
|
| 269 |
except httpx.HTTPError as e:
|
| 270 |
+
logger.error(f"[{request_id}] Download failed: {e}")
|
| 271 |
+
return ParseResponse(success=False, error=f"Failed to download file from URL (ref: {request_id})")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
except Exception as e:
|
| 273 |
total_duration = time.time() - start_time
|
| 274 |
+
logger.error(f"[{request_id}] URL request failed after {total_duration:.2f}s: {type(e).__name__}: {e}", exc_info=True)
|
| 275 |
+
return ParseResponse(success=False, error=f"Processing failed (ref: {request_id})")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
finally:
|
| 277 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 278 |
|
config.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
-
"""Configuration, environment variables, and logging setup for the Qwen parser."""
|
| 2 |
|
| 3 |
import logging
|
| 4 |
import os
|
| 5 |
|
| 6 |
-
# Configure logging
|
| 7 |
logging.basicConfig(
|
| 8 |
level=logging.INFO,
|
| 9 |
format="%(asctime)s | %(levelname)-8s | %(message)s",
|
|
@@ -11,23 +10,25 @@ logging.basicConfig(
|
|
| 11 |
)
|
| 12 |
logger = logging.getLogger("docling-parser")
|
| 13 |
|
| 14 |
-
# Security
|
| 15 |
API_TOKEN = os.getenv("API_TOKEN")
|
| 16 |
|
| 17 |
-
# Configuration
|
| 18 |
IMAGES_SCALE = float(os.getenv("IMAGES_SCALE", "2.0"))
|
| 19 |
MAX_FILE_SIZE_MB = int(os.getenv("MAX_FILE_SIZE_MB", "1024"))
|
| 20 |
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
|
| 21 |
RENDER_DPI = int(os.getenv("RENDER_DPI", "200"))
|
| 22 |
|
| 23 |
-
QWEN_MODEL = os.getenv("QWEN_MODEL", "
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
# Blocked hostnames for SSRF protection
|
| 31 |
BLOCKED_HOSTNAMES = {
|
| 32 |
"localhost",
|
| 33 |
"metadata",
|
|
|
|
| 1 |
+
"""Configuration, environment variables, and logging setup for the Unsloth Qwen parser."""
|
| 2 |
|
| 3 |
import logging
|
| 4 |
import os
|
| 5 |
|
|
|
|
| 6 |
logging.basicConfig(
|
| 7 |
level=logging.INFO,
|
| 8 |
format="%(asctime)s | %(levelname)-8s | %(message)s",
|
|
|
|
| 10 |
)
|
| 11 |
logger = logging.getLogger("docling-parser")
|
| 12 |
|
|
|
|
| 13 |
API_TOKEN = os.getenv("API_TOKEN")
|
| 14 |
|
|
|
|
| 15 |
IMAGES_SCALE = float(os.getenv("IMAGES_SCALE", "2.0"))
|
| 16 |
MAX_FILE_SIZE_MB = int(os.getenv("MAX_FILE_SIZE_MB", "1024"))
|
| 17 |
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
|
| 18 |
RENDER_DPI = int(os.getenv("RENDER_DPI", "200"))
|
| 19 |
|
| 20 |
+
QWEN_MODEL = os.getenv("QWEN_MODEL", "unsloth/Qwen3-VL-8B-Instruct-GGUF")
|
| 21 |
+
LLAMA_SERVER_URL = os.getenv("LLAMA_SERVER_URL", "http://127.0.0.1:8080")
|
| 22 |
+
LLAMA_SERVER_TIMEOUT = float(os.getenv("LLAMA_SERVER_TIMEOUT", "300"))
|
| 23 |
+
LLAMA_MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "1536"))
|
| 24 |
+
LLAMA_HF_REPO = os.getenv("LLAMA_HF_REPO", "unsloth/Qwen3-VL-8B-Instruct-GGUF")
|
| 25 |
+
LLAMA_HF_FILE = os.getenv("LLAMA_HF_FILE", "Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf")
|
| 26 |
+
LLAMA_MMPROJ_FILE = os.getenv("LLAMA_MMPROJ_FILE", "mmproj-F16.gguf")
|
| 27 |
+
LLAMA_CTX_SIZE = int(os.getenv("LLAMA_CTX_SIZE", "8192"))
|
| 28 |
+
LLAMA_GPU_LAYERS = int(os.getenv("LLAMA_GPU_LAYERS", "99"))
|
| 29 |
+
LLAMA_THREADS = int(os.getenv("LLAMA_THREADS", "8"))
|
| 30 |
+
LLAMA_FLASH_ATTN = os.getenv("LLAMA_FLASH_ATTN", "on")
|
| 31 |
|
|
|
|
| 32 |
BLOCKED_HOSTNAMES = {
|
| 33 |
"localhost",
|
| 34 |
"metadata",
|
pipeline.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Qwen3-VL pipeline
|
| 2 |
|
| 3 |
import base64
|
| 4 |
import io
|
|
@@ -7,25 +7,18 @@ import zipfile
|
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import BinaryIO, Optional
|
| 9 |
|
| 10 |
-
import
|
| 11 |
-
from PIL import Image
|
| 12 |
-
from transformers import AutoProcessor, Qwen3VLForConditionalGeneration
|
| 13 |
|
| 14 |
from config import (
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
QWEN_MAX_NEW_TOKENS,
|
| 19 |
QWEN_MODEL,
|
| 20 |
-
QWEN_TORCH_DTYPE,
|
| 21 |
logger,
|
| 22 |
)
|
| 23 |
from postprocess import _post_process_merged_markdown
|
| 24 |
from rendering import _image_file_to_png_bytes, _pdf_to_page_images
|
| 25 |
|
| 26 |
-
_model = None
|
| 27 |
-
_processor = None
|
| 28 |
-
|
| 29 |
_OCR_PROMPT = (
|
| 30 |
"Convert this document page to clean markdown.\n\n"
|
| 31 |
"Rules:\n"
|
|
@@ -39,50 +32,9 @@ _OCR_PROMPT = (
|
|
| 39 |
)
|
| 40 |
|
| 41 |
|
| 42 |
-
def
|
| 43 |
-
"""
|
| 44 |
-
|
| 45 |
-
"auto": "auto",
|
| 46 |
-
"bfloat16": torch.bfloat16,
|
| 47 |
-
"float16": torch.float16,
|
| 48 |
-
"float32": torch.float32,
|
| 49 |
-
}
|
| 50 |
-
return dtype_map.get(QWEN_TORCH_DTYPE.lower(), "auto")
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
def _get_pipeline() -> tuple[Qwen3VLForConditionalGeneration, AutoProcessor]:
|
| 54 |
-
"""Get or create the global Qwen3-VL pipeline."""
|
| 55 |
-
global _model, _processor
|
| 56 |
-
if _model is None or _processor is None:
|
| 57 |
-
logger.info(f"Loading Qwen model: {QWEN_MODEL}")
|
| 58 |
-
_processor = AutoProcessor.from_pretrained(QWEN_MODEL, trust_remote_code=True)
|
| 59 |
-
model_kwargs = {
|
| 60 |
-
"torch_dtype": _resolve_torch_dtype(),
|
| 61 |
-
"device_map": "auto",
|
| 62 |
-
"trust_remote_code": True,
|
| 63 |
-
}
|
| 64 |
-
if QWEN_ATTN_IMPLEMENTATION and QWEN_ATTN_IMPLEMENTATION.lower() != "none":
|
| 65 |
-
model_kwargs["attn_implementation"] = QWEN_ATTN_IMPLEMENTATION
|
| 66 |
-
try:
|
| 67 |
-
_model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 68 |
-
QWEN_MODEL,
|
| 69 |
-
**model_kwargs,
|
| 70 |
-
)
|
| 71 |
-
except Exception as e:
|
| 72 |
-
if "attn_implementation" in model_kwargs:
|
| 73 |
-
logger.warning(
|
| 74 |
-
f"Failed to load Qwen with attn_implementation={QWEN_ATTN_IMPLEMENTATION}: {e}. "
|
| 75 |
-
"Retrying without custom attention."
|
| 76 |
-
)
|
| 77 |
-
model_kwargs.pop("attn_implementation", None)
|
| 78 |
-
_model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 79 |
-
QWEN_MODEL,
|
| 80 |
-
**model_kwargs,
|
| 81 |
-
)
|
| 82 |
-
else:
|
| 83 |
-
raise
|
| 84 |
-
_model.eval()
|
| 85 |
-
return _model, _processor
|
| 86 |
|
| 87 |
|
| 88 |
def _save_uploaded_file(input_path: Path, file_obj: BinaryIO) -> None:
|
|
@@ -117,98 +69,6 @@ def _create_images_zip(output_dir: Path) -> tuple[Optional[str], int]:
|
|
| 117 |
return base64.b64encode(zip_buffer.getvalue()).decode("utf-8"), image_count
|
| 118 |
|
| 119 |
|
| 120 |
-
def _resize_image(image: Image.Image) -> Image.Image:
|
| 121 |
-
"""Downscale images to reduce visual token count and generation latency."""
|
| 122 |
-
max_side = max(image.size)
|
| 123 |
-
if max_side <= QWEN_IMAGE_MAX_SIDE:
|
| 124 |
-
return image
|
| 125 |
-
|
| 126 |
-
scale = QWEN_IMAGE_MAX_SIDE / max_side
|
| 127 |
-
new_size = (
|
| 128 |
-
max(1, int(image.size[0] * scale)),
|
| 129 |
-
max(1, int(image.size[1] * scale)),
|
| 130 |
-
)
|
| 131 |
-
return image.resize(new_size, Image.Resampling.LANCZOS)
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
def _extract_markdown_from_images(
    page_images: list[tuple[int, bytes]],
    request_id: str,
) -> dict[int, str]:
    """Run a batch of page images through Qwen3-VL.

    Args:
        page_images: ``(page_index, encoded_image_bytes)`` pairs. Each image
            is decoded, converted to RGB and downscaled with
            ``_resize_image`` before being sent to the model.
        request_id: Identifier used as the prefix of the per-page log lines.

    Returns:
        Mapping of page index to the stripped text decoded for that page.
        An empty ``page_images`` yields an empty mapping.
    """
    if not page_images:
        # Nothing to do; avoid handing the processor an empty batch.
        return {}

    model, processor = _get_pipeline()
    prompt_texts: list[str] = []
    images: list[Image.Image] = []
    page_indices: list[int] = []

    for page_idx, image_bytes in page_images:
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        image = _resize_image(image)
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": _OCR_PROMPT},
                ],
            }
        ]
        prompt_texts.append(
            processor.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
        )
        images.append(image)
        page_indices.append(page_idx)

    inputs = processor(
        text=prompt_texts,
        images=images,
        padding=True,
        return_tensors="pt",
    )

    # Move tensors to wherever the model's parameters live; leave any
    # non-tensor entries untouched.
    device = next(model.parameters()).device
    model_inputs = {
        key: value.to(device) if hasattr(value, "to") else value
        for key, value in inputs.items()
    }

    with torch.inference_mode():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=QWEN_MAX_NEW_TOKENS,
            do_sample=False,
        )

    # generate() returns every row as the full *padded* prompt followed by
    # the new tokens, so the completion starts at the padded prompt width
    # for all rows. Slicing at the per-row count of non-pad tokens (the
    # attention-mask sum) would leak prompt tokens into the output of any
    # row that was padded.
    prompt_width = model_inputs["input_ids"].shape[1]
    completions = processor.batch_decode(
        generated_ids[:, prompt_width:],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    decoded_pages: dict[int, str] = {}
    for page_idx, completion in zip(page_indices, completions):
        text = completion.strip()
        decoded_pages[page_idx] = text
        logger.info(f"[{request_id}:page:{page_idx + 1}] Qwen generated {len(text)} chars")

    return decoded_pages
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
def _extract_markdown_from_image(
    image_bytes: bytes,
    page_label: str,
) -> str:
    """Parse a single image by delegating to the batch helper.

    The image is submitted as a one-element batch under page index 0, with
    ``page_label`` passed through as the request identifier, and the text
    produced for that index is returned.
    """
    return _extract_markdown_from_images([(0, image_bytes)], page_label)[0]
|
| 210 |
-
|
| 211 |
-
|
| 212 |
def _collect_page_images(
|
| 213 |
input_path: Path,
|
| 214 |
request_id: str,
|
|
@@ -228,6 +88,39 @@ def _collect_page_images(
|
|
| 228 |
return [(0, _image_file_to_png_bytes(input_path))]
|
| 229 |
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
def _convert_document(
|
| 232 |
input_path: Path,
|
| 233 |
output_dir: Path,
|
|
@@ -236,17 +129,15 @@ def _convert_document(
|
|
| 236 |
start_page: int = 0,
|
| 237 |
end_page: Optional[int] = None,
|
| 238 |
) -> tuple:
|
| 239 |
-
"""Render pages and parse them with
|
| 240 |
page_images = _collect_page_images(input_path, request_id, start_page, end_page)
|
| 241 |
if not page_images:
|
| 242 |
raise ValueError("No pages available to parse")
|
| 243 |
|
| 244 |
markdown_pages: list[str] = []
|
| 245 |
-
for
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
for page_idx, _ in batch:
|
| 249 |
-
markdown_pages.append(batch_outputs.get(page_idx, ""))
|
| 250 |
|
| 251 |
markdown_content = "\n\n".join(p for p in markdown_pages if p).strip()
|
| 252 |
markdown_content = _post_process_merged_markdown(markdown_content)
|
|
|
|
| 1 |
+
"""Unsloth GGUF Qwen3-VL pipeline and file helpers."""
|
| 2 |
|
| 3 |
import base64
|
| 4 |
import io
|
|
|
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import BinaryIO, Optional
|
| 9 |
|
| 10 |
+
import httpx
|
|
|
|
|
|
|
| 11 |
|
| 12 |
from config import (
|
| 13 |
+
LLAMA_MAX_TOKENS,
|
| 14 |
+
LLAMA_SERVER_TIMEOUT,
|
| 15 |
+
LLAMA_SERVER_URL,
|
|
|
|
| 16 |
QWEN_MODEL,
|
|
|
|
| 17 |
logger,
|
| 18 |
)
|
| 19 |
from postprocess import _post_process_merged_markdown
|
| 20 |
from rendering import _image_file_to_png_bytes, _pdf_to_page_images
|
| 21 |
|
|
|
|
|
|
|
|
|
|
| 22 |
_OCR_PROMPT = (
|
| 23 |
"Convert this document page to clean markdown.\n\n"
|
| 24 |
"Rules:\n"
|
|
|
|
| 32 |
)
|
| 33 |
|
| 34 |
|
| 35 |
+
def _get_pipeline() -> str:
    """Return the configured llama-server base URL.

    Kept as a compatibility helper for app startup; it only hands back
    ``LLAMA_SERVER_URL``.
    """
    server_url = LLAMA_SERVER_URL
    return server_url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
def _save_uploaded_file(input_path: Path, file_obj: BinaryIO) -> None:
|
|
|
|
| 69 |
return base64.b64encode(zip_buffer.getvalue()).decode("utf-8"), image_count
|
| 70 |
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
def _collect_page_images(
|
| 73 |
input_path: Path,
|
| 74 |
request_id: str,
|
|
|
|
| 88 |
return [(0, _image_file_to_png_bytes(input_path))]
|
| 89 |
|
| 90 |
|
| 91 |
+
def _call_llama_server(image_bytes: bytes, page_label: str) -> str:
    """Send a page image to the local llama.cpp OpenAI-compatible server.

    Args:
        image_bytes: Encoded page image; it is embedded in the request as a
            base64 ``data:image/png`` URL.
        page_label: Prefix used for the log line describing the result.

    Returns:
        The stripped ``content`` of the first choice's message, or an empty
        string when the response carries no choices or no text content.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status
            (via ``raise_for_status``).
    """
    image_b64 = base64.b64encode(image_bytes).decode("utf-8")
    payload = {
        "model": QWEN_MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": _OCR_PROMPT},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_b64}"},
                    },
                ],
            }
        ],
        "temperature": 0.0,
        "max_tokens": LLAMA_MAX_TOKENS,
    }

    response = httpx.post(
        f"{LLAMA_SERVER_URL}/v1/chat/completions",
        json=payload,
        timeout=LLAMA_SERVER_TIMEOUT,
    )
    response.raise_for_status()
    data = response.json()

    # Be tolerant of degenerate responses: "choices" may be present but empty,
    # and "message"/"content" may be present but null. Plain .get() defaults
    # only cover *missing* keys, so fall back explicitly with `or`.
    choices = data.get("choices") or [{}]
    message = choices[0].get("message") or {}
    text = (message.get("content") or "").strip()

    logger.info(f"[{page_label}] llama-server generated {len(text)} chars")
    return text
|
| 122 |
+
|
| 123 |
+
|
| 124 |
def _convert_document(
|
| 125 |
input_path: Path,
|
| 126 |
output_dir: Path,
|
|
|
|
| 129 |
start_page: int = 0,
|
| 130 |
end_page: Optional[int] = None,
|
| 131 |
) -> tuple:
|
| 132 |
+
"""Render pages and parse them with the local Unsloth GGUF server."""
|
| 133 |
page_images = _collect_page_images(input_path, request_id, start_page, end_page)
|
| 134 |
if not page_images:
|
| 135 |
raise ValueError("No pages available to parse")
|
| 136 |
|
| 137 |
markdown_pages: list[str] = []
|
| 138 |
+
for page_idx, image_bytes in page_images:
|
| 139 |
+
page_label = f"{request_id}:page:{page_idx + 1}"
|
| 140 |
+
markdown_pages.append(_call_llama_server(image_bytes, page_label))
|
|
|
|
|
|
|
| 141 |
|
| 142 |
markdown_content = "\n\n".join(p for p in markdown_pages if p).strip()
|
| 143 |
markdown_content = _post_process_merged_markdown(markdown_content)
|
requirements.txt
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# Qwen3-VL parser API dependencies
|
| 2 |
fastapi>=0.115.0
|
| 3 |
uvicorn[standard]>=0.32.0
|
| 4 |
python-multipart>=0.0.9
|
|
@@ -7,8 +7,3 @@ pydantic>=2.0.0
|
|
| 7 |
opencv-python-headless>=4.10.0
|
| 8 |
pdf2image>=1.17.0
|
| 9 |
huggingface-hub>=0.25.0
|
| 10 |
-
Pillow>=10.0.0
|
| 11 |
-
accelerate>=0.34.0
|
| 12 |
-
torch>=2.4.0
|
| 13 |
-
torchvision>=0.19.0
|
| 14 |
-
transformers @ git+https://github.com/huggingface/transformers.git
|
|
|
|
| 1 |
+
# Unsloth GGUF Qwen3-VL parser API dependencies
|
| 2 |
fastapi>=0.115.0
|
| 3 |
uvicorn[standard]>=0.32.0
|
| 4 |
python-multipart>=0.0.9
|
|
|
|
| 7 |
opencv-python-headless>=4.10.0
|
| 8 |
pdf2image>=1.17.0
|
| 9 |
huggingface-hub>=0.25.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
start.sh
CHANGED
|
@@ -1,7 +1,15 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
# Start FastAPI
|
| 7 |
exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1
|
|
|
|
| 1 |
#!/bin/bash
# Start the llama.cpp server in the background, then replace this shell with
# the FastAPI app served by uvicorn.
set -euo pipefail

# Build the argument list in the parent shell. When "${VAR}" expansions sit
# directly in a backgrounded command they are evaluated in the forked child,
# so an unset variable under `set -u` would only kill that child and uvicorn
# would still start without a model server. Expanding here makes a missing
# setting abort the whole script instead.
llama_args=(
    --host 0.0.0.0
    --port 8080
    --hf-repo "${LLAMA_HF_REPO}"
    --hf-file "${LLAMA_HF_FILE}"
    --mmproj "${LLAMA_MMPROJ_FILE}"
    --ctx-size "${LLAMA_CTX_SIZE}"
    --n-gpu-layers "${LLAMA_GPU_LAYERS}"
    --threads "${LLAMA_THREADS}"
    --flash-attn "${LLAMA_FLASH_ATTN}"
)

/home/user/llama.cpp/build/bin/llama-server "${llama_args[@]}" &

exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1
|