# Docgenie-API / api / requirements.txt
# Ahadhassan-2003
# deploy: update HF Space
# dc4e6da
# ============================================
# DocGenie API Requirements
# ============================================
# NOTE: These dependencies are also specified in the root pyproject.toml.
# This file exists for the convenience of standalone API deployments.
# For development, run `uv sync` from the root directory.
# For a production API-only deployment, run `pip install -r requirements.txt`.
# Pinned versions are aligned with the pyproject.toml versions used to run the pipeline locally.
# FastAPI Framework
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
python-multipart>=0.0.6
# Pydantic for data validation
pydantic==2.11.7
pydantic-core==2.33.2
pydantic-settings>=2.11.0
# Environment variables
python-dotenv>=1.0.0
# HTTP client for async requests
httpx==0.28.1
aiohttp==3.12.15
# Retry logic for external services
tenacity>=8.2.3
# Anthropic Python SDK (Claude API client)
anthropic==0.64.0
# HTML rendering and PDF generation
# (Playwright also needs its browser binaries: run `playwright install` after pip install)
playwright>=1.55.0
beautifulsoup4==4.13.4
lxml>=5.1.0
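# PDF processing
# (pdf2image requires the system poppler utilities; PyPDF2 3.0.1 is the final
#  PyPDF2 release -- the project continues under the name `pypdf`)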
PyMuPDF==1.26.3
pdf2image==1.17.0
pypdf2==3.0.1
# Image processing for Stage 3
Pillow==11.3.0
numpy==1.26.4
# CSS parsing for Stage 3
cssutils==2.11.1
# Progress bars and logging
rich==14.1.0
# Additional utilities
python-dateutil==2.9.0.post0
requests==2.32.5
# Background job queue (Redis + RQ)
redis>=5.0.0
rq>=1.15.0
# Supabase client for database
supabase>=2.0.0
# Google Drive API integration
google-api-python-client>=2.100.0
google-auth-httplib2>=0.2.0
google-auth-oauthlib>=1.2.0
# ============================================
# Dependencies for optional / advanced features
# (listed unconditionally, so `pip install -r requirements.txt` installs them too)
# ============================================
# OCR support (requires system tesseract-ocr)
pytesseract>=0.3.10
# Barcode generation
python-barcode>=0.15.1
# Dataset export in msgpack format
datadings>=0.4.3
# Fuzzy matching for ground-truth (GT) verification (Stage 17/18)
python-Levenshtein>=0.25.0