File size: 1,818 Bytes
dc4e6da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# ============================================
# DocGenie API Requirements
# ============================================
# NOTE: These dependencies are also specified in the root pyproject.toml.
# This file exists for the convenience of standalone API deployments.
# For development, use: uv sync (from the root directory)
# For production API-only deployment, use: pip install -r requirements.txt
# Versions are kept aligned with the pyproject.toml versions used to run the pipeline locally.

# FastAPI Framework
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
python-multipart>=0.0.6

# Pydantic for data validation
pydantic==2.11.7
pydantic-core==2.33.2
pydantic-settings>=2.11.0

# Environment variables
python-dotenv>=1.0.0

# HTTP clients for async requests
httpx==0.28.1
aiohttp==3.12.15

# Retry logic for external services
tenacity>=8.2.3

# Claude API
anthropic==0.64.0

# HTML rendering and PDF generation
# (playwright also needs its browser binaries: run `playwright install` after pip install)
playwright>=1.55.0
beautifulsoup4==4.13.4
lxml>=5.1.0

# PDF processing (pdf2image requires the system poppler utilities)
PyMuPDF==1.26.3
pdf2image==1.17.0
pypdf2==3.0.1

# Image processing for Stage 3
Pillow==11.3.0
numpy==1.26.4

# CSS parsing for Stage 3
cssutils==2.11.1

# Progress bars and logging
rich==14.1.0

# Additional utilities
python-dateutil==2.9.0.post0
requests==2.32.5

# Background job queue (Redis + RQ)
redis>=5.0.0
rq>=1.15.0

# Supabase client for database
supabase>=2.0.0

# Google Drive API integration
google-api-python-client>=2.100.0
google-auth-httplib2>=0.2.0
google-auth-oauthlib>=1.2.0

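# ============================================
# Optional dependencies for advanced features
# ============================================
# NOTE: these entries are not commented out, so `pip install -r requirements.txt`
# installs them as well; remove or comment out the ones you do not need.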
# OCR support (requires system tesseract-ocr)
pytesseract>=0.3.10

# Barcode generation
python-barcode>=0.15.1

# Dataset export in msgpack format
datadings>=0.4.3

# Fuzzy matching for GT verification (Stage 17/18)
python-Levenshtein>=0.25.0