| # OCR and PDF processing dependencies | |
| pytesseract==0.3.10 | |
| Pillow==10.0.1 | |
| PyMuPDF==1.23.8 | |
| opencv-python==4.8.1.78 | |
| pdf2image==1.16.3 | |
| python-dotenv==1.0.0 | |
| # Tesseract OCR engine (required by pytesseract; not installable via pip, so install it system-wide) | |
| # On macOS: brew install tesseract | |
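| # On Ubuntu: sudo apt-get install tesseract-ocr | |
| # Poppler (required by pdf2image; install system-wide) | |
| # On macOS: brew install poppler | |
| # On Ubuntu: sudo apt-get install poppler-utils | |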
| # Additional dependencies for text processing | |
| nltk==3.8.1 | |
| spacy==3.7.2 | |
| # After installing spacy, download the English model with: python -m spacy download en_core_web_sm |