Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| # Core Dependencies | |
| python-dotenv==1.0.0 | |
| pydantic==2.5.0 | |
| pydantic-settings==2.1.0 | |
| # Web Scraping & Data Collection | |
| beautifulsoup4==4.12.2 | |
| selenium==4.16.0 | |
| playwright==1.40.0 | |
| httpx>=0.25.0 | |
| aiohttp==3.9.1 | |
| youtube-transcript-api>=0.6.2 | |
| # Document Processing (Multiple Formats) | |
| PyPDF2>=3.0.0 # PDF extraction | |
| pdfplumber>=0.10.0 # Advanced PDF extraction (tables) | |
| python-pptx>=0.6.0 # PowerPoint extraction | |
| python-docx>=1.0.0 # Word document extraction | |
| openpyxl>=3.1.0 # Excel extraction | |
| pytesseract>=0.3.0 # OCR for scanned documents (optional) | |
| Pillow>=10.0.0 # Image processing for OCR | |
| # AI & LLM | |
| openai>=1.6.0 | |
| anthropic>=0.8.0 | |
| langchain>=0.1.0 | |
| langchain-openai>=0.0.2 | |
| langchain-community>=0.0.10 | |
| langgraph>=0.0.20 | |
| langchain-core>=0.1.0 | |
| tiktoken>=0.5.0 | |
| # Model Context Protocol (MCP) - Optional, for local MCP server only | |
| # mcp>=0.1.0 | |
| # Data Processing & Lakehouse | |
| databricks-sdk==0.18.0 | |
| delta-spark==3.0.0 | |
| pyspark==3.5.0 | |
| pandas==2.1.4 | |
| polars==0.20.2 | |
| pyarrow==14.0.2 | |
| duckdb>=0.9.0 | |
| # NLP & Text Analysis | |
| spacy==3.7.2 | |
| transformers==4.36.2 | |
| sentence-transformers==2.2.2 | |
| nltk==3.8.1 | |
| datasets>=2.16.0 | |
| huggingface-hub>=0.20.0,<0.26.0 # sentence-transformers 2.2.2 imports cached_download, which huggingface-hub 0.26 removed | |
| # Database & Storage | |
| sqlalchemy>=2.0.0 | |
| chromadb>=0.4.0 | |
| qdrant-client>=1.7.0 | |
| # API & Web Framework | |
| fastapi==0.109.0 | |
| uvicorn[standard]==0.25.0 | |
| pydantic[email]==2.5.0 # Adds the email-validator extra; keep in sync with the core pydantic pin above | |
| python-multipart==0.0.7 # 0.0.7 fixes the Content-Type header ReDoS (CVE-2024-24762) | |
| # Authentication & OAuth | |
| python-jose[cryptography]==3.3.0 # JWT tokens | |
| passlib[bcrypt]==1.7.4 # Password hashing | |
| itsdangerous==2.1.2 # Secure tokens | |
| psycopg2-binary==2.9.9 # PostgreSQL driver | |
| asyncpg==0.29.0 # Async PostgreSQL driver (for Neon) | |
| # Visualization | |
| plotly==5.18.0 | |
| folium==0.15.1 | |
| matplotlib==3.8.2 | |
| seaborn==0.13.1 | |
| # Utilities | |
| python-dateutil==2.8.2 | |
| pytz==2023.3 | |
| tenacity==8.2.3 | |
| loguru==0.7.2 | |
| click==8.1.7 | |
| tqdm==4.66.1 | |
| # Testing | |
| pytest>=7.4.0 | |
| pytest-asyncio>=0.21.0 | |
| pytest-cov>=4.1.0 | |
| pytest-mock>=3.12.0 | |
| pytest-httpx>=0.27.0 | |
| # Development | |
| black>=23.0.0 | |
| ruff>=0.1.0 | |
| mypy>=1.8.0 | |
| pre-commit>=3.6.0 | |
| # Optional: Google Data Commons Integration | |
| # Recommended for jurisdiction enrichment (replaces manual Census API calls) | |
| # Install with: pip install datacommons datacommons-pandas | |
| # Documentation: https://docs.datacommons.org/api/ | |
| # datacommons>=1.4.0 | |
| # datacommons-pandas>=0.1.0 | |