# =============================================================================
# pyproject.toml — single source of truth for the `captioning` Python package
# -----------------------------------------------------------------------------
# This file follows PEP 621 (project metadata) and PEP 517/518 (build system).
# It replaces a scattered mix of setup.py + requirements.txt + setup.cfg with
# one canonical config. `pip install -e .` installs the package from `src/`.
#
# Why src/ layout? It prevents accidental imports of the package from the
# repo root during testing — every test exercises the *installed* package,
# the way users will actually import it. This is the layout used by the
# Python Packaging Authority's example projects and recommended by pytest.
# =============================================================================

[build-system]
# `wheel` is intentionally not listed: the setuptools backend requests it on
# its own when a wheel is actually being built, and listing it here would
# force it to be installed for sdist-only builds as well.
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"

# -----------------------------------------------------------------------------
# Project metadata — what `pip show captioning` will display.
# -----------------------------------------------------------------------------
[project]
name = "captioning"
version = "0.1.0"
description = "IEEE-published CNN+Transformer image captioning, restructured into a production-grade multimodal AI platform."
readme = "README.md"
# Upper bound follows the TensorFlow pin below: tensorflow-cpu 2.15.0 ships
# wheels for CPython 3.9–3.11 only (3.12 support arrived with TF 2.16), so
# advertising 3.12 would produce an unresolvable install. Widen this together
# with the TensorFlow upgrade.
requires-python = ">=3.10,<3.12"
license = { text = "MIT" }
authors = [
    { name = "Apoorv Raj" },
]
keywords = [
    "image-captioning",
    "multimodal",
    "transformer",
    "computer-vision",
    "tensorflow",
    "fastapi",
]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]

# -----------------------------------------------------------------------------
# Runtime dependencies for the core ML library + FastAPI backend.
# The ML frameworks are pinned exactly (TF here; transformers + torch in the
# `hf` extra); everything else is bounded by a version range. Floating
# versions across TF + transformers + torch are the most common source of
# silent BLEU drift between training runs and deployments.
#
# - tensorflow-cpu (NOT tensorflow): saves ~600 MB and removes the CUDA driver
#   dependency. We are deploying to CPU-only HuggingFace Spaces. If you ever
#   train on a GPU box, install `tensorflow==2.15.0` in that environment only.
# - 2.15.0 specifically: TF 2.16 swapped to Keras 3 by default and broke the
#   `tf.keras.layers.TextVectorization` saving behaviour the IEEE notebook
#   relies on. Stay on 2.15 for v1; upgrade is a deliberate Phase-5+ task.
# - pydantic 2.x: supported by FastAPI since 0.100. Faster and stricter than v1.
# -----------------------------------------------------------------------------
dependencies = [
    "tensorflow-cpu==2.15.0",
    "numpy>=1.26,<2.0",             # NumPy 2.0 broke TF 2.15 binary compat
    "pandas>=2.1,<3.0",
    "pillow>=10.0,<11.0",
    "pyyaml>=6.0,<7.0",
    "pydantic>=2.7,<3.0",
    "pydantic-settings>=2.3,<3.0",
    "fastapi>=0.111,<1.0",
    "uvicorn[standard]>=0.30,<1.0",
    "python-multipart>=0.0.9",      # FastAPI multipart form data (image upload)
    "huggingface-hub>=0.23,<1.0",   # Pulls weights from HF Hub at startup
    "structlog>=24.1,<25.0",        # Structured JSON logs in prod, pretty in dev
    "anyio>=4.3,<5.0",              # Thread-pool offload for sync TF inference
    "tqdm>=4.66,<5.0",
    "click>=8.1,<9.0",              # CLI for scripts/
]

# -----------------------------------------------------------------------------
# Optional dependency groups — installed via `pip install ".[dev,eval]"`.
# Splitting these keeps the production Docker image small (Phase 1 backend
# image is ~1.1 GB; adding `hf` extras takes it to ~2.3 GB which is the
# Phase 3 comparison image).
# -----------------------------------------------------------------------------
[project.optional-dependencies]
# Tier-1 multimodal upgrade: BLIP, ViT-GPT2, GIT models from HuggingFace.
# torch CPU is large (~700 MB); only install when serving the comparison demo.
hf = [
    "transformers==4.41.2",
    "torch==2.3.0",
    "sentencepiece>=0.2.0",
    "accelerate>=0.30,<1.0",
]

# Evaluation metrics. Pulled separately because pycocoevalcap drags Java
# dependencies (METEOR), which we don't want in the serving image.
eval = [
    "sacrebleu>=2.4,<3.0",
    "nltk>=3.8,<4.0",
    "rouge-score>=0.1.2",
    "pycocoevalcap>=1.2",
    "matplotlib>=3.8,<4.0",
]

# Experiment tracking. Local SQLite by default; points at DagsHub in prod.
mlflow = [
    "mlflow>=2.13,<3.0",
]

# Developer tooling: lint, type-check, test. Never deployed.
dev = [
    "ruff>=0.5,<1.0",
    "mypy>=1.10,<2.0",
    "pytest>=8.2,<9.0",
    "pytest-cov>=5.0,<6.0",
    "pytest-asyncio>=0.23,<1.0",
    "httpx>=0.27,<1.0",             # FastAPI TestClient backend
    "pre-commit>=3.7,<4.0",
    "nbstripout>=0.7,<1.0",
    "types-PyYAML",
    "types-requests",
    "pandas-stubs>=2.2,<3.0",
]

# -----------------------------------------------------------------------------
# Where pip should install the package from (the `src/` layout).
# -----------------------------------------------------------------------------
[tool.setuptools.packages.find]
where = ["src"]
include = ["captioning*"]

[tool.setuptools.package-data]
captioning = ["py.typed"]  # PEP 561: ship type hints with the package

# =============================================================================
# Tooling configuration — co-located so a single file owns project policy.
# =============================================================================

# ---- Ruff: linter + formatter (replaces black + isort + flake8) -------------
# We prefer Ruff because it runs ~100x faster and is the de-facto modern
# default in the Python ecosystem. One tool, one config, one cache.
[tool.ruff]
line-length = 100
target-version = "py310"
src = ["src", "backend", "scripts", "tests"]
extend-exclude = [
    "notebooks",  # Notebooks have their own conventions
    "outputs",
    "mlruns",
    "frontend",
]

[tool.ruff.lint]
# Curated rule set — pragmatic defaults, not the full strict catalogue.
select = [
    "E",    # pycodestyle errors
    "W",    # pycodestyle warnings
    "F",    # pyflakes
    "I",    # isort import sorting
    "B",    # flake8-bugbear (likely bugs)
    "UP",   # pyupgrade (modern syntax)
    "SIM",  # flake8-simplify
    "RET",  # flake8-return
    "PTH",  # flake8-use-pathlib (prefer pathlib over os.path)
    "RUF",  # Ruff's own rules
]
ignore = [
    "E501",  # line length — formatter handles it; lint warnings are noise
    "B008",  # function call in default arg (FastAPI's Depends() pattern)
]

[tool.ruff.lint.per-file-ignores]
# B011 is the `assert False` rule from flake8-bugbear.
"tests/**" = ["B011"]    # `assert False` in tests is fine
# T20 (flake8-print) is not in `select` above, so this entry only takes effect
# if that rule family is enabled later.
"scripts/**" = ["T201"]  # print() in CLI scripts is fine

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
docstring-code-format = true

# ---- Mypy: static type checker -----------------------------------------------
# We only enforce types on our own code; third-party untyped libs are tolerated.
[tool.mypy]
python_version = "3.10"
strict = false  # Start lenient; tighten as types stabilise
warn_unused_configs = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
no_implicit_optional = true
files = ["src/captioning", "backend/app", "scripts"]
mypy_path = ["src", "backend"]
explicit_package_bases = true
namespace_packages = true

[[tool.mypy.overrides]]
module = [
    "tensorflow.*",
    "transformers.*",
    "huggingface_hub.*",
    "PIL.*",
    "nltk.*",
    "sacrebleu.*",
    "rouge_score.*",
    "pycocoevalcap.*",
]
ignore_missing_imports = true

# ---- Pytest -------------------------------------------------------------------
[tool.pytest.ini_options]
minversion = "8.0"
testpaths = ["tests", "backend/app/tests"]
pythonpath = ["backend"]  # Lets `from app.* import ...` resolve in tests
addopts = [
    "-ra",  # Show short summary for non-passing tests
    "--strict-markers",
    "--strict-config",
    "--showlocals",
]
markers = [
    "slow: tests that take >10 seconds (run with -m slow)",
    "gpu: tests requiring a GPU (skipped in CI by default)",
]
filterwarnings = [
    "ignore::DeprecationWarning:tensorflow.*",
    "ignore::FutureWarning:tensorflow.*",
]

# ---- Coverage -----------------------------------------------------------------
[tool.coverage.run]
branch = true
source = ["src/captioning", "backend/app"]
omit = ["*/tests/*", "*/__init__.py"]

[tool.coverage.report]
exclude_lines = [
    "pragma: no cover",
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
    "if __name__ == .__main__.:",
]
show_missing = true
skip_covered = false