Spaces:

apoorvrajdev
/

image-captioning-api

Configuration error

App Files Files Community

image-captioning-api / pyproject.toml

apoorvrajdev

fix(ci): use list-form mypy_path and switch frontend job to npm install

242c2a7 3 days ago

raw

history blame contribute delete

8.97 kB

	# =============================================================================
	# pyproject.toml — single source of truth for the `captioning` Python package
	# -----------------------------------------------------------------------------
	# This file follows PEP 621 (project metadata) and PEP 517/518 (build system).
	# It replaces a scattered mix of setup.py + requirements.txt + setup.cfg with
	# one canonical config. `pip install -e .` installs the package from `src/`.
	#
	# Why src/ layout? It prevents accidental imports of the package from the
	# repo root during testing — every test exercises the installed package,
	# the way users will actually import it. This is the layout used by the
	# Python Packaging Authority's example projects and recommended by pytest.
	# =============================================================================

	[build-system]
	# PEP 517/518 build backend used by `pip install .` and `pip install -e .`.
	# `wheel` is deliberately not listed: the setuptools backend asks for it on
	# its own when a wheel build needs it, and setuptools' documentation advises
	# against declaring it here.
	requires = ["setuptools>=68"]
	build-backend = "setuptools.build_meta"

	# -----------------------------------------------------------------------------
	# Project metadata — what `pip show captioning` will display.
	# -----------------------------------------------------------------------------
	[project]
	name = "captioning"
	version = "0.1.0"
	description = "IEEE-published CNN+Transformer image captioning, restructured into a production-grade multimodal AI platform."
	readme = "README.md"
	# The upper bound follows the `tensorflow-cpu==2.15.0` pin in `dependencies`:
	# TF 2.15 publishes wheels for CPython 3.9-3.11 only, so an install on 3.12
	# cannot resolve. Raise this bound together with the TensorFlow pin.
	requires-python = ">=3.10,<3.12"
	license = { text = "MIT" }
	authors = [
		{ name = "Apoorv Raj" },
	]
	keywords = [
		"image-captioning",
		"multimodal",
		"transformer",
		"computer-vision",
		"tensorflow",
		"fastapi",
	]
	# Keep the "Programming Language" entries in sync with `requires-python`.
	classifiers = [
		"Development Status :: 3 - Alpha",
		"Intended Audience :: Developers",
		"Intended Audience :: Science/Research",
		"License :: OSI Approved :: MIT License",
		"Programming Language :: Python :: 3",
		"Programming Language :: Python :: 3.10",
		"Programming Language :: Python :: 3.11",
		"Topic :: Scientific/Engineering :: Artificial Intelligence",
	]

	# -----------------------------------------------------------------------------
	# Runtime dependencies for the core ML library + FastAPI backend.
	# Pinned hard. Floating versions across TF + transformers + torch are the most
	# common source of silent BLEU drift between training runs and deployments.
	#
	# - tensorflow-cpu (NOT tensorflow): saves ~600 MB and removes the CUDA driver
	# dependency. We are deploying to CPU-only HuggingFace Spaces. If you ever
	# train on a GPU box, install `tensorflow==2.15.0` in that environment only.
	# - 2.15.0 specifically: TF 2.16 swapped to Keras 3 by default and broke the
	# `tf.keras.layers.TextVectorization` saving behaviour the IEEE notebook
	# relies on. Stay on 2.15 for v1; upgrade is a deliberate Phase-5+ task.
	# - pydantic 2.x: required by FastAPI >= 0.100. Faster and stricter than v1.
	# -----------------------------------------------------------------------------
	# Every entry carries an upper bound (or an exact pin) so a fresh install
	# cannot silently pick up a new major release.
	dependencies = [
		"tensorflow-cpu==2.15.0",
		# NumPy 2.0 broke TF 2.15 binary compat
		"numpy>=1.26,<2.0",
		"pandas>=2.1,<3.0",
		"pillow>=10.0,<11.0",
		"pyyaml>=6.0,<7.0",
		"pydantic>=2.7,<3.0",
		"pydantic-settings>=2.3,<3.0",
		"fastapi>=0.111,<1.0",
		"uvicorn[standard]>=0.30,<1.0",
		# FastAPI multipart form data (image upload). Capped below 1.0 like the
		# other pre-1.0 dependencies in this list.
		"python-multipart>=0.0.9,<1.0",
		# Pulls weights from HF Hub at startup
		"huggingface-hub>=0.23,<1.0",
		# Structured JSON logs in prod, pretty in dev
		"structlog>=24.1,<25.0",
		# Thread-pool offload for sync TF inference
		"anyio>=4.3,<5.0",
		"tqdm>=4.66,<5.0",
		# CLI for scripts/
		"click>=8.1,<9.0",
	]

	# -----------------------------------------------------------------------------
	# Optional dependency groups — installed via `pip install ".[dev,eval]"`.
	# Splitting these keeps the production Docker image small (Phase 1 backend
	# image is ~1.1 GB; adding `hf` extras takes it to ~2.3 GB which is the
	# Phase 3 comparison image).
	# -----------------------------------------------------------------------------
	[project.optional-dependencies]
	# HuggingFace comparison models (BLIP, ViT-GPT2, GIT) for the Tier-1
	# multimodal upgrade. CPU torch alone is ~700 MB, so install this extra
	# only when serving the comparison demo.
	hf = [
		"transformers==4.41.2",
		"torch==2.3.0",
		"sentencepiece>=0.2.0",
		"accelerate>=0.30,<1.0",
	]

	# Evaluation metrics. Kept out of the serving image because pycocoevalcap
	# drags in Java dependencies (METEOR).
	eval = [
		"sacrebleu>=2.4,<3.0",
		"nltk>=3.8,<4.0",
		"rouge-score>=0.1.2",
		"pycocoevalcap>=1.2",
		"matplotlib>=3.8,<4.0",
	]

	# Experiment tracking: local SQLite by default, DagsHub in prod.
	mlflow = ["mlflow>=2.13,<3.0"]

	# Developer tooling (lint, type-check, test). Never deployed.
	dev = [
		"ruff>=0.5,<1.0",
		"mypy>=1.10,<2.0",
		"pytest>=8.2,<9.0",
		"pytest-cov>=5.0,<6.0",
		"pytest-asyncio>=0.23,<1.0",
		# FastAPI TestClient backend
		"httpx>=0.27,<1.0",
		"pre-commit>=3.7,<4.0",
		"nbstripout>=0.7,<1.0",
		"types-PyYAML",
		"types-requests",
		"pandas-stubs>=2.2,<3.0",
	]

	# -----------------------------------------------------------------------------
	# Where pip should install the package from (the `src/` layout).
	# -----------------------------------------------------------------------------
	# Package discovery: look only under `src/` and pick up `captioning` plus
	# any package whose name starts with it.
	[tool.setuptools.packages.find]
	include = ["captioning*"]
	where = ["src"]

	# PEP 561: ship the `py.typed` marker so type hints travel with the package.
	[tool.setuptools.package-data]
	captioning = ["py.typed"]

	# =============================================================================
	# Tooling configuration — co-located so a single file owns project policy.
	# =============================================================================

	# ---- Ruff: linter + formatter (replaces black + isort + flake8) -------------
	# We prefer Ruff because it runs ~100x faster and is the de-facto modern
	# default in the Python ecosystem. One tool, one config, one cache.
	[tool.ruff]
	target-version = "py310"
	line-length = 100
	# Source roots Ruff treats as first-party code.
	src = ["src", "backend", "scripts", "tests"]
	# Directories skipped in addition to Ruff's default excludes.
	extend-exclude = [
		# Notebooks have their own conventions
		"notebooks",
		"outputs",
		"mlruns",
		"frontend",
	]

	[tool.ruff.lint]
	# A curated, pragmatic rule set rather than the full strict catalogue.
	#   E / W  pycodestyle errors / warnings
	#   F      pyflakes
	#   I      isort import sorting
	#   B      flake8-bugbear (likely bugs)
	#   UP     pyupgrade (modern syntax)
	#   SIM    flake8-simplify
	#   RET    flake8-return
	#   PTH    flake8-use-pathlib (prefer pathlib over os.path)
	#   RUF    Ruff's own rules
	select = [
		"E",
		"W",
		"F",
		"I",
		"B",
		"UP",
		"SIM",
		"RET",
		"PTH",
		"RUF",
	]
	# Rules switched off project-wide:
	#   E501  line length: the formatter handles it; lint warnings are noise
	#   B008  function call in default arg (FastAPI's Depends() pattern)
	ignore = [
		"E501",
		"B008",
	]

	# Per-path rule exemptions, applied on top of the global `ignore` list.
	[tool.ruff.lint.per-file-ignores]
	# B011 flags `assert False` specifically (not asserts in general); tests may
	# use it to mark code paths that must never be reached.
	"tests/**" = ["B011"] # `assert False` in tests is fine
	# T201 (print found) belongs to the flake8-print family, which `select` above
	# does not enable, so this entry is currently a no-op. It only takes effect
	# once "T20" is added to `select`.
	"scripts/**" = ["T201"] # print() in CLI scripts is fine

	# Formatter settings: space indentation, double-quoted strings, and
	# formatting of code examples found inside docstrings.
	[tool.ruff.format]
	indent-style = "space"
	quote-style = "double"
	docstring-code-format = true

	# ---- Mypy: static type checker -----------------------------------------------
	# We only enforce types on our own code; third-party untyped libs are tolerated.
	[tool.mypy]
	python_version = "3.10"
	# Start lenient; tighten as types stabilise.
	strict = false
	no_implicit_optional = true

	# Extra diagnostics enabled individually while `strict` stays off.
	warn_no_return = true
	warn_redundant_casts = true
	warn_unused_configs = true
	warn_unused_ignores = true

	# What to check and where to resolve first-party imports from.
	files = ["src/captioning", "backend/app", "scripts"]
	mypy_path = ["src", "backend"]
	namespace_packages = true
	explicit_package_bases = true

	# Third-party packages whose missing type information is tolerated, so a
	# missing stub does not fail the type check.
	[[tool.mypy.overrides]]
	ignore_missing_imports = true
	module = [
		"tensorflow.*",
		"transformers.*",
		"huggingface_hub.*",
		"PIL.*",
		"nltk.*",
		"sacrebleu.*",
		"rouge_score.*",
		"pycocoevalcap.*",
	]

	# ---- Pytest -------------------------------------------------------------------
	[tool.pytest.ini_options]
	minversion = "8.0"
	testpaths = ["tests", "backend/app/tests"]
	# Lets `from app.* import ...` resolve in tests
	pythonpath = ["backend"]
	addopts = [
		# Show short summary for non-passing tests
		"-ra",
		"--strict-markers",
		"--strict-config",
		"--showlocals",
	]
	# `--strict-markers` rejects any marker that is not declared here.
	markers = [
		"slow: tests that take >10 seconds (run with -m slow)",
		"gpu: tests requiring a GPU (skipped in CI by default)",
	]
	# Silence deprecation and future warnings raised from TensorFlow modules.
	filterwarnings = [
		"ignore::DeprecationWarning:tensorflow.*",
		"ignore::FutureWarning:tensorflow.*",
	]

	# ---- Coverage -----------------------------------------------------------------
	[tool.coverage.run]
	# Measure branch coverage, not just line coverage.
	branch = true
	source = ["src/captioning", "backend/app"]
	# `omit` entries are file-path patterns, so the wildcards are required:
	# `*/tests/*` matches files in any `tests` directory (including
	# backend/app/tests, which sits inside `source`), whereas a bare `/tests/`
	# would only match that literal root path and omit nothing.
	omit = ["*/tests/*", "*/__init__.py"]

	[tool.coverage.report]
	show_missing = true
	skip_covered = false
	# Regexes; a matching line is excluded from the coverage report. Written as
	# literal strings so nothing needs TOML escaping. The `.` around `__main__`
	# matches either quote character.
	exclude_lines = [
		'pragma: no cover',
		'raise NotImplementedError',
		'if TYPE_CHECKING:',
		'if __name__ == .__main__.:',
	]