image-captioning-api / .pre-commit-config.yaml
apoorvrajdev's picture
feat: bootstrap production-grade ML repository tooling
b2594db
# =============================================================================
# .pre-commit-config.yaml — automated checks that run on `git commit`.
# -----------------------------------------------------------------------------
# Why pre-commit hooks?
# They stop broken commits at the source — a failed check aborts the commit.
# This catches lint/type/secret issues at the lowest-cost moment (before they
# enter history) and is what serious teams expect.
#
# Setup (one-time, per developer):
# pip install pre-commit
# pre-commit install # registers the hooks in .git/hooks/
# pre-commit run --all-files # run once over the whole repo
#
# After setup, hooks run automatically on every `git commit`. To bypass them
# in an emergency: `git commit --no-verify` (do not make this a habit).
# =============================================================================
# Hooks run against staged files only by default (faster). The CI workflow
# runs `pre-commit run --all-files` to catch anything missed locally.
#
# The `pre-commit` stage name used in `default_stages` was introduced in
# pre-commit 3.2.0 (earlier releases call the same stage `commit`). Declaring
# the minimum version makes an outdated install fail with a clear "upgrade
# pre-commit" message instead of an opaque config-validation error.
minimum_pre_commit_version: "3.2.0"
default_install_hook_types: [pre-commit]
default_stages: [pre-commit]
fail_fast: false  # Show ALL failures, not just the first
repos:
# ---------------------------------------------------------------------------
# Repository hygiene: whitespace and line-ending fixes, YAML/TOML syntax
# checks, and guards against merge markers, oversized files, filename case
# clashes and committed private keys.
# ---------------------------------------------------------------------------
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v4.6.0
  hooks:
  - id: trailing-whitespace
  - id: end-of-file-fixer
  - id: mixed-line-ending
    # Force LF everywhere; CRLF is a Windows trap.
    args:
    - "--fix=lf"
  - id: check-yaml
    # Skip GitHub Actions workflows (*.yml): some of that YAML uses GHA syntax.
    exclude: '^(\.github/workflows/.*\.yml)$'
  - id: check-toml
  - id: check-merge-conflict
  - id: check-added-large-files
    # Reject blobs larger than ~5 MB (host large artefacts on the HF Hub).
    args:
    - "--maxkb=5000"
  - id: check-case-conflict
  - id: detect-private-key
# ---------------------------------------------------------------------------
# Ruff: a single tool for Python linting and formatting, standing in for the
# black + isort + flake8 trio. Its settings live in pyproject.toml, so this
# hook and CI behave identically.
# ---------------------------------------------------------------------------
- repo: https://github.com/astral-sh/ruff-pre-commit
  rev: v0.5.0
  hooks:
  - id: ruff
    # Auto-fix whatever is safely fixable.
    args:
    - "--fix"
  - id: ruff-format
# ---------------------------------------------------------------------------
# mypy: static type checking, scoped to package code so that notebooks and
# scripts never gate a commit.
# ---------------------------------------------------------------------------
- repo: https://github.com/pre-commit/mirrors-mypy
  rev: v1.10.1
  hooks:
  - id: mypy
    # Only paths under src/captioning/ or backend/app/ are type-checked.
    files: '^(src/captioning|backend/app)/'
    # Extra packages installed into the hook's own environment alongside mypy.
    additional_dependencies:
    - "pydantic>=2.7"
    - "pydantic-settings>=2.3"
    - types-PyYAML
    - types-requests
# ---------------------------------------------------------------------------
# nbstripout: removes cell outputs from .ipynb files at commit time.
# Rationale: outputs carry large base64-encoded images and run state, which
# makes diffs unreadable and can leak data. They are a *render* of the code
# rather than source, so they belong in CI artefacts, not in Git history.
# ---------------------------------------------------------------------------
- repo: https://github.com/kynan/nbstripout
  rev: "0.7.1"
  hooks:
  - id: nbstripout
# ---------------------------------------------------------------------------
# Prettier: formats frontend sources (.ts, .tsx, .js, .jsx, .json, .md, .css).
# Limited to the frontend/ subtree to avoid stepping on Markdown owned by docs
# writers.
#
# Pinned to v3.1.0, the last stable tag of mirrors-prettier. The mirror is
# archived and its later tags (v4.0.0-alpha.*) are pre-releases of Prettier 4,
# which are not suitable for gating commits. `pre-commit autoupdate` will
# propose moving back to an alpha tag — reject that bump.
# NOTE(review): if frontend/ pins its own Prettier version, keep the two in
# step so local hooks and frontend tooling format identically — TODO confirm.
# ---------------------------------------------------------------------------
- repo: https://github.com/pre-commit/mirrors-prettier
  rev: v3.1.0
  hooks:
  - id: prettier
    files: '^frontend/.*\.(ts|tsx|js|jsx|json|md|css)$'
# ---------------------------------------------------------------------------
# gitleaks: secret scanner for credentials committed by accident (API keys,
# tokens, private keys). Stops the mistake BEFORE it reaches a public remote.
# ---------------------------------------------------------------------------
- repo: https://github.com/gitleaks/gitleaks
  rev: "v8.18.4"
  hooks:
  - id: gitleaks