---
# =============================================================================
# .pre-commit-config.yaml — automated checks that run on `git commit`.
# -----------------------------------------------------------------------------
# Why pre-commit hooks?
#   They stop broken commits by default — a failed check aborts the commit.
#   This catches lint/type/secret issues at the lowest-cost moment (before
#   they enter history) and is what serious teams expect.
#
# Setup (one-time, per developer):
#   pip install pre-commit
#   pre-commit install                # registers the hooks in .git/hooks/
#   pre-commit run --all-files        # run once over the whole repo
#
# After setup, hooks run automatically on every `git commit`. To bypass them
# in an emergency: `git commit --no-verify` (do not make this a habit).
# =============================================================================

# Hooks are installed for, and run at, the `pre-commit` stage only. On commit,
# pre-commit hands each hook just the staged files (faster). The CI workflow
# runs `pre-commit run --all-files` to catch anything missed locally.
#
# The `pre-commit` stage name replaced the legacy `commit` name in pre-commit
# 3.2.0. Declaring the minimum version gives developers on an older client a
# clear "please upgrade" message instead of a failure on an unknown stage name.
minimum_pre_commit_version: "3.2.0"
default_install_hook_types: [pre-commit]
default_stages: [pre-commit]
fail_fast: false                              # Show ALL failures, not just first

repos:
  # ---------------------------------------------------------------------------
  # General hygiene: whitespace, line endings, YAML/TOML syntax, plus guards
  # against leftover conflict markers, oversized files and private keys.
  # ---------------------------------------------------------------------------
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: mixed-line-ending
        args: [--fix=lf]                      # Force LF; CRLF is a Windows trap
      - id: check-yaml
        # Skip GitHub Actions workflows, whichever extension they use
        # (.yml or .yaml).
        exclude: '^\.github/workflows/.*\.ya?ml$'
      - id: check-toml
      - id: check-merge-conflict
      - id: check-added-large-files
        args: [--maxkb=5000]                  # Reject >5MB blobs (use HF Hub)
      - id: check-case-conflict
      - id: detect-private-key

  # ---------------------------------------------------------------------------
  # Ruff covers both linting and formatting for Python, standing in for the
  # black + isort + flake8 trio. Its settings come from pyproject.toml, so the
  # hook behaves the same locally and in CI.
  # ---------------------------------------------------------------------------
  - repo: "https://github.com/astral-sh/ruff-pre-commit"
    rev: "v0.5.0"
    hooks:
      # Linter, applying whatever fixes are safe to automate.
      - id: ruff
        args:
          - "--fix"
      # Formatter; listed after the linter so it runs on the fixed code.
      - id: ruff-format

  # ---------------------------------------------------------------------------
  # mypy performs static type checking. The `files` pattern restricts it to
  # package code, so notebooks and ad-hoc scripts never block a commit.
  # ---------------------------------------------------------------------------
  - repo: "https://github.com/pre-commit/mirrors-mypy"
    rev: "v1.10.1"
    hooks:
      - id: mypy
        # Only paths under src/captioning/ or backend/app/ are checked.
        files: '^(src/captioning|backend/app)/'
        # Extra packages installed into the hook's environment alongside mypy.
        additional_dependencies:
          - "pydantic>=2.7"
          - "pydantic-settings>=2.3"
          - "types-PyYAML"
          - "types-requests"

  # ---------------------------------------------------------------------------
  # nbstripout removes cell outputs from .ipynb files at commit time.
  # Rationale: outputs carry large base64-encoded images and execution state,
  # which make diffs unreadable and can leak data. An output is a *render* of
  # the code rather than source, so it belongs in CI artefacts, not in Git
  # history.
  # ---------------------------------------------------------------------------
  - repo: "https://github.com/kynan/nbstripout"
    rev: "0.7.1"
    hooks:
      - id: nbstripout

  # ---------------------------------------------------------------------------
  # Prettier: format frontend sources (.ts, .tsx, .js, .jsx, .json, .md, .css).
  # Limited to the frontend/ subtree to avoid stepping on Markdown owned by
  # docs writers.
  #
  # Pinned to a stable Prettier release rather than a v4 alpha tag: a
  # pre-release formatter can change its output between alphas, which would
  # churn diffs. Upgrade deliberately once a stable 4.x is available.
  # ---------------------------------------------------------------------------
  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: v3.1.0
    hooks:
      - id: prettier
        files: ^frontend/.*\.(ts|tsx|js|jsx|json|md|css)$

  # ---------------------------------------------------------------------------
  # gitleaks looks for credentials committed by accident (API keys, tokens,
  # private keys), so the mistake is caught BEFORE it reaches a public remote.
  # ---------------------------------------------------------------------------
  - repo: "https://github.com/gitleaks/gitleaks"
    rev: "v8.18.4"
    hooks:
      - id: gitleaks