File size: 3,060 Bytes
e28a7b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from __future__ import annotations

import argparse
import logging
import sys

from .config import settings
from .duplicate_detector import DuplicateDetector
from .merchant_alias import MerchantAliasResolver
from .repositories import (
    ExpenseRepository,
    MerchantAliasRepository,
    MergeSuggestionRepository,
    build_client,
)


def configure_logging(verbose: bool) -> None:
    """Set up root logging for the command-line run.

    Args:
        verbose: When true the threshold is ``logging.DEBUG``; otherwise it
            is ``logging.INFO``.

    Records are formatted as ``<timestamp> <level name> <message>``.
    Configuration goes through ``logging.basicConfig``, which leaves the root
    logger untouched if it already has handlers attached.
    """
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO

    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=threshold,
    )


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options for the duplicate-detection run.

    Args:
        argv: Argument strings to parse. ``None`` is passed straight through
            to ``ArgumentParser.parse_args``, which then reads
            ``sys.argv[1:]``.

    Returns:
        A namespace with the attributes ``minutes`` (int), ``amount_pct``
        (float), ``lookback_hours`` (int), ``limit`` (int) and ``verbose``
        (bool). The numeric defaults are read from ``settings``.
    """
    cli = argparse.ArgumentParser(
        description="Detect near-duplicate expenses and write merge suggestions.",
    )

    # (flag, dest, type, default, help) for every value-taking option, listed
    # in the order they should appear in ``--help``.
    value_options = (
        (
            "--minutes",
            "minutes",
            int,
            settings.time_tolerance_minutes,
            "Time tolerance in minutes for comparing expenses (default: %(default)s).",
        ),
        (
            "--amount-pct",
            "amount_pct",
            float,
            # Coerced up front so the default has the same type as a parsed value.
            float(settings.amount_tolerance_pct),
            "Amount tolerance percentage (default: %(default)s).",
        ),
        (
            "--lookback-hours",
            "lookback_hours",
            int,
            settings.default_lookback_hours,
            "How far back to fetch expenses (default: %(default)s).",
        ),
        (
            "--limit",
            "limit",
            int,
            settings.max_batch_size,
            "Maximum number of expenses to scan (default: %(default)s).",
        ),
    )
    for flag, dest, caster, fallback, help_text in value_options:
        cli.add_argument(
            flag,
            dest=dest,
            type=caster,
            default=fallback,
            help=help_text,
        )

    cli.add_argument(
        "--verbose",
        action="store_true",
        help="Enable debug logging.",
    )
    return cli.parse_args(argv)


def main(argv: list[str] | None = None) -> int:
    """Run one duplicate-detection pass and return the process exit code.

    Steps, in order:
      1. Parse the CLI options and configure logging.
      2. Build the client and load every merchant alias into a resolver.
      3. Fetch recent expenses using the ``lookback_hours`` and ``limit``
         options.
      4. Ask the detector for duplicate clusters and persist them as merge
         suggestions.

    Args:
        argv: Argument strings forwarded to ``parse_args``; ``None`` lets it
            fall back to the process arguments.

    Returns:
        ``0`` on every path that completes, including the "nothing to do"
        cases. Nothing is caught here, so exceptions raised by the
        repositories or the detector propagate to the caller.
    """
    options = parse_args(argv)
    configure_logging(options.verbose)

    db = build_client()

    # The resolver is populated first and later handed to the detector.
    alias_source = MerchantAliasRepository.from_client(db)
    resolver = MerchantAliasResolver()
    resolver.load_from_cursor(alias_source.fetch_all())

    recent = ExpenseRepository.from_client(db).fetch_recent(
        options.lookback_hours,
        options.limit,
    )
    if not recent:
        logging.info("No expenses found for lookback window")
        return 0

    detector = DuplicateDetector(
        alias_resolver=resolver,
        suggestions_repo=MergeSuggestionRepository.from_client(db),
        amount_tolerance_pct=options.amount_pct,
        time_tolerance_minutes=options.minutes,
    )

    found = detector.find_clusters(recent)
    if found:
        written = detector.persist_suggestions(found)
        logging.info(
            "Finished writing %d suggestions. Example message: %s",
            len(written),
            "These seem similar. Would you like to merge them?",
        )
    else:
        logging.info("No duplicate clusters detected")
    return 0


# Script entry point: hand main()'s return value to the interpreter as the
# process exit status. The module uses package-relative imports, so it has to
# be started as a module (``python -m <package>.<module>``) rather than by
# file path.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)