# LogicGoInfotechSpaces's picture
# Initial duplicate detector
# e28a7b2
from __future__ import annotations
import argparse
import logging
import sys
from .config import settings
from .duplicate_detector import DuplicateDetector
from .merchant_alias import MerchantAliasResolver
from .repositories import (
ExpenseRepository,
MerchantAliasRepository,
MergeSuggestionRepository,
build_client,
)
def configure_logging(verbose: bool) -> None:
    """Set up root logging for a command-line run.

    Args:
        verbose: When true the level passed to ``logging.basicConfig`` is
            ``DEBUG``; otherwise it is ``INFO``.
    """
    if verbose:
        threshold = logging.DEBUG
    else:
        threshold = logging.INFO

    # Timestamp, severity, then the message itself.
    line_format = "%(asctime)s %(levelname)s %(message)s"
    logging.basicConfig(level=threshold, format=line_format)
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options for a duplicate-detection run.

    Args:
        argv: Argument strings to parse. The value is handed unchanged to
            ``ArgumentParser.parse_args``; with ``None`` argparse falls back
            to the process arguments.

    Returns:
        A namespace exposing ``minutes``, ``amount_pct``, ``lookback_hours``,
        ``limit`` and ``verbose``.
    """
    cli = argparse.ArgumentParser(
        description="Detect near-duplicate expenses and write merge suggestions.",
    )

    # One row per value-taking option: (flag, dest, converter, default, help).
    # Defaults are read from the imported ``settings`` object.
    valued_options = (
        (
            "--minutes",
            "minutes",
            int,
            settings.time_tolerance_minutes,
            "Time tolerance in minutes for comparing expenses (default: %(default)s).",
        ),
        (
            "--amount-pct",
            "amount_pct",
            float,
            float(settings.amount_tolerance_pct),
            "Amount tolerance percentage (default: %(default)s).",
        ),
        (
            "--lookback-hours",
            "lookback_hours",
            int,
            settings.default_lookback_hours,
            "How far back to fetch expenses (default: %(default)s).",
        ),
        (
            "--limit",
            "limit",
            int,
            settings.max_batch_size,
            "Maximum number of expenses to scan (default: %(default)s).",
        ),
    )
    for flag, dest_name, converter, fallback, help_text in valued_options:
        cli.add_argument(
            flag,
            dest=dest_name,
            type=converter,
            default=fallback,
            help=help_text,
        )

    # Boolean switch; registered last to keep the original option order.
    cli.add_argument("--verbose", action="store_true", help="Enable debug logging.")

    return cli.parse_args(argv)
def main(argv: list[str] | None = None) -> int:
    """Run one duplicate-detection pass and return the process exit code.

    Steps, in order: parse options, configure logging, build the client,
    load merchant aliases into a resolver, fetch recent expenses, ask the
    detector for clusters, and persist merge suggestions for them.

    Args:
        argv: Argument strings forwarded to ``parse_args``.

    Returns:
        ``0`` on every path: no expenses fetched, no clusters found, or
        suggestions persisted.
    """
    options = parse_args(argv)
    configure_logging(options.verbose)

    db_client = build_client()

    # Load the alias data into the resolver before any expenses are fetched.
    aliases = MerchantAliasRepository.from_client(db_client)
    resolver = MerchantAliasResolver()
    resolver.load_from_cursor(aliases.fetch_all())

    recent_expenses = ExpenseRepository.from_client(db_client).fetch_recent(
        options.lookback_hours,
        options.limit,
    )
    if not recent_expenses:
        logging.info("No expenses found for lookback window")
        return 0

    # The suggestion repository is only created once there is work to do.
    detector = DuplicateDetector(
        alias_resolver=resolver,
        suggestions_repo=MergeSuggestionRepository.from_client(db_client),
        amount_tolerance_pct=options.amount_pct,
        time_tolerance_minutes=options.minutes,
    )

    duplicate_groups = detector.find_clusters(recent_expenses)
    if not duplicate_groups:
        logging.info("No duplicate clusters detected")
        return 0

    written_ids = detector.persist_suggestions(duplicate_groups)
    sample_prompt = "These seem similar. Would you like to merge them?"
    logging.info(
        "Finished writing %d suggestions. Example message: %s",
        len(written_ids),
        sample_prompt,
    )
    return 0
# Script entry point: run the CLI and hand its return value to the interpreter
# as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)