"""Rule-based categorization of transactions from merchant/note text."""

import functools
import re

# Ordered keyword rules: the first rule with a matching pattern wins, so
# earlier entries take precedence over later ones. A rule flagged
# "positive_only" is skipped when the amount is known and not positive.
CATEGORY_RULES = [
    {
        "key": "income",
        "name": "Income",
        "patterns": [
            "salary", "payroll", "paycheck", "credited", "deposit",
            "interest", "refund",
        ],
        "positive_only": True,
    },
    {
        "key": "dining",
        "name": "Food & Dining",
        "patterns": [
            "zomato", "swiggy", "mcdonald", "domino", "pizza", "restaurant",
            "cafe", "burger", "kfc", "subway", "dining",
        ],
    },
    {
        "key": "coffee",
        "name": "Coffee & Cafes",
        "patterns": ["starbucks", "coffee", "ccd", "barista"],
    },
    {
        "key": "groceries",
        "name": "Groceries",
        "patterns": [
            "grocery", "groceries", "dmart", "bigbasket", "blinkit", "zepto",
            "walmart", "target", "whole foods", "supermarket",
        ],
    },
    {
        "key": "transportation",
        "name": "Transportation",
        "patterns": [
            "uber", "ola", "rapido", "metro", "fuel", "petrol", "gas",
            "shell", "parking", "toll",
        ],
    },
    {
        "key": "entertainment",
        "name": "Entertainment",
        "patterns": [
            "netflix", "spotify", "prime", "hotstar", "bookmyshow", "cinema",
            "movie", "youtube",
        ],
    },
    {
        "key": "shopping",
        "name": "Shopping",
        "patterns": [
            "amazon", "flipkart", "myntra", "ajio", "nykaa", "shopping",
            "store", "mall",
        ],
    },
    {
        "key": "healthcare",
        "name": "Healthcare",
        "patterns": [
            "pharmacy", "medical", "hospital", "clinic", "doctor", "apollo",
            "1mg", "pharmeasy",
        ],
    },
    {
        "key": "education",
        "name": "Education",
        "patterns": [
            "school", "college", "tuition", "course", "udemy", "coursera",
            "book", "exam",
        ],
    },
    {
        "key": "housing",
        "name": "Housing",
        "patterns": [
            "rent", "maintenance", "electricity", "water bill", "gas bill",
            "mortgage", "society",
        ],
    },
]

# Category reported when no rule matches and the amount is not positive.
FALLBACK_CATEGORY = {"key": "miscellaneous", "name": "Miscellaneous"}

# Category / key values that carry no real information about a transaction.
_GENERIC_CATEGORY_VALUES = frozenset(
    {"", None, "other", "misc", "miscellaneous", "uncategorized"}
)

_WHITESPACE_RE = re.compile(r"\s+")


def normalize_text(*values):
    """Join *values* into one lowercase, single-spaced, stripped string.

    Falsy values (``None``, ``""``, ``0``) contribute an empty string; every
    other value is converted with ``str()``.
    """
    text = " ".join(str(value or "") for value in values).lower()
    return _WHITESPACE_RE.sub(" ", text).strip()


@functools.lru_cache(maxsize=512)
def _compile_pattern(pattern):
    """Compile *pattern* so that it only matches at the start of a word.

    The lookbehind rejects a match that is immediately preceded by a letter
    or digit, so "ola" no longer fires inside "cola" and "rent" no longer
    fires inside "current". The end is left open on purpose: "mcdonald" must
    still match "mcdonalds" and "ola" must still match "olacabs". Underscore
    counts as a separator ("upi_zomato" still matches "zomato").
    """
    return re.compile(r"(?<![^\W_])" + re.escape(pattern))


def _parse_amount(amount):
    """Return *amount* as a float, or ``None`` when missing or not numeric."""
    if amount is None:
        return None
    try:
        return float(amount)
    except (TypeError, ValueError):
        return None


def _find_matching_rule(text, numeric_amount):
    """Return the first rule in ``CATEGORY_RULES`` matching *text*, else None.

    Multi-word phrase patterns are tried before single-word patterns.
    Otherwise a specific phrase such as "gas bill" (Housing) could never win,
    because the generic "gas" (Transportation) sits in an earlier rule.
    Within each pass the list order of ``CATEGORY_RULES`` decides.
    """
    applicable_rules = [
        rule
        for rule in CATEGORY_RULES
        if not (
            rule.get("positive_only")
            and numeric_amount is not None
            and numeric_amount <= 0
        )
    ]
    for want_phrase in (True, False):
        for rule in applicable_rules:
            for pattern in rule["patterns"]:
                if (" " in pattern) != want_phrase:
                    continue
                if _compile_pattern(pattern).search(text):
                    return rule
    return None


def _build_result(name, key, confidence, matched_rule):
    """Assemble the result dict returned by ``categorize_transaction``."""
    return {
        "category": name,
        "category_key": key,
        "confidence": confidence,
        "matched_rule": matched_rule,
    }


def categorize_transaction(merchant, note="", amount=None):
    """Guess a category for a transaction from its merchant and note text.

    Args:
        merchant: Merchant / payee description.
        note: Optional free-text note; searched together with *merchant*.
        amount: Optional amount. Anything ``float()`` cannot parse is treated
            as unknown.

    Returns:
        A dict with the keys ``category`` (display name), ``category_key``,
        ``confidence`` and ``matched_rule``:

        * a keyword rule matched: confidence ``"high"``, ``matched_rule`` is
          the rule key;
        * no rule matched but the amount is positive: Income with confidence
          ``"medium"`` and ``matched_rule`` ``"positive_amount"``;
        * otherwise: ``FALLBACK_CATEGORY`` with confidence ``"low"`` and
          ``matched_rule`` ``None``.
    """
    text = normalize_text(merchant, note)
    numeric_amount = _parse_amount(amount)

    rule = _find_matching_rule(text, numeric_amount)
    if rule is not None:
        return _build_result(rule["name"], rule["key"], "high", rule["key"])

    # Money coming in with no recognizable keyword is most likely income.
    if numeric_amount is not None and numeric_amount > 0:
        return _build_result("Income", "income", "medium", "positive_amount")

    return _build_result(
        FALLBACK_CATEGORY["name"], FALLBACK_CATEGORY["key"], "low", None
    )


def _normalize_label(value):
    """Lowercase and strip *value* if it is a string; else return it as is."""
    return value.strip().lower() if isinstance(value, str) else value


def should_auto_categorize(category=None, category_key=None):
    """Return True when the existing category carries no real information.

    A transaction qualifies when either its key or its display name is
    missing, empty, or one of the generic placeholders ("other", "misc",
    "miscellaneous", "uncategorized"). The comparison ignores case and
    surrounding whitespace.
    """
    return (
        _normalize_label(category_key) in _GENERIC_CATEGORY_VALUES
        or _normalize_label(category) in _GENERIC_CATEGORY_VALUES
    )