| | """ |
| | FinEE Merchants - Tier 2 VPA-to-Merchant and Merchant-to-Category mapping. |
| | |
| | Rule-based mappings for enriching extracted entities with merchant names |
| | and transaction categories. |
| | """ |
| |
|
| | from typing import Optional, Dict, Tuple |
| | import re |
| |
|
| |
|
| | |
# Maps a VPA handle (the part after '@', lowercase) to the UPI app or bank
# that get_bank_from_vpa() reports for it.
VPA_BANKS: Dict[str, str] = {
    'ybl': 'PhonePe',
    'paytm': 'Paytm',
    # Google Pay handles all carry the 'ok' prefix.
    'okaxis': 'Google Pay',
    'oksbi': 'Google Pay',
    'okhdfcbank': 'Google Pay',
    'okicici': 'Google Pay',
    # '@axl' is issued by PhonePe (via Axis Bank), not Google Pay.
    'axl': 'PhonePe',
    # NOTE(review): '@ibl' is also a PhonePe handle (via ICICI Bank); the bank
    # name is kept here for backward compatibility -- confirm which is wanted.
    'ibl': 'ICICI Bank',
    'upi': 'Generic UPI',
    'apl': 'Amazon Pay',
    'fbl': 'Federal Bank',
    'icici': 'ICICI Bank',
    'hdfcbank': 'HDFC Bank',
    'sbi': 'SBI',
}
| |
|
| |
|
| | |
# Merchant table grouped by category. Each entry is (lookup key, display name).
# Group order and entry order determine the insertion order of
# KNOWN_MERCHANTS, so keep them stable when adding entries.
_MERCHANTS_BY_CATEGORY = (
    # Food delivery, restaurants and groceries
    ('food', (
        ('swiggy', 'Swiggy'),
        ('zomato', 'Zomato'),
        ('dominos', "Domino's"),
        ('pizzahut', 'Pizza Hut'),
        ('mcdonalds', "McDonald's"),
        ('burgerking', 'Burger King'),
        ('starbucks', 'Starbucks'),
        ('kfc', 'KFC'),
        ('subway', 'Subway'),
        ('dunkin', 'Dunkin'),
        ('blinkit', 'Blinkit'),
        ('zepto', 'Zepto'),
        ('bigbasket', 'BigBasket'),
        ('instamart', 'Swiggy Instamart'),
    )),
    # Online and offline retail
    ('shopping', (
        ('amazon', 'Amazon'),
        ('flipkart', 'Flipkart'),
        ('myntra', 'Myntra'),
        ('ajio', 'Ajio'),
        ('nykaa', 'Nykaa'),
        ('meesho', 'Meesho'),
        ('snapdeal', 'Snapdeal'),
        ('tatacliq', 'Tata Cliq'),
        ('reliance', 'Reliance'),
        ('dmart', 'D-Mart'),
        ('croma', 'Croma'),
        ('vijaysales', 'Vijay Sales'),
    )),
    # Rides, travel booking and fuel
    ('transport', (
        ('uber', 'Uber'),
        ('ola', 'Ola'),
        ('rapido', 'Rapido'),
        ('irctc', 'IRCTC'),
        ('redbus', 'redBus'),
        ('makemytrip', 'MakeMyTrip'),
        ('goibibo', 'Goibibo'),
        ('yatra', 'Yatra'),
        ('cleartrip', 'Cleartrip'),
        ('easemytrip', 'EaseMyTrip'),
        ('metro', 'Metro'),
        ('fastag', 'FASTag'),
        ('iocl', 'Indian Oil'),
        ('bpcl', 'Bharat Petroleum'),
        ('hpcl', 'HP Petrol'),
    )),
    # Telecom and household bills
    ('utilities', (
        ('jio', 'Jio'),
        ('airtel', 'Airtel'),
        ('vi', 'Vi'),
        ('bsnl', 'BSNL'),
        ('tatapower', 'Tata Power'),
        ('adanigas', 'Adani Gas'),
        ('mahanagar', 'Mahanagar Gas'),
        ('bescom', 'BESCOM'),
        ('electricity', 'Electricity'),
        ('water', 'Water Bill'),
        ('gas', 'Gas Bill'),
    )),
    # Streaming, cinemas and ticketing
    ('entertainment', (
        ('netflix', 'Netflix'),
        ('prime', 'Amazon Prime'),
        ('hotstar', 'Disney+ Hotstar'),
        ('spotify', 'Spotify'),
        ('bookmyshow', 'BookMyShow'),
        ('pvr', 'PVR'),
        ('inox', 'Inox'),
        ('youtube', 'YouTube'),
        ('zee5', 'Zee5'),
        ('sonyliv', 'SonyLiv'),
        ('jiocinema', 'JioCinema'),
    )),
    # Pharmacies and healthcare providers
    ('healthcare', (
        ('apollo', 'Apollo'),
        ('pharmeasy', 'PharmEasy'),
        ('netmeds', 'Netmeds'),
        ('1mg', '1mg'),
        ('practo', 'Practo'),
        ('medplus', 'MedPlus'),
    )),
    # Online learning
    ('education', (
        ('byjus', "Byju's"),
        ('unacademy', 'Unacademy'),
        ('upgrad', 'upGrad'),
        ('coursera', 'Coursera'),
        ('udemy', 'Udemy'),
        ('vedantu', 'Vedantu'),
    )),
    # Brokers and investment platforms
    ('investment', (
        ('zerodha', 'Zerodha'),
        ('groww', 'Groww'),
        ('upstox', 'Upstox'),
        ('paytmmoney', 'Paytm Money'),
        ('kuvera', 'Kuvera'),
        ('coin', 'Zerodha Coin'),
    )),
    # Insurance, also filed under the 'investment' category
    ('investment', (
        ('lic', 'LIC'),
        ('policybazaar', 'PolicyBazaar'),
        ('acko', 'Acko'),
        ('digit', 'Digit Insurance'),
    )),
)

# Flat lookup: lowercase key -> (display name, category).
KNOWN_MERCHANTS: Dict[str, Tuple[str, str]] = {
    key: (display_name, category)
    for category, entries in _MERCHANTS_BY_CATEGORY
    for key, display_name in entries
}
| |
|
| |
|
| | |
# Keywords per category for text-based categorisation. Each value is a list
# of lowercase keywords; category order is the dict's insertion order.
CATEGORY_KEYWORDS: Dict[str, list] = {
    'food': (
        'food restaurant cafe coffee lunch dinner breakfast '
        'snack meal pizza burger biryani curry thali'
    ).split(),
    'shopping': (
        'shopping purchase order buy shop store mart '
        'fashion clothing electronics mobile laptop'
    ).split(),
    'transport': (
        'cab taxi ride travel flight train bus '
        'petrol diesel fuel toll parking metro'
    ).split(),
    'utilities': (
        'recharge bill electricity water gas internet '
        'broadband postpaid prepaid dth'
    ).split(),
    'entertainment': (
        'movie ticket show subscription stream '
        'music game concert event'
    ).split(),
    'transfer': 'transfer sent paid payment'.split(),
    'salary': 'salary wages income pay'.split(),
    'healthcare': (
        'hospital clinic medicine pharmacy doctor '
        'health medical diagnostic'
    ).split(),
    'education': (
        'school college university course tuition '
        'fees education training'
    ).split(),
}
| |
|
| |
|
def extract_merchant_from_vpa(vpa: str) -> Optional[str]:
    """
    Extract merchant name from UPI VPA.

    Only the part before '@' is examined, case-insensitively. Matching runs
    in steps of decreasing confidence:

    1. The whole username equals a KNOWN_MERCHANTS key.
    2. A key of four or more characters occurs anywhere in the username.
    3. A key of up to three characters (e.g. 'vi', 'ola', 'jio') starts one
       of the username's alphanumeric tokens.

    Steps 2 and 3 try longer keys first, so 'jiocinema.pay' resolves to
    JioCinema rather than Jio. Short keys are not matched in the middle of a
    word, so personal handles such as 'ravi.kumar' no longer resolve to Vi.

    Args:
        vpa: UPI VPA (e.g., 'swiggy@ybl')

    Returns:
        Merchant name if found, None otherwise
    """
    if not vpa:
        return None

    # split() returns the whole string when there is no '@'.
    username = vpa.lower().strip().split('@')[0].strip()
    if not username:
        return None

    exact = KNOWN_MERCHANTS.get(username)
    if exact is not None:
        return exact[0]

    # Keys this short are too ambiguous for a free substring search.
    short_key_max_len = 3
    tokens = [tok for tok in re.split(r'[^a-z0-9]+', username) if tok]

    # sorted() is stable, so keys of equal length keep their table order.
    for key in sorted(KNOWN_MERCHANTS, key=len, reverse=True):
        if len(key) > short_key_max_len:
            matched = key in username
        else:
            matched = any(tok.startswith(key) for tok in tokens)
        if matched:
            return KNOWN_MERCHANTS[key][0]

    return None
| |
|
| |
|
def get_category_from_merchant(merchant: str) -> Optional[str]:
    """
    Get category from merchant name.

    The name is compared case-insensitively against KNOWN_MERCHANTS in
    three steps:

    1. Exact match on a lookup key (e.g. 'vi', 'jio').
    2. Exact match on a display name (e.g. 'Amazon Prime', "Domino's").
    3. Fuzzy match, longest key first: the key occurs in the merchant
       name, or the merchant name occurs in the display name.

    The exact steps run first so that a short name is not captured by an
    unrelated entry that merely contains it ('Vi' inside 'Vijay Sales',
    'Jio' inside 'Ajio'), and so that 'Amazon Prime' is not categorised
    through the shorter 'amazon' key.

    Args:
        merchant: Merchant name

    Returns:
        Category string if found, None otherwise
    """
    if not merchant:
        return None

    needle = merchant.lower().strip()
    # An empty needle is a substring of every name; reject it explicitly.
    if not needle:
        return None

    entry = KNOWN_MERCHANTS.get(needle)
    if entry is not None:
        return entry[1]

    for name, category in KNOWN_MERCHANTS.values():
        if name.lower() == needle:
            return category

    # sorted() is stable, so keys of equal length keep their table order.
    for key in sorted(KNOWN_MERCHANTS, key=len, reverse=True):
        name, category = KNOWN_MERCHANTS[key]
        if key in needle or needle in name.lower():
            return category

    return None
| |
|
| |
|
def get_category_from_text(text: str) -> Optional[str]:
    """
    Infer category from transaction text using keywords.

    Each category in CATEGORY_KEYWORDS is scored by how many of its
    keywords occur as substrings of the lowercased text. The category with
    the highest score wins; on a tie the category listed first in
    CATEGORY_KEYWORDS is returned.

    Args:
        text: Transaction description

    Returns:
        Category string if found, None otherwise
    """
    if not text:
        return None

    haystack = text.lower()

    best_category = None
    best_hits = 0
    for category, keywords in CATEGORY_KEYWORDS.items():
        hits = len([kw for kw in keywords if kw in haystack])
        # Strict '>' keeps the earliest category when scores are equal and
        # ignores categories with no hits at all.
        if hits > best_hits:
            best_category = category
            best_hits = hits

    return best_category
| |
|
| |
|
def get_merchant_and_category(vpa: Optional[str] = None,
                              text: Optional[str] = None) -> Tuple[Optional[str], Optional[str]]:
    """
    Get merchant and category from VPA and/or text.

    The merchant is resolved from the VPA only. The category comes from the
    resolved merchant when there is one, and otherwise falls back to
    keyword matching on the text.

    Args:
        vpa: UPI VPA
        text: Transaction text

    Returns:
        Tuple of (merchant, category)
    """
    merchant = extract_merchant_from_vpa(vpa) if vpa else None
    category = get_category_from_merchant(merchant) if merchant else None

    # Text keywords are only consulted when the merchant gave no category.
    if text and not category:
        category = get_category_from_text(text)

    return merchant, category
| |
|
| |
|
def get_bank_from_vpa(vpa: str) -> Optional[str]:
    """
    Get bank/app name from VPA suffix.

    The handle (the segment after the first '@') is stripped of surrounding
    whitespace and lowercased before the VPA_BANKS lookup, so inputs such
    as 'user@YBL ' or 'user@ybl\\n' resolve the same as 'user@ybl'.

    Args:
        vpa: UPI VPA

    Returns:
        Bank/app name if found, None otherwise
    """
    if not vpa or '@' not in vpa:
        return None

    suffix = vpa.split('@')[1].strip().lower()
    return VPA_BANKS.get(suffix)
| |
|