| """ |
| Category classification utility with comprehensive keyword-based detection |
| """ |
|
|
| |
# Expense categories mapped to the lowercase keyword fragments that identify them.
#
# `_classify_locally` walks the categories in the order written here and returns
# as soon as any keyword occurs as a substring of the lowercased title. Both the
# category order and the keyword text therefore decide the result: a short
# fragment in an earlier category wins over a longer phrase in a later one.
EXPENSE_KEYWORDS = {
    "Housing": [
        'rent', 'mortgage', 'lease', 'hoa', 'property tax', 'home maintenance',
        'repair', 'pest control', 'apartment', 'realtor', 'landlord', 'security deposit',
    ],
    "Food & Dining": [
        'ramen', 'sushi', 'pizza', 'burger', 'sandwich', 'salad',
        'coffee', 'tea', 'soda', 'drink', 'beverage', 'juice',
        'restaurant', 'cafe', 'diner', 'bistro', 'eatery', 'bar',
        'breakfast', 'lunch', 'dinner', 'brunch', 'meal',
        'food', 'dish', 'cuisine', 'menu', 'takeout', 'mcdonald', 'kfc',
        'subway', 'starbucks', 'domino', 'walmart grocery', 'instacart', 'doordash', 'ubereats',
        'grubhub', 'grocery', 'supermarket', 'mart', 'store', 'whole foods',
        'trader joe', 'costco food', 'canteen', 'dining hall', 'bakery', 'donut', 'patisserie',
        'winery', 'brewery', 'pub', 'liquor', 'wine', 'beer', 'safeway', 'kroger', 'tesco',
        'swiggy', 'zomato', 'deliveroo', 'spices', 'sweets', 'organic', 'vegetable', 'meat',
    ],
    "Transportation": [
        'uber', 'lyft', 'taxi', 'cab', 'ride', 'gas', 'fuel', 'petrol', 'diesel',
        'gasoline', 'parking', 'toll', 'highway', 'bus', 'train', 'metro', 'subway',
        'transit', 'car', 'vehicle', 'auto', 'automobile', 'rental', 'rent-a-car',
        'hertz', 'avis', 'enterprise', 'oil change', 'tire', 'garage',
        'charging', 'ev', 'tesla supercharger', 'parking meter', 'amtrak', 'commuter',
        'fare', 'ferry', 'cycling', 'bike', 'scooter', 'fastag', 'puc', 'license', 'ola',
    ],
    "Shopping": [
        'amazon', 'ebay', 'walmart', 'target', 'costco', 'clothing', 'apparel',
        'fashion', 'zara', 'h&m', 'electronics', 'laptop', 'phone', 'tablet',
        'computer', 'apple', 'best buy', 'furniture', 'home decor', 'ikea',
        'homedepot', 'lowe', 'book', 'toy', 'hobby lobby', 'michaels', 'gift',
        'mall', 'outlet', 'department store', 'boutique',
        # 'stationery' is the correct spelling for paper goods; the original
        # misspelling 'stationary' is kept so titles typed that way still match.
        'stationery', 'stationary', 'hardware',
        'jewelry', 'shoes', 'sneakers', 'nike', 'adidas', 'macys', 'nordstrom', 'shein',
        'flipkart', 'myntra', 'ajio', 'meesho', 'nykaa', 'optical', 'watch', 'accessories',
    ],
    "Healthcare": [
        'doctor', 'hospital', 'clinic', 'medical', 'health', 'pharmacy', 'medicine',
        'drug', 'prescription', 'cvs', 'walgreens', 'dental', 'dentist', 'teeth',
        'orthodont', 'vision', 'eye', 'glasses', 'contact', 'therapy', 'counseling',
        'psycholog', 'physio', 'chiropract', 'dermatolog', 'copay', 'deductible',
        'medical bill', 'vaccine', 'checkup', 'test', 'lab', 'blood work',
    ],
    "Insurance": [
        'insurance', 'premium', 'policy', 'state farm', 'geico', 'progressive',
        'allstate', 'liberty mutual', 'auto insurance', 'life insurance',
        'home insurance', 'renters insurance', 'health insurance', 'lic', 'hdfc ergo', 'tata aig',
    ],
    "Entertainment": [
        'movie', 'cinema', 'theater', 'film', 'concert', 'show', 'event',
        'ticketmaster', 'game', 'gaming', 'xbox', 'playstation', 'nintendo', 'steam',
        'gym', 'fitness', 'yoga', 'sport', 'recreation', 'park', 'museum', 'zoo',
        'aquarium', 'gallery', 'hobby', 'club', 'membership', 'nightlife', 'festival',
    ],
    "Subscriptions": [
        'subscription', 'recurring', 'netflix', 'hbo', 'max', 'spotify', 'hulu',
        'disney', 'paramount', 'youtube premium', 'patreon', 'onlyfans', 'medium',
        'substack', 'nyt', 'wsj', 'icloud', 'google one', 'dropbox', 'microsoft 365',
        'adobe', 'canva', 'audible', 'itunes',
    ],
    "Bills & Utilities": [
        'electric', 'power', 'water', 'sewer', 'trash', 'garbage', 'recycling',
        'gas bill', 'heating', 'cooling', 'hvac', 'internet', 'wifi', 'broadband',
        'cable', 'comcast', 'xfinity', 'phone bill', 'mobile', 'cellular',
        'verizon', 'at&t', 't-mobile', 'recharge', 'prepaid', 'postpaid', 'topup',
        'maintenance', 'landline', 'utility', 'energy',
    ],
    "Education": [
        'tuition', 'school', 'college', 'university', 'course', 'class', 'training',
        'workshop', 'udemy', 'coursera', 'edx', 'skillshare', 'tutor', 'seminar',
        'textbook', 'learning', 'study', 'student',
    ],
    "Personal Care": [
        'salon', 'haircut', 'barber', 'spa', 'massage', 'wellness', 'cosmetic',
        'makeup', 'beauty', 'skincare', 'sephora', 'ulta', 'shampoo', 'toiletries',
        'grooming', 'hygiene', 'barbershop',
    ],
    "Travel": [
        'hotel', 'motel', 'airbnb', 'booking', 'accommodation', 'lodging', 'vacation',
        'trip', 'tour', 'travel', 'holiday', 'expedia', 'kayak', 'resort', 'cruise',
        'excursion', 'sightseeing', 'flight', 'airline', 'airport', 'ticket',
        'luggage', 'suitcase', 'backpack', 'travel insurance',
    ],
    "Pets": [
        'pet', 'dog', 'cat', 'vet', 'veterinary', 'chewy', 'pet food', 'pet store',
        'grooming', 'petco', 'petsmart', 'animal', 'puppy', 'kitten', 'boarding', 'daycare',
    ],
    "Family & Kids": [
        'childcare', 'daycare', 'babysitter', 'nanny', 'diapers', 'baby food', 'toys',
        'child support', 'allowance', 'school supplies', 'tutoring', 'camp', 'extracurricular',
    ],
    "Gifts & Donations": [
        'gift', 'present', 'donation', 'charity', 'goodwill', 'contribution',
        'fundraiser', 'nonprofit', 'gofundme', 'birthday', 'wedding', 'anniversary',
        'floral', 'flowers',
    ],
    "Taxes": [
        'tax', 'irs', 'state tax', 'federal tax', 'income tax', 'estimated tax',
        'tax return payment', 'turbo tax', 'hr block', 'h&r block',
    ],
    "Professional Services": [
        'legal', 'lawyer', 'attorney', 'accountant', 'tax prep', 'consultant',
        'advisor', 'notary', 'architect', 'engineer', 'design fee', 'fiverr', 'upwork', 'freelancer',
    ],
    "Investments & Savings": [
        'contribution', 'vanguard', 'fidelity', 'charles schwab',
        'robinhood', '401k', '401-k', 'ira', 'roth', 'stock purchase', 'etf purchase',
        'crypto purchase', 'sip', 'mutual fund', 'zerodha', 'upstox', 'groww',
    ],
    "Other Expenses": [
        'other', 'misc', 'miscellaneous', 'general', 'extra',
    ],
}
|
|
| |
# Income categories mapped to lowercase keyword fragments.
#
# The table is scanned top to bottom by `_classify_locally`, which stops at the
# first keyword contained in the lowercased title, so category order matters.
INCOME_KEYWORDS = {
    "Salary": [
        "salary", "wage", "payroll", "paycheck", "direct deposit",
        "monthly pay", "biweekly", "weekly pay", "stipend",
        "employment", "employer", "company pay",
    ],
    "Freelance & Gigs": [
        "freelance", "contract", "gig", "project",
        "consulting", "consultant", "client work",
        "upwork", "fiverr", "freelancer", "toptal", "taskrabbit",
        "uber income", "lyft income", "doordash income",
    ],
    "Business Revenue": [
        "business", "revenue", "sales", "profit", "commission", "merchant",
        "customer payment", "invoice", "client payment",
        "stripe", "paypal", "square", "shopify", "payout", "e-commerce",
    ],
    "Investments & Dividends": [
        "dividend", "interest", "capital gains", "payout",
        "return on investment", "portfolio", "profit sharing", "yield",
        "invest", "stock return", "stock", "share",
        "nasdaq", "nyse", "trading", "brokerage", "equity",
        "robinhood", "vanguard", "fidelity", "charles schwab",
        "etrade", "ameritrade",
    ],
    "Rental Income": [
        "rent", "rental income", "tenant", "lease",
        "property", "airbnb income", "sublease", "real estate",
    ],
    "Refunds & Cashbacks": [
        "refund", "reimbursement", "cashback", "rebate",
        "rakuten", "return", "credit", "compensation",
        "claim", "honey", "retailmenot", "venmo refund",
    ],
    "Gifts & Bonuses": [
        "gift", "present", "bonus", "prize", "award",
        "lottery", "inheritance", "donation received",
        "birthday gift", "monetary gift",
        "sign-on bonus", "performance bonus",
    ],
    "Crypto Bit Coins": [
        "crypto", "bitcoin", "btc", "eth", "ethereum",
        "binance", "coinbase", "wallet", "blockchain", "ledger",
        "kraken", "doge", "solana", "nft", "tether", "usdt",
    ],
    "Grants & Scholarships": [
        "grant", "scholarship", "financial aid", "fellowship", "bursary",
        "stipend", "education award", "research grant", "academic award",
    ],
    "Pensions & Social Security": [
        "pension", "social security", "ssi", "retirement income",
        "401k withdrawal", "ira withdrawal", "annuity",
    ],
    "Other Income": [
        "miscellaneous", "other", "extra", "side hustle", "passive income",
        "royalty", "allowance", "government", "tax refund", "stimulus",
    ],
}
|
|
def classify_transaction(title, transaction_type='Expense'):
    """Classify a single transaction, trying the AI helper before keyword rules.

    Args:
        title: Transaction title to classify.
        transaction_type: Type label returned unchanged for an empty title and
            forwarded to the keyword fallback. Defaults to ``'Expense'``.

    Returns:
        dict: ``{'category': 'Other', 'type': transaction_type}`` when *title*
        is empty or otherwise falsy. Otherwise the AI helper's result (with a
        ``"source"`` entry defaulted to ``"ai_cloud"``) if it carries a truthy
        ``category``; failing that, the result of ``_classify_locally``.
    """
    if title:
        try:
            # The import sits inside the try so that a missing or broken
            # helper module is handled like any other AI failure.
            from finance.ai_helper import classify_text_with_llm

            llm_answer = classify_text_with_llm(title)
            if llm_answer and llm_answer.get('category'):
                origin = llm_answer.get("source", "ai_cloud")
                llm_answer["source"] = origin
                return llm_answer
        except Exception as err:
            print(f"AI Classification failed: {err}. Falling back to Local Rules.")

        # Reached when the AI call failed or produced no usable category.
        return _classify_locally(title, transaction_type)

    return {'category': 'Other', 'type': transaction_type}
|
|
def _classify_locally(title, transaction_type='Expense'):
    """Classify a title with the keyword tables only (no AI call).

    Matching is a plain substring test against the lowercased title, and the
    first category (in table order) with any matching keyword wins.

    Args:
        title: Transaction title to classify.
        transaction_type: Anything whose string form equals ``"income"``
            (case-insensitive) selects the income table; every other value
            selects the expense table.

    Returns:
        dict: ``{'category': 'Other', 'type': transaction_type}`` for a falsy
        title. Otherwise ``{'category': <name>, 'type': 'Income' | 'Expense'}``,
        where the category falls back to ``'Other Income'`` for income and
        ``'Uncategorized'`` for expenses when no keyword matches.
    """
    if not title:
        return {'category': 'Other', 'type': transaction_type}

    haystack = title.lower()

    if str(transaction_type).lower() == "income":
        keyword_table, kind, no_match = INCOME_KEYWORDS, 'Income', 'Other Income'
    else:
        keyword_table, kind, no_match = EXPENSE_KEYWORDS, 'Expense', 'Uncategorized'

    # `next` over the generator stops at the first category with a hit,
    # falling back to the per-type default when nothing matches.
    chosen = next(
        (
            name
            for name, fragments in keyword_table.items()
            if any(fragment.lower() in haystack for fragment in fragments)
        ),
        no_match,
    )
    return {'category': chosen, 'type': kind}
|
|
def batch_classify_transactions(transactions):
    """Classify a list of transactions with local rules first, batch AI second.

    Transactions with a recognised type (``expense``/``income``, any case) are
    run through ``_classify_locally``; any result other than ``'Uncategorized'``
    is accepted as-is. Everything else, including transactions whose type is
    not recognised, is sent to the batch AI helper in a single call.

    Args:
        transactions: Sized sequence of dict-like objects read via
            ``.get('title', '')`` and ``.get('type', 'Expense')``. A ``type``
            of ``None`` is treated like a missing key; any other
            unrecognised value (including non-strings) is sent to the AI
            helper as ``'Unknown'`` instead of raising.

    Returns:
        list[dict]: One ``{'category': ..., 'type': ...}`` dict per input, in
        input order. Entries the AI helper could not fill (helper failure or
        too few results) become ``'Uncategorized'`` with the transaction's
        supplied type.
    """
    if not transactions:
        return []

    results = [None] * len(transactions)
    raw_types = []  # type as supplied per transaction (None -> 'Expense')
    to_ai_indices = []
    to_ai_titles = []
    to_ai_types = []

    for idx, tx in enumerate(transactions):
        title = tx.get('title', '')
        raw_type = tx.get('type', 'Expense')
        if raw_type is None:
            # An explicit null type means "not provided"; use the same default
            # as a missing key rather than crashing on None.lower().
            raw_type = 'Expense'
        raw_types.append(raw_type)

        # str() mirrors _classify_locally and keeps non-string types from raising.
        if str(raw_type).lower() in ('expense', 'income'):
            t_type = raw_type
            local_result = _classify_locally(title, t_type)
            if local_result['category'] != 'Uncategorized':
                results[idx] = local_result
                continue
        else:
            # Local rules cannot pick a keyword table without a valid type.
            t_type = 'Unknown'

        to_ai_indices.append(idx)
        to_ai_titles.append(title)
        to_ai_types.append(t_type)

    if to_ai_indices:
        try:
            from finance.ai_helper import batch_classify_transactions_with_llm
            ai_results = batch_classify_transactions_with_llm(to_ai_titles, to_ai_types)

            # zip stops at the shorter sequence, so surplus or missing AI
            # results can never index out of range.
            for idx, sent_type, res_obj in zip(to_ai_indices, to_ai_types, ai_results):
                default_type = 'Expense' if sent_type == 'Unknown' else sent_type
                results[idx] = {
                    'category': res_obj.get('category', 'Uncategorized'),
                    'type': res_obj.get('type', default_type),
                }
        except Exception as e:
            # Best effort: anything left unfilled is handled by the fallback below.
            print(f"Batch AI classification failed: {e}")

    # Single fallback for every slot still empty (AI failure or short response).
    return [
        result if result is not None
        else {'category': 'Uncategorized', 'type': raw_types[idx]}
        for idx, result in enumerate(results)
    ]
|
|