""" Category classification utility with comprehensive keyword-based detection """ # --- EXPENSE CATEGORIES --- EXPENSE_KEYWORDS = { "Housing": [ 'rent', 'mortgage', 'lease', 'hoa', 'property tax', 'home maintenance', 'repair', 'pest control', 'apartment', 'realtor', 'landlord', 'security deposit' ], "Food & Dining": [ 'ramen', 'sushi', 'pizza', 'burger', 'sandwich', 'salad', 'coffee', 'tea', 'soda', 'drink', 'beverage', 'juice', 'restaurant', 'cafe', 'diner', 'bistro', 'eatery', 'bar', 'breakfast', 'lunch', 'dinner', 'brunch', 'meal', 'food', 'dish', 'cuisine', 'menu', 'takeout', 'mcdonald', 'kfc', 'subway', 'starbucks', 'domino', 'walmart grocery', 'instacart', 'doordash', 'ubereats', 'grubhub', 'grocery', 'supermarket', 'mart', 'store', 'whole foods', 'trader joe', 'costco food', 'canteen', 'dining hall', 'bakery', 'donut', 'patisserie', 'winery', 'brewery', 'pub', 'liquor', 'wine', 'beer', 'safeway', 'kroger', 'tesco', 'swiggy', 'zomato', 'deliveroo', 'spices', 'sweets', 'organic', 'vegetable', 'meat' ], "Transportation": [ 'uber', 'lyft', 'taxi', 'cab', 'ride', 'gas', 'fuel', 'petrol', 'diesel', 'gasoline', 'parking', 'toll', 'highway', 'bus', 'train', 'metro', 'subway', 'transit', 'car', 'vehicle', 'auto', 'automobile', 'rental', 'rent-a-car', 'hertz', 'avis', 'enterprise', 'oil change', 'tire', 'garage', 'charging', 'ev', 'tesla supercharger', 'parking meter', 'amtrak', 'commuter', 'fare', 'ferry', 'cycling', 'bike', 'scooter', 'fastag', 'puc', 'license', 'ola' ], "Shopping": [ 'amazon', 'ebay', 'walmart', 'target', 'costco', 'clothing', 'apparel', 'fashion', 'zara', 'h&m', 'electronics', 'laptop', 'phone', 'tablet', 'computer', 'apple', 'best buy', 'furniture', 'home decor', 'ikea', 'homedepot', 'lowe', 'book', 'toy', 'hobby lobby', 'michaels', 'gift', 'mall', 'outlet', 'department store', 'boutique', 'stationary', 'hardware', 'jewelry', 'shoes', 'sneakers', 'nike', 'adidas', 'macys', 'nordstrom', 'shein', 'flipkart', 'myntra', 'ajio', 'meesho', 'nykaa', 'optical', 'watch', 'accessories' ], "Healthcare": [ 'doctor', 'hospital', 'clinic', 'medical', 'health', 'pharmacy', 'medicine', 'drug', 'prescription', 'cvs', 'walgreens', 'dental', 'dentist', 'teeth', 'orthodont', 'vision', 'eye', 'glasses', 'contact', 'therapy', 'counseling', 'psycholog', 'physio', 'chiropract', 'dermatolog', 'copay', 'deductible', 'medical bill', 'vaccine', 'checkup', 'test', 'lab', 'blood work' ], "Insurance": [ 'insurance', 'premium', 'policy', 'state farm', 'geico', 'progressive', 'allstate', 'liberty mutual', 'auto insurance', 'life insurance', 'home insurance', 'renters insurance', 'health insurance', 'lic', 'hdfc ergo', 'tata aig' ], "Entertainment": [ 'movie', 'cinema', 'theater', 'film', 'concert', 'show', 'event', 'ticketmaster', 'game', 'gaming', 'xbox', 'playstation', 'nintendo', 'steam', 'gym', 'fitness', 'yoga', 'sport', 'recreation', 'park', 'museum', 'zoo', 'aquarium', 'gallery', 'hobby', 'club', 'membership', 'nightlife', 'festival' ], "Subscriptions": [ 'subscription', 'recurring', 'netflix', 'hbo', 'max', 'spotify', 'hulu', 'disney', 'paramount', 'youtube premium', 'patreon', 'onlyfans', 'medium', 'substack', 'nyt', 'wsj', 'icloud', 'google one', 'dropbox', 'microsoft 365', 'adobe', 'canva', 'audible', 'itunes' ], "Bills & Utilities": [ 'electric', 'power', 'water', 'sewer', 'trash', 'garbage', 'recycling', 'gas bill', 'heating', 'cooling', 'hvac', 'internet', 'wifi', 'broadband', 'cable', 'comcast', 'xfinity', 'phone bill', 'mobile', 'cellular', 'verizon', 'at&t', 't-mobile', 'recharge', 'prepaid', 'postpaid', 'topup', 'maintenance', 'landline', 'utility', 'energy' ], "Education": [ 'tuition', 'school', 'college', 'university', 'course', 'class', 'training', 'workshop', 'udemy', 'coursera', 'edx', 'skillshare', 'tutor', 'seminar', 'textbook', 'learning', 'study', 'student' ], "Personal Care": [ 'salon', 'haircut', 'barber', 'spa', 'massage', 'wellness', 'cosmetic', 'makeup', 'beauty', 'skincare', 'sephora', 'ulta', 'shampoo', 'toiletries', 'grooming', 'hygiene', 'barbershop' ], "Travel": [ 'hotel', 'motel', 'airbnb', 'booking', 'accommodation', 'lodging', 'vacation', 'trip', 'tour', 'travel', 'holiday', 'expedia', 'kayak', 'resort', 'cruise', 'excursion', 'sightseeing', 'flight', 'airline', 'airport', 'ticket', 'luggage', 'suitcase', 'backpack', 'travel insurance' ], "Pets": [ 'pet', 'dog', 'cat', 'vet', 'veterinary', 'chewy', 'pet food', 'pet store', 'grooming', 'petco', 'petsmart', 'animal', 'puppy', 'kitten', 'boarding', 'daycare' ], "Family & Kids": [ 'childcare', 'daycare', 'babysitter', 'nanny', 'diapers', 'baby food', 'toys', 'child support', 'allowance', 'school supplies', 'tutoring', 'camp', 'extracurricular' ], "Gifts & Donations": [ 'gift', 'present', 'donation', 'charity', 'goodwill', 'contribution', 'fundraiser', 'nonprofit', 'gofundme', 'birthday', 'wedding', 'anniversary', 'floral', 'flowers' ], "Taxes": [ 'tax', 'irs', 'state tax', 'federal tax', 'income tax', 'estimated tax', 'tax return payment', 'turbo tax', 'hr block', 'h&r block' ], "Professional Services": [ 'legal', 'lawyer', 'attorney', 'accountant', 'tax prep', 'consultant', 'advisor', 'notary', 'architect', 'engineer', 'design fee', 'fiverr', 'upwork', 'freelancer' ], "Investments & Savings": [ 'contribution', 'vanguard', 'fidelity', 'charles schwab', 'robinhood', '401k', '401-k', 'ira', 'roth', 'stock purchase', 'etf purchase', 'crypto purchase', 'sip', 'mutual fund', 'zerodha', 'upstox', 'groww' ], "Other Expenses": [ 'other', 'misc', 'miscellaneous', 'general', 'extra' ] } # --- INCOME CATEGORIES --- INCOME_KEYWORDS = { "Salary": [ 'salary', 'wage', 'payroll', 'paycheck', 'direct deposit', 'monthly pay', 'biweekly', 'weekly pay', 'stipend', 'employment', 'employer', 'company pay' ], "Freelance & Gigs": [ 'freelance', 'contract', 'gig', 'project', 'consulting', 'consultant', 'client work', 'upwork', 'fiverr', 'freelancer', 'toptal', 'taskrabbit', 'uber income', 'lyft income', 'doordash income' ], "Business Revenue": [ 'business', 'revenue', 'sales', 'profit', 'commission', 'merchant', 'customer payment', 'invoice', 'client payment', 'stripe', 'paypal', 'square', 'shopify', 'payout', 'e-commerce' ], "Investments & Dividends": [ 'dividend', 'interest', 'capital gains', 'payout', 'return on investment', 'portfolio', 'profit sharing', 'yield', 'invest', 'stock return', 'stock', 'share', 'nasdaq', 'nyse', 'trading', 'brokerage', 'equity', 'robinhood', 'vanguard', 'fidelity', 'charles schwab', 'etrade', 'ameritrade' ], "Rental Income": [ 'rent', 'rental income', 'tenant', 'lease', 'property', 'airbnb income', 'sublease', 'real estate' ], "Refunds & Cashbacks": [ 'refund', 'reimbursement', 'cashback', 'rebate', 'rakuten', 'return', 'credit', 'compensation', 'claim', 'honey', 'retailmenot', 'venmo refund' ], "Gifts & Bonuses": [ 'gift', 'present', 'bonus', 'prize', 'award', 'lottery', 'inheritance', 'donation received', 'birthday gift', 'monetary gift', 'sign-on bonus', 'performance bonus' ], "Crypto Bit Coins": [ 'crypto', 'bitcoin', 'btc', 'eth', 'ethereum', 'binance', 'coinbase', 'wallet', 'blockchain', 'ledger', 'kraken', 'doge', 'solana', 'nft', 'tether', 'usdt' ], "Grants & Scholarships": [ 'grant', 'scholarship', 'financial aid', 'fellowship', 'bursary', 'stipend', 'education award', 'research grant', 'academic award' ], "Pensions & Social Security": [ 'pension', 'social security', 'ssi', 'retirement income', '401k withdrawal', 'ira withdrawal', 'annuity' ], "Other Income": [ 'miscellaneous', 'other', 'extra', 'side hustle', 'passive income', 'royalty', 'allowance', 'government', 'tax refund', 'stimulus' ] } def classify_transaction(title, transaction_type='Expense'): """ Classify transaction category based on title keywords """ if not title: return {'category': 'Other', 'type': transaction_type} # --- STRATEGY 1: Cloud AI (Gemini Fallback) --- try: from finance.ai_helper import classify_text_with_llm ai_result = classify_text_with_llm(title) if ai_result and ai_result.get('category'): ai_result["source"] = ai_result.get("source", "ai_cloud") return ai_result except Exception as e: print(f"AI Classification failed: {e}. Falling back to Local Rules.") return _classify_locally(title, transaction_type) def _classify_locally(title, transaction_type='Expense'): """ Internal helper for keyword-based classification without AI calls. """ if not title: return {'category': 'Other', 'type': transaction_type} title_lower = title.lower() t_type_norm = "Income" if str(transaction_type).lower() == "income" else "Expense" # Check keywords based on normalized type if t_type_norm == 'Income': for category, keywords in INCOME_KEYWORDS.items(): for kw in keywords: if kw.lower() in title_lower: return {'category': category, 'type': 'Income'} else: for category, keywords in EXPENSE_KEYWORDS.items(): for kw in keywords: if kw.lower() in title_lower: return {'category': category, 'type': 'Expense'} # --- FINAL DEFAULT --- if t_type_norm == 'Income': return {'category': 'Other Income', 'type': 'Income'} return {'category': 'Uncategorized', 'type': 'Expense'} def batch_classify_transactions(transactions): """ Classify a list of transactions efficiently using a mix of local rules and batch AI. """ if not transactions: return [] results = [None] * len(transactions) to_ai_indices = [] to_ai_titles = [] to_ai_types = [] for idx, tx in enumerate(transactions): title = tx.get('title', '') t_type = tx.get('type', 'Expense') if t_type.lower() not in ['expense', 'income']: t_type = 'Unknown' # 1. Try Local Rules ONLY first local_result = _classify_locally(title, t_type) if local_result['category'] != 'Uncategorized' and t_type != 'Unknown': results[idx] = local_result else: to_ai_indices.append(idx) to_ai_titles.append(title) to_ai_types.append(t_type) # 2. Batch process the remaining with AI if to_ai_indices: try: from finance.ai_helper import batch_classify_transactions_with_llm ai_results = batch_classify_transactions_with_llm(to_ai_titles, to_ai_types) for i, res_obj in enumerate(ai_results): if i < len(to_ai_indices): idx = to_ai_indices[i] results[idx] = { 'category': res_obj.get('category', 'Uncategorized'), 'type': res_obj.get('type', to_ai_types[i] if to_ai_types[i] != 'Unknown' else 'Expense') } except Exception as e: print(f"Batch AI classification failed: {e}") for idx in to_ai_indices: if results[idx] is None: results[idx] = {'category': 'Uncategorized', 'type': transactions[idx].get('type', 'Expense')} # Final pass: fill any remains for i in range(len(results)): if results[i] is None: results[i] = {'category': 'Uncategorized', 'type': transactions[i].get('type', 'Expense')} return results