# ==========================================
# Prerequisites to run this code:
# Open your terminal or command prompt and run:
# pip install transformers torch
# ==========================================
import warnings

# Silence warning logs globally; done before the heavy imports so that
# import-time warnings are suppressed as well.
warnings.filterwarnings("ignore")

import torch
from transformers import pipeline


class ExpenseCategorizerAI:
    """Zero-shot classifier that maps a free-text expense to a category.

    On construction the class picks a compute device, loads the
    ``valhalla/distilbart-mnli-12-3`` zero-shot-classification pipeline and
    stores the list of candidate categories in ``self.categories``.
    """

    def __init__(self):
        """Load the model pipeline and define the candidate categories."""
        print(
            "Loading AI Model (This might take a minute on the first run as it downloads the model)..."
        )

        # 1. Pick the fastest available device: CUDA GPU, then Apple Metal
        #    (MPS), otherwise fall back to the CPU.
        if torch.cuda.is_available():
            device = 0  # Nvidia GPU
            print("Hardware Acceleration: Enabled (CUDA GPU)")
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            # hasattr guard: older torch builds have no `mps` backend module.
            device = "mps"  # Apple Silicon Mac
            print("Hardware Acceleration: Enabled (Apple Metal)")
        else:
            device = -1  # Standard CPU
            print("Hardware Acceleration: None (Using CPU)")

        # 2. Distilled BART model fine-tuned on MNLI. Per the original
        #    author's note it was chosen over distilbert for better accuracy
        #    while being faster than the full BART model.
        self.classifier = pipeline(
            "zero-shot-classification",
            model="valhalla/distilbart-mnli-12-3",
            device=device,
        )

        # Predefined candidate labels scored against every expense text.
        self.categories = [
            "Food & Drinks",
            "Groceries",
            "Shopping",
            "Bills & Utilities",
            "Entertainment",
            "Health",
            "Education",
            "Subscriptions",
            "Travel",
            "Rent",
            "Family & Friends",
            "Miscellaneous",
            "Gifts",
            "Party",
            "Personal Care",
            "Home & Hygiene",
            "Others",
            "Recharge",
        ]

        print("AI Model loaded successfully!\n")

    def predict_category(self, expense_text):
        """Return the most likely category for an expense description.

        Args:
            expense_text: Free-text description of the expense.

        Returns:
            A ``(top_category, confidence_score)`` tuple: the best-scoring
            label from ``self.categories`` and the score the classifier
            assigned to it.

        Raises:
            ValueError: If ``expense_text`` is not a string or is blank.
        """
        # Fail early with a clear message instead of letting blank input
        # reach the model, where it either errors out or yields a
        # meaningless label.
        if not isinstance(expense_text, str) or not expense_text.strip():
            raise ValueError("expense_text must be a non-empty string.")

        # The classifier scores the text against every candidate label. The
        # hypothesis template frames each label as the purpose of an expense.
        result = self.classifier(
            expense_text,
            candidate_labels=self.categories,
            hypothesis_template="This expense is for {}.",
        )

        # The result lists are sorted by descending score, so index 0 holds
        # the top prediction and its confidence.
        top_category = result["labels"][0]
        confidence_score = result["scores"][0]
        return top_category, confidence_score


# ==========================================
# Testing the AI with examples
# ==========================================
def main():
    """Run the canned demo, then an interactive categorization prompt."""
    ai_categorizer = ExpenseCategorizerAI()

    # Test cases to show it understands context, not just keywords.
    test_expenses = [
        "I spend 100 rupees on burger",
        "Paid 1500 for the electricity and water",
        "Bought paracetamol and cough syrup",
        "Auto fare from home to the railway station",
        "Netflix and Amazon Prime monthly deduction",
        "Got a new shirt and jeans from the mall",
    ]

    print("-" * 40)
    print("TESTING EXPENSE CATEGORIZATION")
    print("-" * 40)

    for text in test_expenses:
        category, confidence = ai_categorizer.predict_category(text)
        print(f"Expense : '{text}'")
        print(f"AI Choice : {category}")
        print(f"Confidence: {confidence * 100:.1f}%\n")

    # Interactive loop for trying custom inputs.
    print("-" * 40)
    print("Type your own expenses below (type 'quit' to exit):")

    while True:
        try:
            user_input = input("Enter expense text: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C: leave the loop quietly instead of a traceback.
            print()
            break

        if not user_input:
            # A bare Enter has nothing to classify; prompt again.
            continue

        if user_input.lower() in ("quit", "exit"):
            break

        cat, conf = ai_categorizer.predict_category(user_input)
        print(f"--> Categorized as: [{cat}] (Confidence: {conf * 100:.1f}%)\n")


if __name__ == "__main__":
    main()