File size: 4,150 Bytes
f54d769
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# ==========================================
# Prerequisites to run this code:
# Open your terminal or command prompt and run:
# pip install transformers torch
# ==========================================

import warnings

warnings.filterwarnings("ignore")  # Suppresses unnecessary warning logs

import torch
from transformers import pipeline


class ExpenseCategorizerAI:
    """Categorize free-text expense descriptions with zero-shot classification.

    The instance wraps a Hugging Face ``zero-shot-classification`` pipeline
    and scores an expense description against a list of category labels.

    Attributes:
        classifier: The loaded ``transformers`` pipeline object.
        categories: List of candidate labels passed to the pipeline.
    """

    # Checkpoint loaded when the caller does not request another one.
    DEFAULT_MODEL = "valhalla/distilbart-mnli-12-3"

    # Labels used when the caller does not supply a custom list.
    DEFAULT_CATEGORIES = (
        "Food & Drinks",
        "Groceries",
        "Shopping",
        "Bills & Utilities",
        "Entertainment",
        "Health",
        "Education",
        "Subscriptions",
        "Travel",
        "Rent",
        "Family & Friends",
        "Miscellaneous",
        "Gifts",
        "Party",
        "Personal Care",
        "Home & Hygiene",
        "Others",
        "Recharge",
    )

    def __init__(self, categories=None, model_name=DEFAULT_MODEL):
        """Load the classification pipeline and store the candidate labels.

        Args:
            categories: Optional iterable of label strings. When ``None``
                (the default) ``DEFAULT_CATEGORIES`` is used.
            model_name: Model identifier handed to ``pipeline``. Defaults to
                ``DEFAULT_MODEL``.

        Raises:
            TypeError: If ``categories`` is a single string.
            ValueError: If ``categories`` is given but empty.
        """
        # Validate the labels first so a bad argument fails before the
        # (potentially slow) model load starts.
        resolved_categories = self._resolve_categories(categories)

        print(
            "Loading AI Model (This might take a minute on the first run as it downloads the model)..."
        )

        self.classifier = pipeline(
            "zero-shot-classification",
            model=model_name,
            device=self._select_device(),
        )
        self.categories = resolved_categories
        print("AI Model loaded successfully!\n")

    @staticmethod
    def _resolve_categories(categories):
        """Return the label list to use, falling back to the defaults."""
        if categories is None:
            return list(ExpenseCategorizerAI.DEFAULT_CATEGORIES)
        if isinstance(categories, str):
            # A bare string would otherwise be split into one label per
            # character by list().
            raise TypeError("categories must be an iterable of labels, not a str")
        # Copy so later changes to the caller's object do not affect us.
        resolved = list(categories)
        if not resolved:
            raise ValueError("categories must contain at least one label")
        return resolved

    @staticmethod
    def _select_device():
        """Pick the device argument for ``pipeline`` and report the choice."""
        if torch.cuda.is_available():
            print("Hardware Acceleration: Enabled (CUDA GPU)")
            return 0  # Index of the first CUDA device
        # Guard with hasattr: older torch builds have no ``backends.mps``.
        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            print("Hardware Acceleration: Enabled (Apple Metal)")
            return "mps"
        print("Hardware Acceleration: None (Using CPU)")
        return -1  # Value the pipeline API uses for CPU

    def predict_category(self, expense_text):
        """Return the best-matching category for an expense description.

        Args:
            expense_text: Free-text description of the expense.

        Returns:
            A ``(category, confidence)`` tuple taken from the first entry of
            the pipeline's ``labels`` and ``scores`` lists.
        """
        # The hypothesis template frames every label as an expense purpose.
        result = self.classifier(
            expense_text,
            candidate_labels=self.categories,
            hypothesis_template="This expense is for {}.",
        )

        # Relies on the pipeline returning labels ordered by descending
        # score (documented Hugging Face behaviour), so index 0 is the top
        # prediction.
        top_category = result["labels"][0]
        confidence_score = result["scores"][0]

        return top_category, confidence_score


# ==========================================
# Testing the AI with examples
# ==========================================
if __name__ == "__main__":
    # Build the categorizer once; constructing it loads the model.
    ai_categorizer = ExpenseCategorizerAI()

    # Sample descriptions that name items or services rather than the
    # category label itself.
    test_expenses = [
        "I spend 100 rupees on burger",
        "Paid 1500 for the electricity and water",
        "Bought paracetamol and cough syrup",
        "Auto fare from home to the railway station",
        "Netflix and Amazon Prime monthly deduction",
        "Got a new shirt and jeans from the mall",
    ]

    print("-" * 40)
    print("TESTING EXPENSE CATEGORIZATION")
    print("-" * 40)

    for text in test_expenses:
        category, confidence = ai_categorizer.predict_category(text)

        print(f"Expense   : '{text}'")
        print(f"AI Choice : {category}")
        print(f"Confidence: {confidence * 100:.1f}%\n")

    # Interactive loop: classify whatever the user types until they quit.
    print("-" * 40)
    print("Type your own expenses below (type 'quit' to exit):")
    while True:
        try:
            user_input = input("Enter expense text: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C (or a closed stdin) ends the session cleanly
            # instead of printing a traceback.
            print()
            break

        # Strip so that stray whitespace (e.g. "quit ") is still recognised
        # and whitespace-only lines count as blank.
        expense = user_input.strip()
        if not expense:
            # Nothing to classify; prompt again.
            continue
        if expense.lower() in ("quit", "exit"):
            break

        cat, conf = ai_categorizer.predict_category(expense)
        print(f"--> Categorized as: [{cat}] (Confidence: {conf * 100:.1f}%)\n")