File size: 10,426 Bytes
4e80ff9
7e6741b
 
a7f527f
 
 
 
403ce95
1e16b3a
a7f527f
403ce95
 
 
 
7e6741b
403ce95
f820f5a
403ce95
f820f5a
 
3b1d6f0
7e6741b
403ce95
f820f5a
403ce95
f820f5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b1d6f0
f820f5a
 
 
3b1d6f0
f820f5a
 
 
 
3b1d6f0
f820f5a
 
 
3b1d6f0
f820f5a
3b1d6f0
f820f5a
 
3b1d6f0
 
f820f5a
 
 
 
 
3b1d6f0
6331c93
f820f5a
 
 
 
 
 
1e16b3a
6331c93
3b1d6f0
f820f5a
3b1d6f0
 
f820f5a
 
 
 
 
 
2d902b3
4e80ff9
3b1d6f0
4e80ff9
3b1d6f0
 
 
 
 
 
 
 
 
 
 
 
 
 
3e86479
12ab04c
 
f820f5a
 
 
 
 
 
 
3b1d6f0
f820f5a
 
 
 
 
 
 
 
3b1d6f0
f820f5a
 
 
 
 
 
 
 
 
 
 
 
 
3b1d6f0
f820f5a
 
3b1d6f0
 
f820f5a
 
3b1d6f0
f820f5a
3b1d6f0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import pandas as pd
from setfit import SetFitModel
from sentence_transformers import util
import torch
import gradio as gr
from datetime import datetime

# ==================================================
# 🚀 Initialize FastAPI
# ==================================================
# ASGI application object. The Gradio UI is mounted onto it and the REST
# routes are registered on it further down in this module.
app = FastAPI(title="Transaction Category Mapper")

# ==================================================
# 🧠 Load Main Model
# ==================================================
# The SetFit model is loaded once, at import time, and shared by every
# request; its `encode` method is what produces the embeddings used for
# similarity matching.
print("Loading main model...")
model = SetFitModel.from_pretrained("yassine123Z/EmissionFactor-mapper2-v2")
# The status marker was previously a mis-encoded byte sequence; it is the
# check-mark emoji.
print("✅ Model loaded successfully!")

# ==================================================
# 📘 Reference Categories
# ==================================================
# Reference taxonomy that transactions are matched against. The three lists
# are index-aligned: position i of each list describes the same reference row
# (top-level category, sub-category, short description of the sub-category).
# pandas requires the lists to have equal length, so any row that is added or
# removed must be added or removed in all three lists at the same position.
ref_data = pd.DataFrame({
    "Cat1EN": [
        "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
        "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
        "Purchase of goods","Purchase of goods","Purchase of materials","Purchase of materials",
        "Purchase of materials","Purchase of materials","Purchase of materials","Purchase of materials",
        "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
        "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
        "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
        "Purchase of services","Purchase of services","Food & beverages","Food & beverages",
        "Food & beverages","Food & beverages","Food & beverages","Food & beverages",
        "Food & beverages","Food & beverages","Food & beverages","Food & beverages",
        "Heating and air conditioning","Heating and air conditioning","Fuels","Fuels","Fuels","Fuels",
        "Fuels","Fuels",
        "Mobility (freight)","Mobility (freight)","Mobility (freight)","Mobility (freight)",
        "Mobility (freight)",
        "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
        "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
        "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
        "Process and fugitive emissions","Process and fugitive emissions",
        "Process and fugitive emissions",
        "Waste treatment","Waste treatment","Waste treatment",
        "Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment",
        "Waste treatment","Waste treatment","Waste treatment","Waste treatment",
        "Use of electricity","Use of electricity","Use of electricity"
    ],
    "Cat2EN": [
        "Sporting goods","Buildings","Office supplies","Water consumption",
        "Household appliances","Electrical equipment","Machinery and equipment","Furniture",
        "Textiles and clothing","Vehicles","Construction materials","Organic materials",
        "Paper and cardboard","Plastics and rubber","Chemicals","Refrigerants and others",
        "Equipment rental","Building rental","Furniture rental","Vehicle rental and maintenance",
        "Information and cultural services","Catering services","Health services","Specialized craft services",
        "Administrative / consulting services","Cleaning services","IT services","Logistics services",
        "Marketing / advertising services","Technical services","Alcoholic beverages","Non-alcoholic beverages",
        "Condiments","Desserts","Fruits and vegetables","Fats and oils","Prepared / cooked meals",
        "Animal products","Cereal products","Dairy products","Heat and steam","Air conditioning and refrigeration",
        "Fossil fuels","Mobile fossil fuels","Organic fuels","Gaseous fossil fuels","Liquid fossil fuels",
        "Solid fossil fuels",
        "Air transport","Ship transport","Truck transport","Combined transport",
        "Train transport",
        "Air transport","Coach / Urban bus","Ship transport","Combined transport",
        "E-Bike","Accommodation / Events","Soft mobility","Motorcycle / Scooter","Train transport",
        "Public transport","Car",
        "Agriculture","Global warming potential","Industrial processes",
        "Commercial and industrial","Wastewater","Electrical equipment","Households and similar",
        "Metal","Organic materials","Paper and cardboard","Batteries and accumulators","Plastics",
        "Fugitive process emissions","Textiles","Glass",
        "Electricity for electric vehicles","Renewables","Standard"
    ],
    "DescriptionCat2EN": [
        "Goods purchase - sports","Goods purchase - buildings","Goods purchase - office items","Goods purchase - water",
        "Goods purchase - appliances","Goods purchase - electricals","Goods purchase - machinery","Goods purchase - furniture",
        "Goods purchase - textiles","Goods purchase - vehicles","Material purchase - construction","Material purchase - organic",
        "Material purchase - paper","Material purchase - plastics","Material purchase - chemicals","Material purchase - refrigerants",
        "Service - equipment rental","Service - building rental","Service - furniture rental","Service - vehicles",
        "Service - info/culture","Service - catering","Service - healthcare","Service - crafts",
        "Service - admin/consulting","Service - cleaning","Service - IT","Service - logistics",
        "Service - marketing","Service - technical","Beverages - alcoholic","Beverages - non-alcoholic",
        "Food condiments","Food desserts","Food fruits & vegetables","Food fats & oils","Prepared meals",
        "Animal-based food","Cereal-based food","Dairy products","Heating - heat & steam","Heating - cooling/refrigeration",
        "Fuel - fossil","Fuel - mobile fossil","Fuel - organic","Fuel - gaseous","Fuel - liquid","Fuel - solid",
        "Freight transport - air","Freight transport - ship","Freight transport - truck","Freight transport - combined",
        "Freight transport - train",
        "Passenger transport - air","Passenger transport - bus","Passenger transport - ship",
        "Passenger transport - combined","Passenger transport - e-bike","Passenger transport - accommodation/events",
        "Passenger transport - soft mobility","Passenger transport - scooter/motorbike","Passenger transport - train",
        "Passenger transport - public","Passenger transport - car",
        "Emissions - agriculture","Emissions - warming potential",
        "Emissions - industry",
        "Waste - commercial/industrial","Waste - wastewater","Waste - electricals",
        "Waste - households","Waste - metals","Waste - organics","Waste - paper","Waste - batteries",
        "Waste - plastics","Waste - fugitive","Waste - textiles","Waste - glass",
        "Electricity - EVs","Electricity - renewables","Electricity - standard"
    ]
})

# Build one text per reference row (the three descriptive columns joined by
# single spaces) and embed all rows once, up front.
ref_data["combined"] = (
    ref_data["Cat1EN"] + " " + ref_data["Cat2EN"] + " " + ref_data["DescriptionCat2EN"]
)
ref_embeddings = model.encode(list(ref_data["combined"]))

# Alphabetically ordered, de-duplicated category labels.
unique_cat1 = sorted(set(ref_data["Cat1EN"]))
unique_cat2 = sorted(set(ref_data["Cat2EN"]))

# ==================================================
# 💾 Storage for corrections
# ==================================================
# In-memory list of corrections; it starts empty and is reported by the
# /health and /corrections endpoints.
corrections_data = []


# ==================================================
# 🔍 Classification Functions
# ==================================================
def classify_transaction(text: str, top_k=3):
    """Return the ``top_k`` reference categories most similar to ``text``.

    The transaction text is embedded with the module-level ``model`` and
    compared, by cosine similarity, against the precomputed
    ``ref_embeddings`` (one embedding per row of ``ref_data``).

    Args:
        text: Free-text transaction description.
        top_k: Maximum number of matches to return. It is capped at the
            number of reference rows.

    Returns:
        A list of dicts with keys ``"cat1"``, ``"cat2"`` and ``"score"``
        (a float), ordered from most to least similar. The list is empty
        when ``text`` is blank or ``top_k`` is not positive.
    """
    # Bail out before paying for an embedding when there is nothing to rank.
    # A negative k would otherwise make ``topk`` raise.
    if not text.strip() or top_k <= 0:
        return []

    # ``as_tensor`` reuses the existing buffer instead of copying the whole
    # reference matrix on every call, and is a no-op for tensors.
    query_emb = torch.as_tensor(model.encode([text])[0])
    reference_emb = torch.as_tensor(ref_embeddings)
    scores = util.pytorch_cos_sim(query_emb, reference_emb).flatten()

    # A single topk call yields both the scores and their row positions.
    best = scores.topk(min(top_k, len(scores)))

    results = []
    for idx, score in zip(best.indices.tolist(), best.values.tolist()):
        row = ref_data.iloc[idx]
        results.append({
            "cat1": row["Cat1EN"],
            "cat2": row["Cat2EN"],
            "score": float(score),
        })
    return results


def classify_single(text: str):
    """Return the single best match for ``text`` as ``(cat1, cat2, score)``.

    Blank input and an empty result list are not raised as errors: a
    placeholder message is returned in the ``cat1`` slot, with an empty
    ``cat2`` and a score of ``0.0``.
    """
    is_blank = text.strip() == ""
    if is_blank:
        return "Please enter a transaction", "", 0.0

    matches = classify_transaction(text, top_k=1)
    if matches:
        best = matches[0]
        return best["cat1"], best["cat2"], best["score"]
    return "No results", "", 0.0


# ==================================================
# 🎨 Gradio UI
# ==================================================
def main_ui_fn(text):
    """Format the best match for ``text`` as Markdown for the Gradio output.

    Returns a Markdown string with the best Cat1, the best Cat2 and the
    similarity score rounded to three decimals, one field per paragraph.
    """
    cat1, cat2, score = classify_single(text)
    # Fields are separated by blank lines: a single newline is not a line
    # break in Markdown, so the three fields would otherwise run together
    # on one line when rendered.
    return (
        f"**Best Cat1:** {cat1}\n\n"
        f"**Best Cat2:** {cat2}\n\n"
        f"**Score:** {round(score, 3)}"
    )


# Single-textbox demo UI: the entered text goes through main_ui_fn and the
# formatted result is shown as Markdown.
main_ui = gr.Interface(
    fn=main_ui_fn,
    inputs=gr.Textbox(label="Enter transaction text"),
    outputs=gr.Markdown(label="Predicted Category"),
    # The title emoji was previously a mis-encoded byte sequence.
    title="💼 Transaction Category Mapper",
    description="Predicts the best matching category for your transaction using NLP similarity."
)

# The Gradio app can only be mounted once `main_ui` exists, so this must stay
# below the Interface definition. It is mounted under /ui rather than / —
# presumably so the UI mount does not capture the REST routes registered on
# `app` afterwards (TODO confirm).
app = gr.mount_gradio_app(app, main_ui, path="/ui")


# ==================================================
# 🧾 REST API Endpoints
# ==================================================
class TransactionsRequest(BaseModel):
    """Request body for POST /map_categories: a batch of transaction texts."""

    # Transaction descriptions; each one is classified independently.
    transactions: List[str]


@app.get("/health")
def health_check():
    """Report service status, whether the model object exists, and how many
    corrections are currently stored."""
    model_ready = model is not None
    stored = len(corrections_data)
    return {"status": "healthy", "model_loaded": model_ready, "corrections_count": stored}


@app.post("/map_categories")
def map_categories(request: TransactionsRequest):
    """Classify every transaction in the request and return the best match
    for each, in input order, under the ``"matches"`` key."""

    def to_match(text):
        # One response entry per input text, built from its best match.
        cat1, cat2, score = classify_single(text)
        return {
            "input_text": text,
            "best_Cat1": cat1,
            "best_Cat2": cat2,
            "similarity": score,
        }

    return {"matches": [to_match(text) for text in request.transactions]}


@app.get("/corrections")
def get_corrections():
    """Return the stored corrections together with their count."""
    total = len(corrections_data)
    return {"corrections": corrections_data, "count": total}


# Startup summary of where things are served. The interface path must match
# the path the Gradio app is mounted at (/ui), not the site root.
print("✅ App initialized successfully!")
print("📍 Interface available at: /ui")
print("🏥 Health Check: /health")
print("🔌 API Endpoints: /map_categories, /corrections")