# Page header captured together with this file (not part of the program):
#   yassine123Z's picture
#   Update app.py
#   3e86479 verified
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import pandas as pd
from setfit import SetFitModel
from sentence_transformers import util
import torch
import gradio as gr
from datetime import datetime
# ==================================================
# 🚀 Initialize FastAPI
# ==================================================
# ASGI application object; the REST routes and the Gradio UI defined further
# down in this file are attached to it.
app = FastAPI(title="Transaction Category Mapper")

# ==================================================
# 🧠 Load Main Model
# ==================================================
# The model is loaded once, at import time, and shared by every request.
# NOTE(review): the argument looks like a Hugging Face Hub repo id, so the
# first start presumably needs network access — confirm for offline deploys.
print("Loading main model...")
model = SetFitModel.from_pretrained("yassine123Z/EmissionFactor-mapper2-v2")
print("✅ Model loaded successfully!")
# ==================================================
# 📘 Reference Categories
# ==================================================
# Reference taxonomy that transactions are matched against.
# The three lists below are parallel: position i in each list describes the
# same reference row, and every list holds 82 entries (pandas requires equal
# lengths when a DataFrame is built from a dict of lists). When adding or
# removing a row, edit all three lists at the same position.
# NOTE: some Cat2EN labels repeat under different Cat1EN groups (for example
# "Air transport" appears for both freight and passengers), so a Cat2EN value
# on its own does not identify a row.
ref_data = pd.DataFrame({
# Top-level category of each reference row.
"Cat1EN": [
"Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
"Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
"Purchase of goods","Purchase of goods","Purchase of materials","Purchase of materials",
"Purchase of materials","Purchase of materials","Purchase of materials","Purchase of materials",
"Purchase of services","Purchase of services","Purchase of services","Purchase of services",
"Purchase of services","Purchase of services","Purchase of services","Purchase of services",
"Purchase of services","Purchase of services","Purchase of services","Purchase of services",
"Purchase of services","Purchase of services","Food & beverages","Food & beverages",
"Food & beverages","Food & beverages","Food & beverages","Food & beverages",
"Food & beverages","Food & beverages","Food & beverages","Food & beverages",
"Heating and air conditioning","Heating and air conditioning","Fuels","Fuels","Fuels","Fuels",
"Fuels","Fuels",
"Mobility (freight)","Mobility (freight)","Mobility (freight)","Mobility (freight)",
"Mobility (freight)",
"Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
"Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
"Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
"Process and fugitive emissions","Process and fugitive emissions",
"Process and fugitive emissions",
"Waste treatment","Waste treatment","Waste treatment",
"Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment",
"Waste treatment","Waste treatment","Waste treatment","Waste treatment",
"Use of electricity","Use of electricity","Use of electricity"
],
# Sub-category of each reference row (aligned with Cat1EN by position).
"Cat2EN": [
"Sporting goods","Buildings","Office supplies","Water consumption",
"Household appliances","Electrical equipment","Machinery and equipment","Furniture",
"Textiles and clothing","Vehicles","Construction materials","Organic materials",
"Paper and cardboard","Plastics and rubber","Chemicals","Refrigerants and others",
"Equipment rental","Building rental","Furniture rental","Vehicle rental and maintenance",
"Information and cultural services","Catering services","Health services","Specialized craft services",
"Administrative / consulting services","Cleaning services","IT services","Logistics services",
"Marketing / advertising services","Technical services","Alcoholic beverages","Non-alcoholic beverages",
"Condiments","Desserts","Fruits and vegetables","Fats and oils","Prepared / cooked meals",
"Animal products","Cereal products","Dairy products","Heat and steam","Air conditioning and refrigeration",
"Fossil fuels","Mobile fossil fuels","Organic fuels","Gaseous fossil fuels","Liquid fossil fuels",
"Solid fossil fuels",
"Air transport","Ship transport","Truck transport","Combined transport",
"Train transport",
"Air transport","Coach / Urban bus","Ship transport","Combined transport",
"E-Bike","Accommodation / Events","Soft mobility","Motorcycle / Scooter","Train transport",
"Public transport","Car",
"Agriculture","Global warming potential","Industrial processes",
"Commercial and industrial","Wastewater","Electrical equipment","Households and similar",
"Metal","Organic materials","Paper and cardboard","Batteries and accumulators","Plastics",
# NOTE(review): "Fugitive process emissions" sits in the "Waste treatment"
# group at this position — confirm that grouping is intended.
"Fugitive process emissions","Textiles","Glass",
"Electricity for electric vehicles","Renewables","Standard"
],
# Short free-text description of each row (aligned with Cat2EN by position).
"DescriptionCat2EN": [
"Goods purchase - sports","Goods purchase - buildings","Goods purchase - office items","Goods purchase - water",
"Goods purchase - appliances","Goods purchase - electricals","Goods purchase - machinery","Goods purchase - furniture",
"Goods purchase - textiles","Goods purchase - vehicles","Material purchase - construction","Material purchase - organic",
"Material purchase - paper","Material purchase - plastics","Material purchase - chemicals","Material purchase - refrigerants",
"Service - equipment rental","Service - building rental","Service - furniture rental","Service - vehicles",
"Service - info/culture","Service - catering","Service - healthcare","Service - crafts",
"Service - admin/consulting","Service - cleaning","Service - IT","Service - logistics",
"Service - marketing","Service - technical","Beverages - alcoholic","Beverages - non-alcoholic",
"Food condiments","Food desserts","Food fruits & vegetables","Food fats & oils","Prepared meals",
"Animal-based food","Cereal-based food","Dairy products","Heating - heat & steam","Heating - cooling/refrigeration",
"Fuel - fossil","Fuel - mobile fossil","Fuel - organic","Fuel - gaseous","Fuel - liquid","Fuel - solid",
"Freight transport - air","Freight transport - ship","Freight transport - truck","Freight transport - combined",
"Freight transport - train",
"Passenger transport - air","Passenger transport - bus","Passenger transport - ship",
"Passenger transport - combined","Passenger transport - e-bike","Passenger transport - accommodation/events",
"Passenger transport - soft mobility","Passenger transport - scooter/motorbike","Passenger transport - train",
"Passenger transport - public","Passenger transport - car",
"Emissions - agriculture","Emissions - warming potential",
"Emissions - industry",
"Waste - commercial/industrial","Waste - wastewater","Waste - electricals",
"Waste - households","Waste - metals","Waste - organics","Waste - paper","Waste - batteries",
"Waste - plastics","Waste - fugitive","Waste - textiles","Waste - glass",
"Electricity - EVs","Electricity - renewables","Electricity - standard"
]
})
# One text per reference row: top-level category, sub-category and description
# joined by single spaces. All rows are embedded once, at start-up, so that
# requests only have to embed the incoming transaction.
ref_data["combined"] = (
    ref_data["Cat1EN"] + " " + ref_data["Cat2EN"] + " " + ref_data["DescriptionCat2EN"]
)
ref_embeddings = model.encode(list(ref_data["combined"]))

# Alphabetically sorted, de-duplicated category labels.
unique_cat1 = sorted(set(ref_data["Cat1EN"]))
unique_cat2 = sorted(set(ref_data["Cat2EN"]))
# ==================================================
# 💾 Storage for corrections
# ==================================================
# In-memory list whose length is reported by /health and whose contents are
# returned by /corrections.
# NOTE(review): nothing in the visible code appends to this list — confirm
# whether the writer lives elsewhere or is still to be added.
corrections_data = []
# ==================================================
# 🔍 Classification Functions
# ==================================================
def classify_transaction(text: str, top_k=3):
    """Return the ``top_k`` reference categories most similar to ``text``.

    The text is embedded with the module-level ``model`` and compared by
    cosine similarity against the pre-computed ``ref_embeddings``.

    Args:
        text: Transaction text to classify.
        top_k: Maximum number of matches to return. It is capped at the
            number of reference rows.

    Returns:
        A list of dicts with the keys ``"cat1"``, ``"cat2"`` and ``"score"``
        (a float), ordered from the highest to the lowest similarity. The
        list is empty when ``text`` is ``None``, empty or whitespace-only,
        or when ``top_k`` is not positive.
    """
    # Nothing to classify or nothing requested: skip the model call entirely.
    # A negative top_k would otherwise raise inside topk().
    if not text or not text.strip() or top_k <= 0:
        return []

    trans_emb = model.encode([text])[0]
    # as_tensor accepts NumPy arrays and tensors alike and does not copy data
    # that is already usable, unlike torch.tensor().
    scores = util.pytorch_cos_sim(
        torch.as_tensor(trans_emb), torch.as_tensor(ref_embeddings)
    ).flatten()

    # A single topk call yields both the values and their indices.
    best = scores.topk(min(top_k, len(scores)))

    results = []
    for idx, score in zip(best.indices.tolist(), best.values.tolist()):
        row = ref_data.iloc[idx]
        results.append({
            "cat1": row["Cat1EN"],
            "cat2": row["Cat2EN"],
            "score": float(score),
        })
    return results
def classify_single(text: str):
    """Classify ``text`` and return only its closest reference category.

    Returns:
        A ``(cat1, cat2, score)`` tuple. Blank input yields
        ``("Please enter a transaction", "", 0.0)``; if the classifier
        returns no match the tuple is ``("No results", "", 0.0)``.
    """
    is_blank = not text.strip()
    if is_blank:
        return "Please enter a transaction", "", 0.0

    matches = classify_transaction(text, top_k=1)
    if matches:
        top = matches[0]
        return top["cat1"], top["cat2"], top["score"]
    return "No results", "", 0.0
# ==================================================
# 🎨 Gradio UI
# ==================================================
def main_ui_fn(text):
    """Format the best match for ``text`` as Markdown for the Gradio output.

    Returns a three-line string holding the top-level category, the
    sub-category and the similarity score rounded to three decimals.
    """
    best_cat1, best_cat2, similarity = classify_single(text)
    lines = [
        f"**Best Cat1:** {best_cat1}",
        f"**Best Cat2:** {best_cat2}",
        f"**Score:** {round(similarity, 3)}",
    ]
    return "\n".join(lines)
# Single-textbox Gradio front end around main_ui_fn; the result is rendered
# as Markdown.
main_ui = gr.Interface(
    fn=main_ui_fn,
    inputs=gr.Textbox(label="Enter transaction text"),
    outputs=gr.Markdown(label="Predicted Category"),
    title="💼 Transaction Category Mapper",
    description="Predicts the best matching category for your transaction using NLP similarity.",
)

# The interface must exist before it can be mounted. It is served under /ui
# rather than at the root path, and the value returned by mount_gradio_app is
# rebound to `app` so the route decorators further down register on it.
app = gr.mount_gradio_app(app, main_ui, path="/ui")
# ==================================================
# 🧾 REST API Endpoints
# ==================================================
class TransactionsRequest(BaseModel):
    """Request body accepted by ``POST /map_categories``."""

    # Transaction texts to classify; the endpoint classifies each one
    # independently and answers in the same order.
    transactions: List[str]
@app.get("/health")
def health_check():
    """Report service status, model availability and the number of stored corrections."""
    model_ready = model is not None
    stored_corrections = len(corrections_data)
    return {
        "status": "healthy",
        "model_loaded": model_ready,
        "corrections_count": stored_corrections,
    }
@app.post("/map_categories")
def map_categories(request: TransactionsRequest):
    """Classify every transaction in the request body.

    Returns:
        ``{"matches": [...]}`` with one entry per input text, in input
        order. Each entry carries ``input_text``, ``best_Cat1``,
        ``best_Cat2`` and ``similarity``.
    """

    def _best_match(text):
        # Best single category for one transaction text.
        cat1, cat2, score = classify_single(text)
        return {
            "input_text": text,
            "best_Cat1": cat1,
            "best_Cat2": cat2,
            "similarity": score,
        }

    return {"matches": [_best_match(text) for text in request.transactions]}
@app.get("/corrections")
def get_corrections():
    """Return every stored correction together with how many there are."""
    payload = {"corrections": corrections_data}
    payload["count"] = len(corrections_data)
    return payload
# Start-up summary of where the UI and the API are served.
print("✅ App initialized successfully!")
# The Gradio interface is mounted at /ui (see the gr.mount_gradio_app call
# earlier in this file), not at the root path.
print("📍 Interface available at: /ui")
print("🏥 Health Check: /health")
print("🔌 API Endpoints: /map_categories, /corrections")