Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from typing import List | |
| import pandas as pd | |
| from setfit import SetFitModel | |
| from sentence_transformers import util | |
| import torch | |
| import gradio as gr | |
| from datetime import datetime | |
| # ================================================== | |
| # Initialize FastAPI | |
| # ================================================== | |
# The ASGI application object; the Gradio UI is mounted onto it further down.
app = FastAPI(title="Transaction Category Mapper")

# ==================================================
# Load Main Model
# ==================================================
# Identifier of the SetFit checkpoint used for every embedding in this module.
_MAIN_MODEL_ID = "yassine123Z/EmissionFactor-mapper2-v2"

# Loading happens once, at import time, so later calls reuse the same model.
print("Loading main model...")
model = SetFitModel.from_pretrained(_MAIN_MODEL_ID)
print("β Model loaded successfully!")
| # ================================================== | |
| # Reference Categories | |
| # ================================================== | |
# Reference taxonomy, grouped by top-level category (Cat1EN).  Each entry is a
# (Cat2EN, DescriptionCat2EN) pair; insertion order defines the row order of
# ``ref_data`` below.
_CATEGORY_TREE = {
    "Purchase of goods": [
        ("Sporting goods", "Goods purchase - sports"),
        ("Buildings", "Goods purchase - buildings"),
        ("Office supplies", "Goods purchase - office items"),
        ("Water consumption", "Goods purchase - water"),
        ("Household appliances", "Goods purchase - appliances"),
        ("Electrical equipment", "Goods purchase - electricals"),
        ("Machinery and equipment", "Goods purchase - machinery"),
        ("Furniture", "Goods purchase - furniture"),
        ("Textiles and clothing", "Goods purchase - textiles"),
        ("Vehicles", "Goods purchase - vehicles"),
    ],
    "Purchase of materials": [
        ("Construction materials", "Material purchase - construction"),
        ("Organic materials", "Material purchase - organic"),
        ("Paper and cardboard", "Material purchase - paper"),
        ("Plastics and rubber", "Material purchase - plastics"),
        ("Chemicals", "Material purchase - chemicals"),
        ("Refrigerants and others", "Material purchase - refrigerants"),
    ],
    "Purchase of services": [
        ("Equipment rental", "Service - equipment rental"),
        ("Building rental", "Service - building rental"),
        ("Furniture rental", "Service - furniture rental"),
        ("Vehicle rental and maintenance", "Service - vehicles"),
        ("Information and cultural services", "Service - info/culture"),
        ("Catering services", "Service - catering"),
        ("Health services", "Service - healthcare"),
        ("Specialized craft services", "Service - crafts"),
        ("Administrative / consulting services", "Service - admin/consulting"),
        ("Cleaning services", "Service - cleaning"),
        ("IT services", "Service - IT"),
        ("Logistics services", "Service - logistics"),
        ("Marketing / advertising services", "Service - marketing"),
        ("Technical services", "Service - technical"),
    ],
    "Food & beverages": [
        ("Alcoholic beverages", "Beverages - alcoholic"),
        ("Non-alcoholic beverages", "Beverages - non-alcoholic"),
        ("Condiments", "Food condiments"),
        ("Desserts", "Food desserts"),
        ("Fruits and vegetables", "Food fruits & vegetables"),
        ("Fats and oils", "Food fats & oils"),
        ("Prepared / cooked meals", "Prepared meals"),
        ("Animal products", "Animal-based food"),
        ("Cereal products", "Cereal-based food"),
        ("Dairy products", "Dairy products"),
    ],
    "Heating and air conditioning": [
        ("Heat and steam", "Heating - heat & steam"),
        ("Air conditioning and refrigeration", "Heating - cooling/refrigeration"),
    ],
    "Fuels": [
        ("Fossil fuels", "Fuel - fossil"),
        ("Mobile fossil fuels", "Fuel - mobile fossil"),
        ("Organic fuels", "Fuel - organic"),
        ("Gaseous fossil fuels", "Fuel - gaseous"),
        ("Liquid fossil fuels", "Fuel - liquid"),
        ("Solid fossil fuels", "Fuel - solid"),
    ],
    "Mobility (freight)": [
        ("Air transport", "Freight transport - air"),
        ("Ship transport", "Freight transport - ship"),
        ("Truck transport", "Freight transport - truck"),
        ("Combined transport", "Freight transport - combined"),
        ("Train transport", "Freight transport - train"),
    ],
    "Mobility (passengers)": [
        ("Air transport", "Passenger transport - air"),
        ("Coach / Urban bus", "Passenger transport - bus"),
        ("Ship transport", "Passenger transport - ship"),
        ("Combined transport", "Passenger transport - combined"),
        ("E-Bike", "Passenger transport - e-bike"),
        ("Accommodation / Events", "Passenger transport - accommodation/events"),
        ("Soft mobility", "Passenger transport - soft mobility"),
        ("Motorcycle / Scooter", "Passenger transport - scooter/motorbike"),
        ("Train transport", "Passenger transport - train"),
        ("Public transport", "Passenger transport - public"),
        ("Car", "Passenger transport - car"),
    ],
    "Process and fugitive emissions": [
        ("Agriculture", "Emissions - agriculture"),
        ("Global warming potential", "Emissions - warming potential"),
        ("Industrial processes", "Emissions - industry"),
    ],
    "Waste treatment": [
        ("Commercial and industrial", "Waste - commercial/industrial"),
        ("Wastewater", "Waste - wastewater"),
        ("Electrical equipment", "Waste - electricals"),
        ("Households and similar", "Waste - households"),
        ("Metal", "Waste - metals"),
        ("Organic materials", "Waste - organics"),
        ("Paper and cardboard", "Waste - paper"),
        ("Batteries and accumulators", "Waste - batteries"),
        ("Plastics", "Waste - plastics"),
        ("Fugitive process emissions", "Waste - fugitive"),
        ("Textiles", "Waste - textiles"),
        ("Glass", "Waste - glass"),
    ],
    "Use of electricity": [
        ("Electricity for electric vehicles", "Electricity - EVs"),
        ("Renewables", "Electricity - renewables"),
        ("Standard", "Electricity - standard"),
    ],
}

# Flatten the tree into one row per (Cat1EN, Cat2EN, DescriptionCat2EN) triple.
ref_data = pd.DataFrame(
    [
        (cat1, cat2, description)
        for cat1, children in _CATEGORY_TREE.items()
        for cat2, description in children
    ],
    columns=["Cat1EN", "Cat2EN", "DescriptionCat2EN"],
)
# One text per reference row: the three label columns separated by single
# spaces.  This is the string that gets embedded for similarity matching.
ref_data["combined"] = (
    ref_data["Cat1EN"] + " " + ref_data["Cat2EN"] + " " + ref_data["DescriptionCat2EN"]
)

# Embed every reference row once at import time; queries are compared to these.
ref_embeddings = model.encode(list(ref_data["combined"]))

# Alphabetically sorted, de-duplicated category labels.
unique_cat1 = sorted(set(ref_data["Cat1EN"]))
unique_cat2 = sorted(set(ref_data["Cat2EN"]))

# ==================================================
# Storage for corrections
# ==================================================
# In-memory list; its length and contents are exposed by the handlers below.
corrections_data = []
| # ================================================== | |
| # Classification Functions | |
| # ================================================== | |
def classify_transaction(text: str, top_k=3):
    """Return the ``top_k`` reference categories most similar to ``text``.

    The text is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the precomputed ``ref_embeddings``.

    Args:
        text: Free-form transaction description.
        top_k: Maximum number of matches to return. It is capped at the
            number of reference rows; a value of zero or less yields an
            empty list instead of an error from ``topk``.

    Returns:
        A list of dicts with keys ``"cat1"``, ``"cat2"`` and ``"score"``
        (a Python float), in the order produced by ``Tensor.topk``
        (highest similarity first). Empty when ``text`` is blank or
        ``top_k`` is not positive.
    """
    # Cheap guards first, so blank input never reaches the model.
    if top_k <= 0 or not text.strip():
        return []

    # as_tensor avoids re-copying the reference matrix on every call.
    query = torch.as_tensor(model.encode([text])[0])
    references = torch.as_tensor(ref_embeddings)
    scores = util.pytorch_cos_sim(query, references).flatten()

    # One topk call provides both the indices and their scores.
    best = scores.topk(min(top_k, len(scores)))

    results = []
    for idx, score in zip(best.indices.tolist(), best.values.tolist()):
        row = ref_data.iloc[idx]
        results.append({
            "cat1": row["Cat1EN"],
            "cat2": row["Cat2EN"],
            "score": float(score),
        })
    return results
def classify_single(text: str):
    """Return the single best match for ``text`` as ``(cat1, cat2, score)``.

    Blank input yields ``("Please enter a transaction", "", 0.0)``; an empty
    result list from ``classify_transaction`` yields ``("No results", "", 0.0)``.
    """
    if text.strip():
        matches = classify_transaction(text, top_k=1)
        if matches:
            top = matches[0]
            return top["cat1"], top["cat2"], top["score"]
        return "No results", "", 0.0
    return "Please enter a transaction", "", 0.0
| # ================================================== | |
| # Gradio UI | |
| # ================================================== | |
def main_ui_fn(text):
    """Format the best match for ``text`` as a Markdown string for the UI."""
    best_cat1, best_cat2, similarity = classify_single(text)
    # The score is rounded to three decimals for display only.
    shown_score = round(similarity, 3)
    return f"**Best Cat1:** {best_cat1}\n**Best Cat2:** {best_cat2}\n**Score:** {shown_score}"
# UI components: a single text box in, a Markdown panel out.
_transaction_box = gr.Textbox(label="Enter transaction text")
_prediction_panel = gr.Markdown(label="Predicted Category")

main_ui = gr.Interface(
    fn=main_ui_fn,
    inputs=_transaction_box,
    outputs=_prediction_panel,
    title="πΌ Transaction Category Mapper",
    description="Predicts the best matching category for your transaction using NLP similarity.",
)

# Mount only after ``main_ui`` exists.  The interface is served under /ui
# (not the root path), and ``app`` is rebound to the value returned by the mount call.
app = gr.mount_gradio_app(app, main_ui, path="/ui")
| # ================================================== | |
| # REST API Endpoints | |
| # ================================================== | |
class TransactionsRequest(BaseModel):
    """Request body carrying a batch of transaction texts to classify."""

    # One free-form description per transaction.
    transactions: List[str]
# Registered on the app so the advertised /health path actually resolves;
# without the decorator this function is never reachable over HTTP.
@app.get("/health")
def health_check():
    """Report service status.

    Returns:
        A dict with ``"status"`` (always ``"healthy"``), ``"model_loaded"``
        (whether the module-level ``model`` is not ``None``) and
        ``"corrections_count"`` (length of ``corrections_data``).
    """
    return {
        "status": "healthy",
        "model_loaded": model is not None,
        "corrections_count": len(corrections_data),
    }
# Registered as POST because the handler reads a JSON body (TransactionsRequest);
# without the decorator the advertised /map_categories path does not exist.
@app.post("/map_categories")
def map_categories(request: TransactionsRequest):
    """Classify every transaction in the request.

    Args:
        request: Body containing the ``transactions`` list of texts.

    Returns:
        ``{"matches": [...]}`` with one entry per input text, in input
        order. Each entry holds ``"input_text"``, ``"best_Cat1"``,
        ``"best_Cat2"`` and ``"similarity"`` as returned by
        ``classify_single``.
    """
    matches = []
    for text in request.transactions:
        cat1, cat2, score = classify_single(text)
        matches.append({
            "input_text": text,
            "best_Cat1": cat1,
            "best_Cat2": cat2,
            "similarity": score,
        })
    return {"matches": matches}
# Registered on the app so the advertised /corrections path actually resolves.
@app.get("/corrections")
def get_corrections():
    """Return the stored corrections and how many there are.

    Returns:
        ``{"corrections": corrections_data, "count": len(corrections_data)}``.
    """
    return {"corrections": corrections_data, "count": len(corrections_data)}
# Startup summary.  The Gradio interface is mounted with path="/ui" in this
# file, so the message reports /ui rather than the root path.
print("β App initialized successfully!")
print("π Interface available at: /ui")
print("π₯ Health Check: /health")
print("π API Endpoints: /map_categories, /corrections")