Spaces:
Sleeping
Sleeping
File size: 10,426 Bytes
4e80ff9 7e6741b a7f527f 403ce95 1e16b3a a7f527f 403ce95 7e6741b 403ce95 f820f5a 403ce95 f820f5a 3b1d6f0 7e6741b 403ce95 f820f5a 403ce95 f820f5a 3b1d6f0 f820f5a 3b1d6f0 f820f5a 3b1d6f0 f820f5a 3b1d6f0 f820f5a 3b1d6f0 f820f5a 3b1d6f0 f820f5a 3b1d6f0 6331c93 f820f5a 1e16b3a 6331c93 3b1d6f0 f820f5a 3b1d6f0 f820f5a 2d902b3 4e80ff9 3b1d6f0 4e80ff9 3b1d6f0 3e86479 12ab04c f820f5a 3b1d6f0 f820f5a 3b1d6f0 f820f5a 3b1d6f0 f820f5a 3b1d6f0 f820f5a 3b1d6f0 f820f5a 3b1d6f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import pandas as pd
from setfit import SetFitModel
from sentence_transformers import util
import torch
import gradio as gr
from datetime import datetime
# ==================================================
# Initialize FastAPI
# ==================================================
app = FastAPI(title="Transaction Category Mapper")

# ==================================================
# Load main model
# ==================================================
# The model is loaded once at import time and shared by every request handler.
# NOTE(review): the identifier looks like a Hugging Face Hub repo id, so startup
# presumably needs the weights to be downloadable or already cached - confirm.
print("Loading main model...")
model = SetFitModel.from_pretrained("yassine123Z/EmissionFactor-mapper2-v2")
print("✅ Model loaded successfully!")
# ==================================================
# Reference Categories
# ==================================================
# Reference table that incoming transactions are matched against.
# The three lists below are parallel: position i of each list describes the same
# reference row, so all three must keep the same length (82 entries each) or
# the DataFrame constructor will reject them.
ref_data = pd.DataFrame({
# Top-level category of each reference row.
"Cat1EN": [
"Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
"Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
"Purchase of goods","Purchase of goods","Purchase of materials","Purchase of materials",
"Purchase of materials","Purchase of materials","Purchase of materials","Purchase of materials",
"Purchase of services","Purchase of services","Purchase of services","Purchase of services",
"Purchase of services","Purchase of services","Purchase of services","Purchase of services",
"Purchase of services","Purchase of services","Purchase of services","Purchase of services",
"Purchase of services","Purchase of services","Food & beverages","Food & beverages",
"Food & beverages","Food & beverages","Food & beverages","Food & beverages",
"Food & beverages","Food & beverages","Food & beverages","Food & beverages",
"Heating and air conditioning","Heating and air conditioning","Fuels","Fuels","Fuels","Fuels",
"Fuels","Fuels",
"Mobility (freight)","Mobility (freight)","Mobility (freight)","Mobility (freight)",
"Mobility (freight)",
"Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
"Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
"Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
"Process and fugitive emissions","Process and fugitive emissions",
"Process and fugitive emissions",
"Waste treatment","Waste treatment","Waste treatment",
"Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment",
"Waste treatment","Waste treatment","Waste treatment","Waste treatment",
"Use of electricity","Use of electricity","Use of electricity"
],
# Sub-category of each reference row; the same label can appear under more than
# one Cat1EN value (e.g. "Air transport" for both freight and passengers).
"Cat2EN": [
"Sporting goods","Buildings","Office supplies","Water consumption",
"Household appliances","Electrical equipment","Machinery and equipment","Furniture",
"Textiles and clothing","Vehicles","Construction materials","Organic materials",
"Paper and cardboard","Plastics and rubber","Chemicals","Refrigerants and others",
"Equipment rental","Building rental","Furniture rental","Vehicle rental and maintenance",
"Information and cultural services","Catering services","Health services","Specialized craft services",
"Administrative / consulting services","Cleaning services","IT services","Logistics services",
"Marketing / advertising services","Technical services","Alcoholic beverages","Non-alcoholic beverages",
"Condiments","Desserts","Fruits and vegetables","Fats and oils","Prepared / cooked meals",
"Animal products","Cereal products","Dairy products","Heat and steam","Air conditioning and refrigeration",
"Fossil fuels","Mobile fossil fuels","Organic fuels","Gaseous fossil fuels","Liquid fossil fuels",
"Solid fossil fuels",
"Air transport","Ship transport","Truck transport","Combined transport",
"Train transport",
"Air transport","Coach / Urban bus","Ship transport","Combined transport",
"E-Bike","Accommodation / Events","Soft mobility","Motorcycle / Scooter","Train transport",
"Public transport","Car",
"Agriculture","Global warming potential","Industrial processes",
"Commercial and industrial","Wastewater","Electrical equipment","Households and similar",
"Metal","Organic materials","Paper and cardboard","Batteries and accumulators","Plastics",
"Fugitive process emissions","Textiles","Glass",
"Electricity for electric vehicles","Renewables","Standard"
],
# Short free-text description of each row; it is concatenated with the two
# category labels to form the text that gets embedded.
"DescriptionCat2EN": [
"Goods purchase - sports","Goods purchase - buildings","Goods purchase - office items","Goods purchase - water",
"Goods purchase - appliances","Goods purchase - electricals","Goods purchase - machinery","Goods purchase - furniture",
"Goods purchase - textiles","Goods purchase - vehicles","Material purchase - construction","Material purchase - organic",
"Material purchase - paper","Material purchase - plastics","Material purchase - chemicals","Material purchase - refrigerants",
"Service - equipment rental","Service - building rental","Service - furniture rental","Service - vehicles",
"Service - info/culture","Service - catering","Service - healthcare","Service - crafts",
"Service - admin/consulting","Service - cleaning","Service - IT","Service - logistics",
"Service - marketing","Service - technical","Beverages - alcoholic","Beverages - non-alcoholic",
"Food condiments","Food desserts","Food fruits & vegetables","Food fats & oils","Prepared meals",
"Animal-based food","Cereal-based food","Dairy products","Heating - heat & steam","Heating - cooling/refrigeration",
"Fuel - fossil","Fuel - mobile fossil","Fuel - organic","Fuel - gaseous","Fuel - liquid","Fuel - solid",
"Freight transport - air","Freight transport - ship","Freight transport - truck","Freight transport - combined",
"Freight transport - train",
"Passenger transport - air","Passenger transport - bus","Passenger transport - ship",
"Passenger transport - combined","Passenger transport - e-bike","Passenger transport - accommodation/events",
"Passenger transport - soft mobility","Passenger transport - scooter/motorbike","Passenger transport - train",
"Passenger transport - public","Passenger transport - car",
"Emissions - agriculture","Emissions - warming potential",
"Emissions - industry",
"Waste - commercial/industrial","Waste - wastewater","Waste - electricals",
"Waste - households","Waste - metals","Waste - organics","Waste - paper","Waste - batteries",
"Waste - plastics","Waste - fugitive","Waste - textiles","Waste - glass",
"Electricity - EVs","Electricity - renewables","Electricity - standard"
]
})
# Build one space-separated search string per reference row, then embed every
# row once at startup so requests only have to encode the incoming text.
ref_data["combined"] = [
    " ".join(fields)
    for fields in zip(
        ref_data["Cat1EN"], ref_data["Cat2EN"], ref_data["DescriptionCat2EN"]
    )
]
ref_embeddings = model.encode(list(ref_data["combined"]))

# Distinct category labels, alphabetically ordered
unique_cat1 = sorted(set(ref_data["Cat1EN"]))
unique_cat2 = sorted(set(ref_data["Cat2EN"]))

# ==================================================
# Storage for corrections
# ==================================================
# In-memory list, empty at startup; its length is reported by /health and its
# contents are returned by /corrections.
corrections_data = []
# ==================================================
# Classification Functions
# ==================================================
def classify_transaction(text: str, top_k=3):
    """Return the reference categories most similar to ``text``.

    The text is embedded with the module-level ``model`` and compared by cosine
    similarity against the precomputed ``ref_embeddings``.

    Args:
        text: Free-text transaction description.
        top_k: Maximum number of matches to return; capped at the number of
            reference rows.

    Returns:
        A list of dicts with keys ``"cat1"``, ``"cat2"`` and ``"score"``
        (a float), ordered from most to least similar. The list is empty when
        ``text`` is ``None``/blank or ``top_k`` is smaller than 1.
    """
    # Cheap guards first, so blank input or a non-positive k never reaches the model.
    if not text or not text.strip() or top_k < 1:
        return []

    query_emb = model.encode([text])[0]
    # as_tensor accepts arrays and tensors alike, avoiding the copy (and the
    # warning) that torch.tensor() produces when handed an existing tensor.
    scores = util.pytorch_cos_sim(
        torch.as_tensor(query_emb), torch.as_tensor(ref_embeddings)
    ).flatten()

    # A single topk call yields values and indices together, already sorted.
    best = scores.topk(min(top_k, len(scores)))

    results = []
    for idx, score in zip(best.indices.tolist(), best.values.tolist()):
        row = ref_data.iloc[idx]
        results.append({
            "cat1": row["Cat1EN"],
            "cat2": row["Cat2EN"],
            "score": float(score),
        })
    return results
def classify_single(text: str):
    """Classify ``text`` and return only the single best match.

    Returns:
        A ``(cat1, cat2, score)`` tuple. For blank input the tuple is
        ``("Please enter a transaction", "", 0.0)``; when the classifier
        yields nothing it is ``("No results", "", 0.0)``.
    """
    if len(text.strip()) == 0:
        return "Please enter a transaction", "", 0.0

    matches = classify_transaction(text, top_k=1)
    if matches:
        best = matches[0]
        return best["cat1"], best["cat2"], best["score"]
    return "No results", "", 0.0
# ==================================================
# Gradio UI
# ==================================================
def main_ui_fn(text):
    """Format the best match for ``text`` as a Markdown snippet for the UI.

    The result has three lines: best Cat1, best Cat2 and the similarity score
    rounded to three decimals.
    """
    best_cat1, best_cat2, similarity = classify_single(text)
    lines = (
        f"**Best Cat1:** {best_cat1}",
        f"**Best Cat2:** {best_cat2}",
        f"**Score:** {round(similarity,3)}",
    )
    return "\n".join(lines)
# Single-textbox Gradio front end: the entered text goes through main_ui_fn and
# the returned Markdown is rendered as the prediction.
main_ui = gr.Interface(
    fn=main_ui_fn,
    inputs=gr.Textbox(label="Enter transaction text"),
    outputs=gr.Markdown(label="Predicted Category"),
    title="πΌ Transaction Category Mapper",
    description="Predicts the best matching category for your transaction using NLP similarity.",
)

# FIX: mount the Gradio app AFTER defining it.
# The UI is served under /ui (an earlier revision mounted it at "/").
app = gr.mount_gradio_app(app, main_ui, path="/ui")
# ==================================================
# REST API Endpoints
# ==================================================
# Request body accepted by POST /map_categories.
class TransactionsRequest(BaseModel):
    # Free-text transaction descriptions; the endpoint classifies each entry
    # on its own and reports one match per entry.
    transactions: List[str]
@app.get("/health")
def health_check():
    """Report service status, whether the model object exists, and how many
    corrections are currently held in memory."""
    model_ready = model is not None
    stored = len(corrections_data)
    return {
        "status": "healthy",
        "model_loaded": model_ready,
        "corrections_count": stored,
    }
@app.post("/map_categories")
def map_categories(request: TransactionsRequest):
    """Classify every transaction in the request body.

    Returns:
        ``{"matches": [...]}`` with one entry per input text, in input order.
        Each entry holds the input text, the best Cat1/Cat2 labels and the
        similarity score, exactly as produced by ``classify_single``.
    """
    fields = ("input_text", "best_Cat1", "best_Cat2", "similarity")
    matches = [
        dict(zip(fields, (text, *classify_single(text))))
        for text in request.transactions
    ]
    return {"matches": matches}
@app.get("/corrections")
def get_corrections():
    """Return all stored corrections together with their count."""
    total = len(corrections_data)
    return {"corrections": corrections_data, "count": total}
# Startup banner listing the available routes. The UI path matches the mount
# point passed to gr.mount_gradio_app ("/ui").
print("✅ App initialized successfully!")
print("π Interface available at: /ui")
print("π₯ Health Check: /health")
print("π API Endpoints: /map_categories, /corrections")
|