Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,124 +1,50 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from setfit import SetFitModel
|
| 4 |
from sentence_transformers import util
|
| 5 |
import torch
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
"Purchase of materials","Purchase of materials","Purchase of materials","Purchase of materials",
|
| 23 |
-
"Purchase of services","Purchase of services","Purchase of services","Purchase of services",
|
| 24 |
-
"Purchase of services","Purchase of services","Purchase of services","Purchase of services",
|
| 25 |
-
"Purchase of services","Purchase of services","Purchase of services","Purchase of services",
|
| 26 |
-
"Purchase of services","Purchase of services","Food & beverages","Food & beverages",
|
| 27 |
-
"Food & beverages","Food & beverages","Food & beverages","Food & beverages",
|
| 28 |
-
"Food & beverages","Food & beverages","Food & beverages","Food & beverages",
|
| 29 |
-
"Heating and air conditioning","Heating and air conditioning","Fuels","Fuels","Fuels","Fuels",
|
| 30 |
-
"Fuels","Fuels","Mobility (freight)","Mobility (freight)","Mobility (freight)","Mobility (freight)",
|
| 31 |
-
"Mobility (freight)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
|
| 32 |
-
"Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
|
| 33 |
-
"Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
|
| 34 |
-
"Mobility (passengers)","Process and fugitive emissions","Process and fugitive emissions",
|
| 35 |
-
"Process and fugitive emissions","Waste treatment","Waste treatment","Waste treatment",
|
| 36 |
-
"Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment",
|
| 37 |
-
"Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment",
|
| 38 |
-
"Use of electricity","Use of electricity","Use of electricity"
|
| 39 |
-
],
|
| 40 |
-
"Cat2EN": [
|
| 41 |
-
"Sporting goods","Buildings","Office supplies","Water consumption",
|
| 42 |
-
"Household appliances","Electrical equipment","Machinery and equipment","Furniture",
|
| 43 |
-
"Textiles and clothing","Vehicles","Construction materials","Organic materials",
|
| 44 |
-
"Paper and cardboard","Plastics and rubber","Chemicals","Refrigerants and others",
|
| 45 |
-
"Equipment rental","Building rental","Furniture rental","Vehicle rental and maintenance",
|
| 46 |
-
"Information and cultural services","Catering services","Health services","Specialized craft services",
|
| 47 |
-
"Administrative / consulting services","Cleaning services","IT services","Logistics services",
|
| 48 |
-
"Marketing / advertising services","Technical services","Alcoholic beverages","Non-alcoholic beverages",
|
| 49 |
-
"Condiments","Desserts","Fruits and vegetables","Fats and oils","Prepared / cooked meals",
|
| 50 |
-
"Animal products","Cereal products","Dairy products","Heat and steam","Air conditioning and refrigeration",
|
| 51 |
-
"Fossil fuels","Mobile fossil fuels","Organic fuels","Gaseous fossil fuels","Liquid fossil fuels",
|
| 52 |
-
"Solid fossil fuels","Air transport","Ship transport","Truck transport","Combined transport",
|
| 53 |
-
"Train transport","Air transport","Coach / Urban bus","Ship transport","Combined transport",
|
| 54 |
-
"E-Bike","Accommodation / Events","Soft mobility","Motorcycle / Scooter","Train transport",
|
| 55 |
-
"Public transport","Car","Agriculture","Global warming potential","Industrial processes",
|
| 56 |
-
"Commercial and industrial","Wastewater","Electrical equipment","Households and similar",
|
| 57 |
-
"Metal","Organic materials","Paper and cardboard","Batteries and accumulators","Plastics",
|
| 58 |
-
"Fugitive process emissions","Textiles","Glass","Electricity for electric vehicles","Renewables","Standard"
|
| 59 |
-
],
|
| 60 |
-
"DescriptionCat2EN": [
|
| 61 |
-
"Goods purchase - sports","Goods purchase - buildings","Goods purchase - office items","Goods purchase - water",
|
| 62 |
-
"Goods purchase - appliances","Goods purchase - electricals","Goods purchase - machinery","Goods purchase - furniture",
|
| 63 |
-
"Goods purchase - textiles","Goods purchase - vehicles","Material purchase - construction","Material purchase - organic",
|
| 64 |
-
"Material purchase - paper","Material purchase - plastics","Material purchase - chemicals","Material purchase - refrigerants",
|
| 65 |
-
"Service - equipment rental","Service - building rental","Service - furniture rental","Service - vehicles",
|
| 66 |
-
"Service - info/culture","Service - catering","Service - healthcare","Service - crafts",
|
| 67 |
-
"Service - admin/consulting","Service - cleaning","Service - IT","Service - logistics",
|
| 68 |
-
"Service - marketing","Service - technical","Beverages - alcoholic","Beverages - non-alcoholic",
|
| 69 |
-
"Food condiments","Food desserts","Food fruits & vegetables","Food fats & oils","Prepared meals",
|
| 70 |
-
"Animal-based food","Cereal-based food","Dairy products","Heating - heat & steam","Heating - cooling/refrigeration",
|
| 71 |
-
"Fuel - fossil","Fuel - mobile fossil","Fuel - organic","Fuel - gaseous","Fuel - liquid","Fuel - solid",
|
| 72 |
-
"Freight transport - air","Freight transport - ship","Freight transport - truck","Freight transport - combined",
|
| 73 |
-
"Freight transport - train","Passenger transport - air","Passenger transport - bus","Passenger transport - ship",
|
| 74 |
-
"Passenger transport - combined","Passenger transport - e-bike","Passenger transport - accommodation/events",
|
| 75 |
-
"Passenger transport - soft mobility","Passenger transport - scooter/motorbike","Passenger transport - train",
|
| 76 |
-
"Passenger transport - public","Passenger transport - car","Emissions - agriculture","Emissions - warming potential",
|
| 77 |
-
"Emissions - industry","Waste - commercial/industrial","Waste - wastewater","Waste - electricals",
|
| 78 |
-
"Waste - households","Waste - metals","Waste - organics","Waste - paper","Waste - batteries",
|
| 79 |
-
"Waste - plastics","Waste - fugitive","Waste - textiles","Waste - glass",
|
| 80 |
-
"Electricity - EVs","Electricity - renewables","Electricity - standard"
|
| 81 |
-
]
|
| 82 |
-
})
|
| 83 |
-
|
| 84 |
-
# Precompute embeddings
|
| 85 |
ref_data["combined"] = ref_data[["Cat1EN", "Cat2EN", "DescriptionCat2EN"]].agg(" ".join, axis=1)
|
| 86 |
ref_embeddings = model.encode(ref_data["combined"].tolist())
|
| 87 |
|
| 88 |
-
#
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
})
|
| 112 |
-
|
| 113 |
-
results_df = pd.DataFrame(results)
|
| 114 |
-
st.success("✅ Mapping completed!")
|
| 115 |
-
st.dataframe(results_df)
|
| 116 |
-
|
| 117 |
-
# Option to download
|
| 118 |
-
csv = results_df.to_csv(index=False).encode("utf-8")
|
| 119 |
-
st.download_button(
|
| 120 |
-
label="📥 Download results as CSV",
|
| 121 |
-
data=csv,
|
| 122 |
-
file_name="mapped_transactions.csv",
|
| 123 |
-
mime="text/csv"
|
| 124 |
-
)
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
from fastapi import FastAPI
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
from typing import List
|
| 5 |
import pandas as pd
|
| 6 |
from setfit import SetFitModel
|
| 7 |
from sentence_transformers import util
|
| 8 |
import torch
|
| 9 |
|
| 10 |
+
# Application object that the route decorators in this module register on.
app = FastAPI()

# Identifier of the fine-tuned SetFit checkpoint passed to from_pretrained().
_MODEL_ID = (
    "HEN10/setfit-particular-transaction-solon-embeddings-labels-large-kaggle-automatisation-v1"
)

# Loaded once at import time so every request reuses the same model instance.
model = SetFitModel.from_pretrained(_MODEL_ID)
|
| 16 |
+
|
| 17 |
+
# Dummy reference categories (replace with your real categories or load CSV)
|
| 18 |
+
# Reference taxonomy used for matching. Row i pairs a top-level category
# (Cat1EN) with a sub-category (Cat2EN) and a short description
# (DescriptionCat2EN). The three lists are positional, so they must stay the
# same length (82 entries each) and in the same order.
ref_data = pd.DataFrame({
    # Built by repetition so the size of each group is visible at a glance;
    # the counts match the per-group entries in the two lists below.
    "Cat1EN": (
        ["Purchase of goods"] * 10
        + ["Purchase of materials"] * 6
        + ["Purchase of services"] * 14
        + ["Food & beverages"] * 10
        + ["Heating and air conditioning"] * 2
        + ["Fuels"] * 6
        + ["Mobility (freight)"] * 5
        + ["Mobility (passengers)"] * 11
        + ["Process and fugitive emissions"] * 3
        + ["Waste treatment"] * 12
        + ["Use of electricity"] * 3
    ),
    "Cat2EN": [
        # Purchase of goods (10)
        "Sporting goods", "Buildings", "Office supplies", "Water consumption",
        "Household appliances", "Electrical equipment", "Machinery and equipment",
        "Furniture", "Textiles and clothing", "Vehicles",
        # Purchase of materials (6)
        "Construction materials", "Organic materials", "Paper and cardboard",
        "Plastics and rubber", "Chemicals", "Refrigerants and others",
        # Purchase of services (14)
        "Equipment rental", "Building rental", "Furniture rental",
        "Vehicle rental and maintenance", "Information and cultural services",
        "Catering services", "Health services", "Specialized craft services",
        "Administrative / consulting services", "Cleaning services", "IT services",
        "Logistics services", "Marketing / advertising services", "Technical services",
        # Food & beverages (10)
        "Alcoholic beverages", "Non-alcoholic beverages", "Condiments", "Desserts",
        "Fruits and vegetables", "Fats and oils", "Prepared / cooked meals",
        "Animal products", "Cereal products", "Dairy products",
        # Heating and air conditioning (2)
        "Heat and steam", "Air conditioning and refrigeration",
        # Fuels (6)
        "Fossil fuels", "Mobile fossil fuels", "Organic fuels",
        "Gaseous fossil fuels", "Liquid fossil fuels", "Solid fossil fuels",
        # Mobility (freight) (5)
        "Air transport", "Ship transport", "Truck transport", "Combined transport",
        "Train transport",
        # Mobility (passengers) (11)
        "Air transport", "Coach / Urban bus", "Ship transport", "Combined transport",
        "E-Bike", "Accommodation / Events", "Soft mobility", "Motorcycle / Scooter",
        "Train transport", "Public transport", "Car",
        # Process and fugitive emissions (3)
        "Agriculture", "Global warming potential", "Industrial processes",
        # Waste treatment (12)
        "Commercial and industrial", "Wastewater", "Electrical equipment",
        "Households and similar", "Metal", "Organic materials", "Paper and cardboard",
        "Batteries and accumulators", "Plastics", "Fugitive process emissions",
        "Textiles", "Glass",
        # Use of electricity (3)
        "Electricity for electric vehicles", "Renewables", "Standard",
    ],
    "DescriptionCat2EN": [
        # Purchase of goods (10)
        "Goods purchase - sports", "Goods purchase - buildings",
        "Goods purchase - office items", "Goods purchase - water",
        "Goods purchase - appliances", "Goods purchase - electricals",
        "Goods purchase - machinery", "Goods purchase - furniture",
        "Goods purchase - textiles", "Goods purchase - vehicles",
        # Purchase of materials (6)
        "Material purchase - construction", "Material purchase - organic",
        "Material purchase - paper", "Material purchase - plastics",
        "Material purchase - chemicals", "Material purchase - refrigerants",
        # Purchase of services (14)
        "Service - equipment rental", "Service - building rental",
        "Service - furniture rental", "Service - vehicles",
        "Service - info/culture", "Service - catering",
        "Service - healthcare", "Service - crafts",
        "Service - admin/consulting", "Service - cleaning",
        "Service - IT", "Service - logistics",
        "Service - marketing", "Service - technical",
        # Food & beverages (10)
        "Beverages - alcoholic", "Beverages - non-alcoholic",
        "Food condiments", "Food desserts", "Food fruits & vegetables",
        "Food fats & oils", "Prepared meals",
        "Animal-based food", "Cereal-based food", "Dairy products",
        # Heating and air conditioning (2)
        "Heating - heat & steam", "Heating - cooling/refrigeration",
        # Fuels (6)
        "Fuel - fossil", "Fuel - mobile fossil", "Fuel - organic",
        "Fuel - gaseous", "Fuel - liquid", "Fuel - solid",
        # Mobility (freight) (5)
        "Freight transport - air", "Freight transport - ship",
        "Freight transport - truck", "Freight transport - combined",
        "Freight transport - train",
        # Mobility (passengers) (11)
        "Passenger transport - air", "Passenger transport - bus",
        "Passenger transport - ship", "Passenger transport - combined",
        "Passenger transport - e-bike", "Passenger transport - accommodation/events",
        "Passenger transport - soft mobility", "Passenger transport - scooter/motorbike",
        "Passenger transport - train", "Passenger transport - public",
        "Passenger transport - car",
        # Process and fugitive emissions (3)
        "Emissions - agriculture", "Emissions - warming potential",
        "Emissions - industry",
        # Waste treatment (12)
        "Waste - commercial/industrial", "Waste - wastewater", "Waste - electricals",
        "Waste - households", "Waste - metals", "Waste - organics",
        "Waste - paper", "Waste - batteries", "Waste - plastics",
        "Waste - fugitive", "Waste - textiles", "Waste - glass",
        # Use of electricity (3)
        "Electricity - EVs", "Electricity - renewables", "Electricity - standard",
    ],
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# One sentence per reference row: the three descriptive columns joined by spaces.
_text_columns = ["Cat1EN", "Cat2EN", "DescriptionCat2EN"]
ref_data["combined"] = ref_data[_text_columns].agg(" ".join, axis=1)

# Encoded once at import time; request handlers compare against these vectors.
_reference_sentences = ref_data["combined"].tolist()
ref_embeddings = model.encode(_reference_sentences)
|
| 27 |
|
| 28 |
+
# ✅ Root endpoint so Hugging Face doesn’t show "Not Found"
|
| 29 |
+
@app.get("/")
def read_root():
    """Return a small status payload that points callers at POST /map_categories."""
    payload = {
        "status": "ok",
        "message": "Category mapping API is running. Use POST /map_categories",
    }
    return payload
|
| 32 |
+
|
| 33 |
+
# ✅ Define request schema
|
| 34 |
+
class TransactionsRequest(BaseModel):
    """Request body accepted by POST /map_categories."""

    # Transaction texts to classify; each entry yields one result entry.
    transactions: List[str]
|
| 36 |
+
|
| 37 |
+
@app.post("/map_categories")
def map_categories(request: TransactionsRequest):
    """Match every transaction text to its most similar reference category.

    All texts are encoded in a single model call and compared against the
    precomputed ``ref_embeddings`` with cosine similarity; the reference row
    with the highest score wins.

    Args:
        request: Body holding the list of transaction texts.

    Returns:
        ``{"matches": [...]}`` with one dict per input text, in input order,
        containing ``input_text``, ``best_Cat1``, ``best_Cat2`` and
        ``similarity`` (the winning cosine score as a float).
    """
    texts = request.transactions
    if not texts:
        # Nothing to encode; same result the per-item loop gave for no items.
        return {"matches": []}

    # Encode the whole batch at once instead of invoking the model per text.
    # as_tensor accepts an ndarray or an existing tensor without re-copying.
    trans_embeddings = torch.as_tensor(model.encode(texts))
    # Loop-invariant, so converted a single time per request.
    reference = torch.as_tensor(ref_embeddings)

    # scores[i, j] is the cosine similarity of texts[i] and reference row j.
    scores = util.pytorch_cos_sim(trans_embeddings, reference)
    best_indices = scores.argmax(dim=1).tolist()

    results = []
    for row, (text, best_idx) in enumerate(zip(texts, best_indices)):
        best_match = ref_data.iloc[best_idx]
        results.append({
            "input_text": text,
            "best_Cat1": best_match["Cat1EN"],
            "best_Cat2": best_match["Cat2EN"],
            "similarity": float(scores[row, best_idx]),
        })
    return {"matches": results}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|