yassine123Z committed on
Commit
7e6741b
·
verified ·
1 Parent(s): 0181d6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -116
app.py CHANGED
@@ -1,124 +1,50 @@
1
- import streamlit as st
 
 
 
2
  import pandas as pd
3
  from setfit import SetFitModel
4
  from sentence_transformers import util
5
  import torch
6
 
7
- # Load model once
8
- @st.cache_resource
9
- def load_model():
10
- return SetFitModel.from_pretrained(
11
- "HEN10/setfit-particular-transaction-solon-embeddings-labels-large-kaggle-automatisation-v1"
12
- )
13
-
14
- model = load_model()
15
-
16
- # Load reference categories
17
- ref_data = pd.DataFrame({
18
- "Cat1EN": [
19
- "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
20
- "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
21
- "Purchase of goods","Purchase of goods","Purchase of materials","Purchase of materials",
22
- "Purchase of materials","Purchase of materials","Purchase of materials","Purchase of materials",
23
- "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
24
- "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
25
- "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
26
- "Purchase of services","Purchase of services","Food & beverages","Food & beverages",
27
- "Food & beverages","Food & beverages","Food & beverages","Food & beverages",
28
- "Food & beverages","Food & beverages","Food & beverages","Food & beverages",
29
- "Heating and air conditioning","Heating and air conditioning","Fuels","Fuels","Fuels","Fuels",
30
- "Fuels","Fuels","Mobility (freight)","Mobility (freight)","Mobility (freight)","Mobility (freight)",
31
- "Mobility (freight)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
32
- "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
33
- "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
34
- "Mobility (passengers)","Process and fugitive emissions","Process and fugitive emissions",
35
- "Process and fugitive emissions","Waste treatment","Waste treatment","Waste treatment",
36
- "Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment",
37
- "Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment",
38
- "Use of electricity","Use of electricity","Use of electricity"
39
- ],
40
- "Cat2EN": [
41
- "Sporting goods","Buildings","Office supplies","Water consumption",
42
- "Household appliances","Electrical equipment","Machinery and equipment","Furniture",
43
- "Textiles and clothing","Vehicles","Construction materials","Organic materials",
44
- "Paper and cardboard","Plastics and rubber","Chemicals","Refrigerants and others",
45
- "Equipment rental","Building rental","Furniture rental","Vehicle rental and maintenance",
46
- "Information and cultural services","Catering services","Health services","Specialized craft services",
47
- "Administrative / consulting services","Cleaning services","IT services","Logistics services",
48
- "Marketing / advertising services","Technical services","Alcoholic beverages","Non-alcoholic beverages",
49
- "Condiments","Desserts","Fruits and vegetables","Fats and oils","Prepared / cooked meals",
50
- "Animal products","Cereal products","Dairy products","Heat and steam","Air conditioning and refrigeration",
51
- "Fossil fuels","Mobile fossil fuels","Organic fuels","Gaseous fossil fuels","Liquid fossil fuels",
52
- "Solid fossil fuels","Air transport","Ship transport","Truck transport","Combined transport",
53
- "Train transport","Air transport","Coach / Urban bus","Ship transport","Combined transport",
54
- "E-Bike","Accommodation / Events","Soft mobility","Motorcycle / Scooter","Train transport",
55
- "Public transport","Car","Agriculture","Global warming potential","Industrial processes",
56
- "Commercial and industrial","Wastewater","Electrical equipment","Households and similar",
57
- "Metal","Organic materials","Paper and cardboard","Batteries and accumulators","Plastics",
58
- "Fugitive process emissions","Textiles","Glass","Electricity for electric vehicles","Renewables","Standard"
59
- ],
60
- "DescriptionCat2EN": [
61
- "Goods purchase - sports","Goods purchase - buildings","Goods purchase - office items","Goods purchase - water",
62
- "Goods purchase - appliances","Goods purchase - electricals","Goods purchase - machinery","Goods purchase - furniture",
63
- "Goods purchase - textiles","Goods purchase - vehicles","Material purchase - construction","Material purchase - organic",
64
- "Material purchase - paper","Material purchase - plastics","Material purchase - chemicals","Material purchase - refrigerants",
65
- "Service - equipment rental","Service - building rental","Service - furniture rental","Service - vehicles",
66
- "Service - info/culture","Service - catering","Service - healthcare","Service - crafts",
67
- "Service - admin/consulting","Service - cleaning","Service - IT","Service - logistics",
68
- "Service - marketing","Service - technical","Beverages - alcoholic","Beverages - non-alcoholic",
69
- "Food condiments","Food desserts","Food fruits & vegetables","Food fats & oils","Prepared meals",
70
- "Animal-based food","Cereal-based food","Dairy products","Heating - heat & steam","Heating - cooling/refrigeration",
71
- "Fuel - fossil","Fuel - mobile fossil","Fuel - organic","Fuel - gaseous","Fuel - liquid","Fuel - solid",
72
- "Freight transport - air","Freight transport - ship","Freight transport - truck","Freight transport - combined",
73
- "Freight transport - train","Passenger transport - air","Passenger transport - bus","Passenger transport - ship",
74
- "Passenger transport - combined","Passenger transport - e-bike","Passenger transport - accommodation/events",
75
- "Passenger transport - soft mobility","Passenger transport - scooter/motorbike","Passenger transport - train",
76
- "Passenger transport - public","Passenger transport - car","Emissions - agriculture","Emissions - warming potential",
77
- "Emissions - industry","Waste - commercial/industrial","Waste - wastewater","Waste - electricals",
78
- "Waste - households","Waste - metals","Waste - organics","Waste - paper","Waste - batteries",
79
- "Waste - plastics","Waste - fugitive","Waste - textiles","Waste - glass",
80
- "Electricity - EVs","Electricity - renewables","Electricity - standard"
81
- ]
82
- })
83
-
84
- # Precompute embeddings
85
  ref_data["combined"] = ref_data[["Cat1EN", "Cat2EN", "DescriptionCat2EN"]].agg(" ".join, axis=1)
86
  ref_embeddings = model.encode(ref_data["combined"].tolist())
87
 
88
- # Streamlit UI
89
- st.title("📊 Transaction Category Mapper")
90
- st.write("Upload a CSV file with a column of transactions, and the app will map them to categories.")
91
-
92
- uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
93
-
94
- if uploaded_file:
95
- df = pd.read_csv(uploaded_file)
96
-
97
- # Let user choose which column to map
98
- col_to_use = st.selectbox("Select the column containing transactions:", df.columns)
99
-
100
- if st.button("Run Mapping"):
101
- results = []
102
- for text in df[col_to_use].dropna().tolist():
103
- trans_emb = model.encode([text])[0]
104
- scores = util.pytorch_cos_sim(torch.tensor(trans_emb), torch.tensor(ref_embeddings)).flatten()
105
- best_idx = scores.argmax().item()
106
- results.append({
107
- "input_text": text,
108
- "best_Cat1": ref_data.iloc[best_idx]["Cat1EN"],
109
- "best_Cat2": ref_data.iloc[best_idx]["Cat2EN"],
110
- "similarity": float(scores[best_idx])
111
- })
112
-
113
- results_df = pd.DataFrame(results)
114
- st.success("✅ Mapping completed!")
115
- st.dataframe(results_df)
116
-
117
- # Option to download
118
- csv = results_df.to_csv(index=False).encode("utf-8")
119
- st.download_button(
120
- label="📥 Download results as CSV",
121
- data=csv,
122
- file_name="mapped_transactions.csv",
123
- mime="text/csv"
124
- )
 
1
+ # app.py
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+ from typing import List
5
  import pandas as pd
6
  from setfit import SetFitModel
7
  from sentence_transformers import util
8
  import torch
9
 
10
+ app = FastAPI()
11
+
12
+ # Load your trained model once at startup
13
+ model = SetFitModel.from_pretrained(
14
+ "HEN10/setfit-particular-transaction-solon-embeddings-labels-large-kaggle-automatisation-v1"
15
+ )
16
+
17
+ # Dummy reference categories (replace with your real categories or load them from a CSV)
18
+ ref_data = pd.DataFrame({ "Cat1EN": [ "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods", "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods", "Purchase of goods","Purchase of goods","Purchase of materials","Purchase of materials", "Purchase of materials","Purchase of materials","Purchase of materials","Purchase of materials", "Purchase of services","Purchase of services","Purchase of services","Purchase of services", "Purchase of services","Purchase of services","Purchase of services","Purchase of services", "Purchase of services","Purchase of services","Purchase of services","Purchase of services", "Purchase of services","Purchase of services","Food & beverages","Food & beverages", "Food & beverages","Food & beverages","Food & beverages","Food & beverages", "Food & beverages","Food & beverages","Food & beverages","Food & beverages", "Heating and air conditioning","Heating and air conditioning","Fuels","Fuels","Fuels","Fuels", "Fuels","Fuels", "Mobility (freight)","Mobility (freight)","Mobility (freight)","Mobility (freight)", "Mobility (freight)", "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)", "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)", "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)", "Process and fugitive emissions","Process and fugitive emissions", "Process and fugitive emissions", "Waste treatment","Waste treatment","Waste treatment", "Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment", "Waste treatment","Waste treatment","Waste treatment","Waste treatment", "Use of electricity","Use of electricity","Use of electricity" ],
19
+
20
+ "Cat2EN": [ "Sporting goods","Buildings","Office supplies","Water consumption", "Household appliances","Electrical equipment","Machinery and equipment","Furniture", "Textiles and clothing","Vehicles","Construction materials","Organic materials", "Paper and cardboard","Plastics and rubber","Chemicals","Refrigerants and others", "Equipment rental","Building rental","Furniture rental","Vehicle rental and maintenance", "Information and cultural services","Catering services","Health services","Specialized craft services", "Administrative / consulting services","Cleaning services","IT services","Logistics services", "Marketing / advertising services","Technical services","Alcoholic beverages","Non-alcoholic beverages", "Condiments","Desserts","Fruits and vegetables","Fats and oils","Prepared / cooked meals", "Animal products","Cereal products","Dairy products","Heat and steam","Air conditioning and refrigeration", "Fossil fuels","Mobile fossil fuels","Organic fuels","Gaseous fossil fuels","Liquid fossil fuels", "Solid fossil fuels", "Air transport","Ship transport","Truck transport","Combined transport", "Train transport", "Air transport","Coach / Urban bus","Ship transport","Combined transport", "E-Bike","Accommodation / Events","Soft mobility","Motorcycle / Scooter","Train transport", "Public transport","Car", "Agriculture","Global warming potential","Industrial processes", "Commercial and industrial","Wastewater","Electrical equipment","Households and similar", "Metal","Organic materials","Paper and cardboard","Batteries and accumulators","Plastics", "Fugitive process emissions","Textiles","Glass", "Electricity for electric vehicles","Renewables","Standard" ],
21
+
22
+ "DescriptionCat2EN": [ "Goods purchase - sports","Goods purchase - buildings","Goods purchase - office items","Goods purchase - water", "Goods purchase - appliances","Goods purchase - electricals","Goods purchase - machinery","Goods purchase - furniture", "Goods purchase - textiles","Goods purchase - vehicles","Material purchase - construction","Material purchase - organic", "Material purchase - paper","Material purchase - plastics","Material purchase - chemicals","Material purchase - refrigerants", "Service - equipment rental","Service - building rental","Service - furniture rental","Service - vehicles", "Service - info/culture","Service - catering","Service - healthcare","Service - crafts", "Service - admin/consulting","Service - cleaning","Service - IT","Service - logistics", "Service - marketing","Service - technical","Beverages - alcoholic","Beverages - non-alcoholic", "Food condiments","Food desserts","Food fruits & vegetables","Food fats & oils","Prepared meals", "Animal-based food","Cereal-based food","Dairy products","Heating - heat & steam","Heating - cooling/refrigeration", "Fuel - fossil","Fuel - mobile fossil","Fuel - organic","Fuel - gaseous","Fuel - liquid","Fuel - solid", "Freight transport - air","Freight transport - ship","Freight transport - truck","Freight transport - combined", "Freight transport - train", "Passenger transport - air","Passenger transport - bus","Passenger transport - ship", "Passenger transport - combined","Passenger transport - e-bike","Passenger transport - accommodation/events", "Passenger transport - soft mobility","Passenger transport - scooter/motorbike","Passenger transport - train", "Passenger transport - public","Passenger transport - car", "Emissions - agriculture","Emissions - warming potential", "Emissions - industry", "Waste - commercial/industrial","Waste - wastewater","Waste - electricals", "Waste - households","Waste - metals","Waste - organics","Waste - paper","Waste - batteries", "Waste - plastics","Waste - fugitive","Waste - textiles","Waste - glass", "Electricity - EVs","Electricity - renewables","Electricity - standard" ]
23
+
24
+ })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  ref_data["combined"] = ref_data[["Cat1EN", "Cat2EN", "DescriptionCat2EN"]].agg(" ".join, axis=1)
26
  ref_embeddings = model.encode(ref_data["combined"].tolist())
27
 
28
+ # Root endpoint so Hugging Face doesn’t show "Not Found"
29
+ @app.get("/")
30
+ def read_root():
31
+ return {"status": "ok", "message": "Category mapping API is running. Use POST /map_categories"}
32
+
33
+ # ✅ Define request schema
34
+ class TransactionsRequest(BaseModel):
35
+ transactions: List[str]
36
+
37
+ @app.post("/map_categories")
38
+ def map_categories(request: TransactionsRequest):
39
+ results = []
40
+ for text in request.transactions:
41
+ trans_emb = model.encode([text])[0]
42
+ scores = util.pytorch_cos_sim(torch.tensor(trans_emb), torch.tensor(ref_embeddings)).flatten()
43
+ best_idx = scores.argmax().item()
44
+ results.append({
45
+ "input_text": text,
46
+ "best_Cat1": ref_data.iloc[best_idx]["Cat1EN"],
47
+ "best_Cat2": ref_data.iloc[best_idx]["Cat2EN"],
48
+ "similarity": float(scores[best_idx])
49
+ })
50
+ return {"matches": results}