EmissionFactor-fastAPI

Sleeping

App Files Files Community

yassine123Z committed on Oct 22, 2025

Commit

6331c93

verified ·

1 Parent(s): f9eaa32

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -58

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ from setfit import SetFitModel
 from sentence_transformers import util
 import torch
 import gradio as gr
 # ==================================================
 # 🚀 Initialize FastAPI
@@ -23,15 +25,18 @@ model = SetFitModel.from_pretrained(
 # ==================================================
 # 📘 Load Reference Categories
 # ==================================================
-ref_data = pd.DataFrame({ "Cat1EN": [ "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods", "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods", "Purchase of goods","Purchase of goods","Purchase of materials","Purchase of materials", "Purchase of materials","Purchase of materials","Purchase of materials","Purchase of materials", "Purchase of services","Purchase of services","Purchase of services","Purchase of services", "Purchase of services","Purchase of services","Purchase of services","Purchase of services", "Purchase of services","Purchase of services","Purchase of services","Purchase of services", "Purchase of services","Purchase of services","Food & beverages","Food & beverages", "Food & beverages","Food & beverages","Food & beverages","Food & beverages", "Food & beverages","Food & beverages","Food & beverages","Food & beverages", "Heating and air conditioning","Heating and air conditioning","Fuels","Fuels","Fuels","Fuels", "Fuels","Fuels", "Mobility (freight)","Mobility (freight)","Mobility (freight)","Mobility (freight)", "Mobility (freight)", "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)", "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)", "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)", "Process and fugitive emissions","Process and fugitive emissions", "Process and fugitive emissions", "Waste treatment","Waste treatment","Waste treatment", "Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment", "Waste treatment","Waste treatment","Waste treatment","Waste treatment", "Use of electricity","Use of electricity","Use of electricity" ],
-                         "Cat2EN": [ "Sporting goods","Buildings","Office supplies","Water consumption", "Household appliances","Electrical equipment","Machinery and equipment","Furniture", "Textiles and clothing","Vehicles","Construction materials","Organic materials", "Paper and cardboard","Plastics and rubber","Chemicals","Refrigerants and others", "Equipment rental","Building rental","Furniture rental","Vehicle rental and maintenance", "Information and cultural services","Catering services","Health services","Specialized craft services", "Administrative / consulting services","Cleaning services","IT services","Logistics services", "Marketing / advertising services","Technical services","Alcoholic beverages","Non-alcoholic beverages", "Condiments","Desserts","Fruits and vegetables","Fats and oils","Prepared / cooked meals", "Animal products","Cereal products","Dairy products","Heat and steam","Air conditioning and refrigeration", "Fossil fuels","Mobile fossil fuels","Organic fuels","Gaseous fossil fuels","Liquid fossil fuels", "Solid fossil fuels", "Air transport","Ship transport","Truck transport","Combined transport", "Train transport", "Air transport","Coach / Urban bus","Ship transport","Combined transport", "E-Bike","Accommodation / Events","Soft mobility","Motorcycle / Scooter","Train transport", "Public transport","Car", "Agriculture","Global warming potential","Industrial processes", "Commercial and industrial","Wastewater","Electrical equipment","Households and similar", "Metal","Organic materials","Paper and cardboard","Batteries and accumulators","Plastics", "Fugitive process emissions","Textiles","Glass", "Electricity for electric vehicles","Renewables","Standard" ],
-                         "DescriptionCat2EN": [ "Goods purchase - sports","Goods purchase - buildings","Goods purchase - office items","Goods purchase - water", "Goods purchase - appliances","Goods purchase - electricals","Goods purchase - machinery","Goods purchase - furniture", "Goods purchase - textiles","Goods purchase - vehicles","Material purchase - construction","Material purchase - organic", "Material purchase - paper","Material purchase - plastics","Material purchase - chemicals","Material purchase - refrigerants", "Service - equipment rental","Service - building rental","Service - furniture rental","Service - vehicles", "Service - info/culture","Service - catering","Service - healthcare","Service - crafts", "Service - admin/consulting","Service - cleaning","Service - IT","Service - logistics", "Service - marketing","Service - technical","Beverages - alcoholic","Beverages - non-alcoholic", "Food condiments","Food desserts","Food fruits & vegetables","Food fats & oils","Prepared meals", "Animal-based food","Cereal-based food","Dairy products","Heating - heat & steam","Heating - cooling/refrigeration", "Fuel - fossil","Fuel - mobile fossil","Fuel - organic","Fuel - gaseous","Fuel - liquid","Fuel - solid", "Freight transport - air","Freight transport - ship","Freight transport - truck","Freight transport - combined", "Freight transport - train", "Passenger transport - air","Passenger transport - bus","Passenger transport - ship", "Passenger transport - combined","Passenger transport - e-bike","Passenger transport - accommodation/events", "Passenger transport - soft mobility","Passenger transport - scooter/motorbike","Passenger transport - train", "Passenger transport - public","Passenger transport - car", "Emissions - agriculture","Emissions - warming potential", "Emissions - industry", "Waste - commercial/industrial","Waste - wastewater","Waste - electricals", "Waste - households","Waste - metals","Waste - organics","Waste - paper","Waste - batteries", 
"Waste - plastics","Waste - fugitive","Waste - textiles","Waste - glass", "Electricity - EVs","Electricity - renewables","Electricity - standard" ]
-                        })
-# combine all category info into a single string for embeddings
 ref_data["combined"] = ref_data[["Cat1EN", "Cat2EN", "DescriptionCat2EN"]].agg(" ".join, axis=1)
 ref_embeddings = model.encode(ref_data["combined"].tolist())
@@ -48,25 +53,68 @@ def classify_transaction(text):
     return cat1, cat2, score
 # ==================================================
-# 🖥️ Gradio Interface
 # ==================================================
-gradio_ui = gr.Interface(
-    fn=classify_transaction,
-    inputs=gr.Textbox(lines=3, label="Transaction Description", placeholder="Enter a transaction text..."),
-    outputs=[
-        gr.Label(label="Predicted Category 1"),
-        gr.Label(label="Predicted Category 2"),
-        gr.Number(label="Similarity Score")
-    ],
-    title="Transaction Category Classifier",
-    description="Enter a transaction description and get the best-matching category using SetFit embeddings.",
-)
 # Mount Gradio inside FastAPI at /ui
 app = gr.mount_gradio_app(app, gradio_ui, path="/ui")
 # ==================================================
-# 🧾 API Endpoints
 # ==================================================
 class TransactionsRequest(BaseModel):
     transactions: List[str]
@@ -79,43 +127,11 @@ def read_root():
 def map_categories(request: TransactionsRequest):
     results = []
     for text in request.transactions:
-        trans_emb = model.encode([text])[0]
-        scores = util.pytorch_cos_sim(torch.tensor(trans_emb), torch.tensor(ref_embeddings)).flatten()
-        best_idx = scores.argmax().item()
         results.append({
             "input_text": text,
-            "best_Cat1": ref_data.iloc[best_idx]["Cat1EN"],
-            "best_Cat2": ref_data.iloc[best_idx]["Cat2EN"],
-            "similarity": float(scores[best_idx])
         })
     return {"matches": results}
-feedback_data = "feedback.csv"
-@app.post("/feedback/")
-def submit_feedback(text: str, predicted_label: str, correct_label: str):
-    df = pd.DataFrame([[text, predicted_label, correct_label]],
-                      columns=["text", "predicted_label", "correct_label"])
-    df.to_csv(feedback_data, mode='a', header=False, index=False)
-    return {"message": "Feedback saved successfully"}
-@app.post("/map_categories_csv/")
-async def map_categories_csv(file: UploadFile = File(...)):
-    df = pd.read_csv(file.file)
-    results = []
-    for text in df['transaction']:
-        trans_emb = model.encode([text])[0]
-        scores = util.pytorch_cos_sim(torch.tensor(trans_emb), torch.tensor(ref_embeddings)).flatten()
-        best_idx = scores.argmax().item()
-        results.append({
-            "input_text": text,
-            "best_Cat1": ref_data.iloc[best_idx]["Cat1EN"],
-            "best_Cat2": ref_data.iloc[best_idx]["Cat2EN"],
-            "similarity": float(scores[best_idx])
-        })
-    result_df = pd.DataFrame(results)
-    output_file = "results.csv"
-    result_df.to_csv(output_file, index=False)
-    return FileResponse(output_file, media_type='text/csv', filename="matched_results.csv")

 from sentence_transformers import util
 import torch
 import gradio as gr
+import tempfile
+import os
 # ==================================================
 # 🚀 Initialize FastAPI
 # ==================================================
 # 📘 Load Reference Categories
 # ==================================================
+# NOTE(review): this revision shrinks the reference table to four rows, while
+# the previous revision listed dozens of Cat1/Cat2 pairs. map_csv picks the
+# argmax over the embeddings built from these rows, so only these four
+# categories can ever be predicted — confirm the reduction is intentional.
+ref_data = pd.DataFrame({
+    "Cat1EN": ["Purchase of goods", "Mobility (passengers)", "Waste treatment", "Use of electricity"],
+    "Cat2EN": ["Office supplies", "Air transport", "Wastewater", "Renewables"],
+    "DescriptionCat2EN": [
+        "Goods purchase - office items",
+        "Passenger transport - air",
+        "Waste - wastewater",
+        "Electricity - renewables"
+    ]
+})
+# Combine all category info into a single string for embeddings
 ref_data["combined"] = ref_data[["Cat1EN", "Cat2EN", "DescriptionCat2EN"]].agg(" ".join, axis=1)
 ref_embeddings = model.encode(ref_data["combined"].tolist())
     return cat1, cat2, score
 # ==================================================
+# 📂 CSV Mapping Function
 # ==================================================
+def map_csv(file):
+    df = pd.read_csv(file.name)
+    if "transaction" not in df.columns:
+        return "Error: Missing column 'transaction'. Please include it in your CSV.", None
+    results = []
+    for text in df["transaction"]:
+        trans_emb = model.encode([text])[0]
+        scores = util.pytorch_cos_sim(torch.tensor(trans_emb), torch.tensor(ref_embeddings)).flatten()
+        best_idx = scores.argmax().item()
+        results.append({
+            "transaction": text,
+            "Predicted Category 1": ref_data.iloc[best_idx]["Cat1EN"],
+            "Predicted Category 2": ref_data.iloc[best_idx]["Cat2EN"],
+            "Similarity Score": float(scores[best_idx])
+        })
+    result_df = pd.DataFrame(results)
+    # Save to temporary file for download
+    tmp_dir = tempfile.mkdtemp()
+    output_path = os.path.join(tmp_dir, "matched_results.csv")
+    result_df.to_csv(output_path, index=False)
+    return result_df, output_path
+# ==================================================
+# 🖥️ Gradio Interface with Upload + Download
+# ==================================================
+with gr.Blocks(title="Transaction Category Classifier") as gradio_ui:
+    gr.Markdown("## 🧾 Transaction Category Classifier")
+    gr.Markdown("Enter a transaction manually or upload a CSV file to classify multiple transactions.")
+    with gr.Tab("🔹 Single Transaction"):
+        text_input = gr.Textbox(label="Transaction Description", placeholder="e.g., going to Barcelona using plane")
+        btn_submit = gr.Button("Submit")
+        cat1_out = gr.Label(label="Predicted Category 1")
+        cat2_out = gr.Label(label="Predicted Category 2")
+        score_out = gr.Number(label="Similarity Score")
+        btn_submit.click(fn=classify_transaction, inputs=text_input, outputs=[cat1_out, cat2_out, score_out])
+    with gr.Tab("📂 Batch CSV Upload"):
+        csv_input = gr.File(label="Upload CSV file with 'transaction' column", file_types=[".csv"])
+        btn_process = gr.Button("Process CSV")
+        csv_output = gr.DataFrame(label="Matched Results")
+        download_file = gr.File(label="Download Results CSV")
+        def process_and_return(file):
+            df, output_path = map_csv(file)
+            if isinstance(df, str):
+                return None, None
+            return df, output_path
+        btn_process.click(fn=process_and_return, inputs=csv_input, outputs=[csv_output, download_file])
 # Mount Gradio inside FastAPI at /ui
 app = gr.mount_gradio_app(app, gradio_ui, path="/ui")
 # ==================================================
+# 🧾 REST API Endpoints
 # ==================================================
 class TransactionsRequest(BaseModel):
     transactions: List[str]
 def map_categories(request: TransactionsRequest):
     results = []
     for text in request.transactions:
+        cat1, cat2, score = classify_transaction(text)
         results.append({
             "input_text": text,
+            "best_Cat1": cat1,
+            "best_Cat2": cat2,
+            "similarity": score
         })
     return {"matches": results}