EmissionFactor-fastAPI

Sleeping

App Files Files Community

yassine123Z committed on Oct 26, 2025

Commit

1e16b3a

verified ·

1 Parent(s): 9c5d1b7

Update app.py

Browse files

Files changed (1) hide show

app.py +314 -116

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import torch
 import gradio as gr
 import tempfile
 import os
 # ==================================================
 # 🚀 Initialize FastAPI
@@ -23,132 +24,355 @@ model = SetFitModel.from_pretrained("yassine123Z/EmissionFactor-mapper2-v2")
 # ==================================================
 # 📘 Reference Categories
 # ==================================================
-ref_data = pd.DataFrame({ "Cat1EN": [ "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods", "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods", "Purchase of goods","Purchase of goods","Purchase of materials","Purchase of materials", "Purchase of materials","Purchase of materials","Purchase of materials","Purchase of materials", "Purchase of services","Purchase of services","Purchase of services","Purchase of services", "Purchase of services","Purchase of services","Purchase of services","Purchase of services", "Purchase of services","Purchase of services","Purchase of services","Purchase of services", "Purchase of services","Purchase of services","Food & beverages","Food & beverages", "Food & beverages","Food & beverages","Food & beverages","Food & beverages", "Food & beverages","Food & beverages","Food & beverages","Food & beverages", "Heating and air conditioning","Heating and air conditioning","Fuels","Fuels","Fuels","Fuels", "Fuels","Fuels", "Mobility (freight)","Mobility (freight)","Mobility (freight)","Mobility (freight)", "Mobility (freight)", "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)", "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)", "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)", "Process and fugitive emissions","Process and fugitive emissions", "Process and fugitive emissions", "Waste treatment","Waste treatment","Waste treatment", "Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment", "Waste treatment","Waste treatment","Waste treatment","Waste treatment", "Use of electricity","Use of electricity","Use of electricity" ],
-                         "Cat2EN": [ "Sporting goods","Buildings","Office supplies","Water consumption", "Household appliances","Electrical equipment","Machinery and equipment","Furniture", "Textiles and clothing","Vehicles","Construction materials","Organic materials", "Paper and cardboard","Plastics and rubber","Chemicals","Refrigerants and others", "Equipment rental","Building rental","Furniture rental","Vehicle rental and maintenance", "Information and cultural services","Catering services","Health services","Specialized craft services", "Administrative / consulting services","Cleaning services","IT services","Logistics services", "Marketing / advertising services","Technical services","Alcoholic beverages","Non-alcoholic beverages", "Condiments","Desserts","Fruits and vegetables","Fats and oils","Prepared / cooked meals", "Animal products","Cereal products","Dairy products","Heat and steam","Air conditioning and refrigeration", "Fossil fuels","Mobile fossil fuels","Organic fuels","Gaseous fossil fuels","Liquid fossil fuels", "Solid fossil fuels", "Air transport","Ship transport","Truck transport","Combined transport", "Train transport", "Air transport","Coach / Urban bus","Ship transport","Combined transport", "E-Bike","Accommodation / Events","Soft mobility","Motorcycle / Scooter","Train transport", "Public transport","Car", "Agriculture","Global warming potential","Industrial processes", "Commercial and industrial","Wastewater","Electrical equipment","Households and similar", "Metal","Organic materials","Paper and cardboard","Batteries and accumulators","Plastics", "Fugitive process emissions","Textiles","Glass", "Electricity for electric vehicles","Renewables","Standard" ],
-                         "DescriptionCat2EN": [ "Goods purchase - sports","Goods purchase - buildings","Goods purchase - office items","Goods purchase - water", "Goods purchase - appliances","Goods purchase - electricals","Goods purchase - machinery","Goods purchase - furniture", "Goods purchase - textiles","Goods purchase - vehicles","Material purchase - construction","Material purchase - organic", "Material purchase - paper","Material purchase - plastics","Material purchase - chemicals","Material purchase - refrigerants", "Service - equipment rental","Service - building rental","Service - furniture rental","Service - vehicles", "Service - info/culture","Service - catering","Service - healthcare","Service - crafts", "Service - admin/consulting","Service - cleaning","Service - IT","Service - logistics", "Service - marketing","Service - technical","Beverages - alcoholic","Beverages - non-alcoholic", "Food condiments","Food desserts","Food fruits & vegetables","Food fats & oils","Prepared meals", "Animal-based food","Cereal-based food","Dairy products","Heating - heat & steam","Heating - cooling/refrigeration", "Fuel - fossil","Fuel - mobile fossil","Fuel - organic","Fuel - gaseous","Fuel - liquid","Fuel - solid", "Freight transport - air","Freight transport - ship","Freight transport - truck","Freight transport - combined", "Freight transport - train", "Passenger transport - air","Passenger transport - bus","Passenger transport - ship", "Passenger transport - combined","Passenger transport - e-bike","Passenger transport - accommodation/events", "Passenger transport - soft mobility","Passenger transport - scooter/motorbike","Passenger transport - train", "Passenger transport - public","Passenger transport - car", "Emissions - agriculture","Emissions - warming potential", "Emissions - industry", "Waste - commercial/industrial","Waste - wastewater","Waste - electricals", "Waste - households","Waste - metals","Waste - organics","Waste - paper","Waste - batteries", 
"Waste - plastics","Waste - fugitive","Waste - textiles","Waste - glass", "Electricity - EVs","Electricity - renewables","Electricity - standard" ]
-                        })
 ref_data["combined"] = ref_data[["Cat1EN", "Cat2EN", "DescriptionCat2EN"]].agg(" ".join, axis=1)
 ref_embeddings = model.encode(ref_data["combined"].tolist())
 # ==================================================
-# 🔍 Core Classification Logic
 # ==================================================
-def classify_transaction(text: str):
-    trans_emb = model.encode([text])[0]
-    scores = util.pytorch_cos_sim(torch.tensor(trans_emb), torch.tensor(ref_embeddings)).flatten()
-    best_idx = scores.argmax().item()
-    cat1 = ref_data.iloc[best_idx]["Cat1EN"]
-    cat2 = ref_data.iloc[best_idx]["Cat2EN"]
-    score = float(scores[best_idx])
-    return cat1, cat2, score
 # ==================================================
-# 📂 Batch Mapping
 # ==================================================
-def map_csv(file):
-    df = pd.read_csv(file.name)
-    if "transaction" not in df.columns:
-        return "Error: Missing column 'transaction'.", None
     results = []
-    for text in df["transaction"]:
-        cat1, cat2, score = classify_transaction(text)
         results.append({
-            "transaction": text,
-            "Predicted Category 1": cat1,
-            "Predicted Category 2": cat2,
-            "Similarity Score": score
         })
-    result_df = pd.DataFrame(results)
-    tmp_dir = tempfile.mkdtemp()
-    output_path = os.path.join(tmp_dir, "matched_results.csv")
-    result_df.to_csv(output_path, index=False)
-    return result_df, output_path
 # ==================================================
-# 🧠 Model Comparison Logic (no external URL call)
 # ==================================================
-def compare_models_ui(hf_model_url, file):
-    import pandas as pd
-    from setfit import SetFitModel
     try:
         df = pd.read_csv(file.name)
         if "transaction" not in df.columns:
             return "❌ Missing column 'transaction' in CSV.", None
-        # Load the models
-        local_model = SetFitModel.from_pretrained("yassine123Z/EmissionFactor-mapper2-v2")
-        hf_model = SetFitModel.from_pretrained(hf_model_url)
         # Compare predictions
-        local_preds, hf_preds, matches = [], [], []
-        for text in df["transaction"]:
-            local_pred = local_model.predict([text])[0]
-            hf_pred = hf_model.predict([text])[0]
-            local_preds.append(local_pred)
-            hf_preds.append(hf_pred)
-            matches.append(1.0 if local_pred == hf_pred else 0.0)
-        df["local_pred"] = local_preds
-        df["hf_pred"] = hf_preds
-        df["match"] = matches
-        match_rate = round(df["match"].mean() * 100, 2)
-        summary = f"✅ Match rate: {match_rate}% ({len(df)} rows)"
-        return summary, df.head(10)
     except Exception as e:
-        return f"❌ Exception: {str(e)}", None
 # ==================================================
 # 🖥️ Gradio UI: Main App
 # ==================================================
-with gr.Blocks(title="Transaction Category Classifier") as gradio_ui:
-    gr.Markdown("## 🧾 Transaction Category Classifier")
-    gr.Markdown("Enter a transaction manually or upload a CSV file to classify multiple transactions.")
     with gr.Tab("🔹 Single Transaction"):
-        text_input = gr.Textbox(label="Transaction Description", placeholder="e.g., Plane ticket to Barcelona")
-        btn_submit = gr.Button("Submit")
-        cat1_out = gr.Label(label="Predicted Category 1")
-        cat2_out = gr.Label(label="Predicted Category 2")
-        score_out = gr.Number(label="Similarity Score")
-        btn_submit.click(fn=classify_transaction, inputs=text_input, outputs=[cat1_out, cat2_out, score_out])
     with gr.Tab("📂 Batch CSV Upload"):
-        csv_input = gr.File(label="Upload CSV file with 'transaction' column", file_types=[".csv"])
-        btn_process = gr.Button("Process CSV")
-        csv_output = gr.DataFrame(label="Matched Results")
-        download_file = gr.File(label="Download Results CSV")
-        def process_and_return(file):
-            df, output_path = map_csv(file)
-            if isinstance(df, str):
-                return None, None
-            return df, output_path
-        btn_process.click(fn=process_and_return, inputs=csv_input, outputs=[csv_output, download_file])
 # ==================================================
-# 🧠 Gradio UI: Model Comparison Page
 # ==================================================
-with gr.Blocks(title="Model Comparison Tool") as compare_ui:
-    gr.Markdown("### 🔍 Model Comparison Tool")
-    hf_model_url = gr.Textbox(label="Hugging Face model URL")
-    file = gr.File(label="Upload test dataset (CSV)")
-    compare_btn = gr.Button("Compare Models")
-    output_text = gr.Textbox(label="Summary")
-    output_table = gr.DataFrame(label="Sample results")
-    compare_btn.click(fn=compare_models_ui, inputs=[hf_model_url, file], outputs=[output_text, output_table])
 # ==================================================
 # 🌐 Mount Gradio inside FastAPI
 # ==================================================
-app = gr.mount_gradio_app(app, gradio_ui, path="/ui")
 app = gr.mount_gradio_app(app, compare_ui, path="/compare")
 # ==================================================
@@ -157,15 +381,11 @@ app = gr.mount_gradio_app(app, compare_ui, path="/compare")
 class TransactionsRequest(BaseModel):
     transactions: List[str]
-@app.get("/")
-def read_root():
-    return {"status": "ok", "message": "Use /ui or /compare for Gradio, or /map_categories for API."}
 @app.post("/map_categories")
 def map_categories(request: TransactionsRequest):
     results = []
     for text in request.transactions:
-        cat1, cat2, score = classify_transaction(text)
         results.append({
             "input_text": text,
             "best_Cat1": cat1,
@@ -174,29 +394,7 @@ def map_categories(request: TransactionsRequest):
         })
     return {"matches": results}
-@app.post("/compare_models")
-def compare_models(request: dict):
-    local_model = SetFitModel.from_pretrained("yassine123Z/EmissionFactor-mapper2-v2")
-    hf_model = SetFitModel.from_pretrained(request["hf_model"])
-    df = pd.DataFrame(request["data"])
-    if "transaction" not in df.columns:
-        return {"error": "Missing column 'transaction'."}
-    local_preds, hf_preds, matches = [], [], []
-    for text in df["transaction"]:
-        local_pred = local_model.predict([text])[0]
-        hf_pred = hf_model.predict([text])[0]
-        local_preds.append(local_pred)
-        hf_preds.append(hf_pred)
-        matches.append(1.0 if local_pred == hf_pred else 0.0)
-    df["local_pred"] = local_preds
-    df["hf_pred"] = hf_preds
-    df["match"] = matches
-    match_rate = round(df["match"].mean() * 100, 2)
-    return {
-        "match_rate": match_rate,
-        "total_records": len(df),
-        "sample_results": df.head(10).to_dict(orient="records")
-    }

 import gradio as gr
 import tempfile
 import os
+from datetime import datetime
 # ==================================================
 # 🚀 Initialize FastAPI
 # ==================================================
 # 📘 Reference Categories
 # ==================================================
+ref_data = pd.DataFrame({
+    "Cat1EN": [
+        "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
+        "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
+        "Purchase of goods","Purchase of goods","Purchase of materials","Purchase of materials",
+        "Purchase of materials","Purchase of materials","Purchase of materials","Purchase of materials",
+        "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
+        "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
+        "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
+        "Purchase of services","Purchase of services","Food & beverages","Food & beverages",
+        "Food & beverages","Food & beverages","Food & beverages","Food & beverages",
+        "Food & beverages","Food & beverages","Food & beverages","Food & beverages",
+        "Heating and air conditioning","Heating and air conditioning","Fuels","Fuels","Fuels","Fuels",
+        "Fuels","Fuels",
+        "Mobility (freight)","Mobility (freight)","Mobility (freight)","Mobility (freight)",
+        "Mobility (freight)",
+        "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
+        "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
+        "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
+        "Process and fugitive emissions","Process and fugitive emissions",
+        "Process and fugitive emissions",
+        "Waste treatment","Waste treatment","Waste treatment",
+        "Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment",
+        "Waste treatment","Waste treatment","Waste treatment","Waste treatment",
+        "Use of electricity","Use of electricity","Use of electricity"
+    ],
+    "Cat2EN": [
+        "Sporting goods","Buildings","Office supplies","Water consumption",
+        "Household appliances","Electrical equipment","Machinery and equipment","Furniture",
+        "Textiles and clothing","Vehicles","Construction materials","Organic materials",
+        "Paper and cardboard","Plastics and rubber","Chemicals","Refrigerants and others",
+        "Equipment rental","Building rental","Furniture rental","Vehicle rental and maintenance",
+        "Information and cultural services","Catering services","Health services","Specialized craft services",
+        "Administrative / consulting services","Cleaning services","IT services","Logistics services",
+        "Marketing / advertising services","Technical services","Alcoholic beverages","Non-alcoholic beverages",
+        "Condiments","Desserts","Fruits and vegetables","Fats and oils","Prepared / cooked meals",
+        "Animal products","Cereal products","Dairy products","Heat and steam","Air conditioning and refrigeration",
+        "Fossil fuels","Mobile fossil fuels","Organic fuels","Gaseous fossil fuels","Liquid fossil fuels",
+        "Solid fossil fuels",
+        "Air transport","Ship transport","Truck transport","Combined transport",
+        "Train transport",
+        "Air transport","Coach / Urban bus","Ship transport","Combined transport",
+        "E-Bike","Accommodation / Events","Soft mobility","Motorcycle / Scooter","Train transport",
+        "Public transport","Car",
+        "Agriculture","Global warming potential","Industrial processes",
+        "Commercial and industrial","Wastewater","Electrical equipment","Households and similar",
+        "Metal","Organic materials","Paper and cardboard","Batteries and accumulators","Plastics",
+        "Fugitive process emissions","Textiles","Glass",
+        "Electricity for electric vehicles","Renewables","Standard"
+    ],
+    "DescriptionCat2EN": [
+        "Goods purchase - sports","Goods purchase - buildings","Goods purchase - office items","Goods purchase - water",
+        "Goods purchase - appliances","Goods purchase - electricals","Goods purchase - machinery","Goods purchase - furniture",
+        "Goods purchase - textiles","Goods purchase - vehicles","Material purchase - construction","Material purchase - organic",
+        "Material purchase - paper","Material purchase - plastics","Material purchase - chemicals","Material purchase - refrigerants",
+        "Service - equipment rental","Service - building rental","Service - furniture rental","Service - vehicles",
+        "Service - info/culture","Service - catering","Service - healthcare","Service - crafts",
+        "Service - admin/consulting","Service - cleaning","Service - IT","Service - logistics",
+        "Service - marketing","Service - technical","Beverages - alcoholic","Beverages - non-alcoholic",
+        "Food condiments","Food desserts","Food fruits & vegetables","Food fats & oils","Prepared meals",
+        "Animal-based food","Cereal-based food","Dairy products","Heating - heat & steam","Heating - cooling/refrigeration",
+        "Fuel - fossil","Fuel - mobile fossil","Fuel - organic","Fuel - gaseous","Fuel - liquid","Fuel - solid",
+        "Freight transport - air","Freight transport - ship","Freight transport - truck","Freight transport - combined",
+        "Freight transport - train",
+        "Passenger transport - air","Passenger transport - bus","Passenger transport - ship",
+        "Passenger transport - combined","Passenger transport - e-bike","Passenger transport - accommodation/events",
+        "Passenger transport - soft mobility","Passenger transport - scooter/motorbike","Passenger transport - train",
+        "Passenger transport - public","Passenger transport - car",
+        "Emissions - agriculture","Emissions - warming potential",
+        "Emissions - industry",
+        "Waste - commercial/industrial","Waste - wastewater","Waste - electricals",
+        "Waste - households","Waste - metals","Waste - organics","Waste - paper","Waste - batteries",
+        "Waste - plastics","Waste - fugitive","Waste - textiles","Waste - glass",
+        "Electricity - EVs","Electricity - renewables","Electricity - standard"
+    ]
+})
+# Join the three descriptive columns into one string per reference row; this
+# combined text is what gets embedded and compared against transactions.
 ref_data["combined"] = ref_data[["Cat1EN", "Cat2EN", "DescriptionCat2EN"]].agg(" ".join, axis=1)
+# Embed every reference row once at import time so that classifying a
+# transaction only has to embed the incoming text.
 ref_embeddings = model.encode(ref_data["combined"].tolist())
+# Get unique categories for dropdowns
+# (sorted, so the correction dropdowns list their choices alphabetically)
+unique_cat1 = sorted(ref_data["Cat1EN"].unique().tolist())
+unique_cat2 = sorted(ref_data["Cat2EN"].unique().tolist())
 # ==================================================
+# 💾 Corrections Storage (in-memory, use DB in production)
 # ==================================================
+corrections_data = []
+def save_correction(transaction, predicted_cat1, predicted_cat2, correct_cat1, correct_cat2):
+    """Record one user correction in the in-memory ``corrections_data`` list.
+
+    Each entry pairs the categories that were predicted for ``transaction``
+    with the categories the user selected, stamped with ``datetime.now()``
+    in ISO-8601 form.
+
+    Returns:
+        A status message that includes how many corrections are stored so far.
+    """
+    entry = {
+        "timestamp": datetime.now().isoformat(),
+        "transaction": transaction,
+        "predicted_cat1": predicted_cat1,
+        "predicted_cat2": predicted_cat2,
+        "correct_cat1": correct_cat1,
+        "correct_cat2": correct_cat2,
+    }
+    corrections_data.append(entry)
+    total = len(corrections_data)
+    return f"✅ Correction saved! Total corrections: {total}"
 # ==================================================
+# 🔍 Core Classification Logic with Top-K
 # ==================================================
+def classify_transaction(text: str, top_k=3):
+    """Return the reference categories most similar to ``text``, best first.
+
+    The transaction is embedded with the loaded model and compared by cosine
+    similarity against the pre-computed ``ref_embeddings``.
+
+    Args:
+        text: Transaction description to classify.
+        top_k: Maximum number of matches to return. Values larger than the
+            number of reference rows are clamped to that number.
+
+    Returns:
+        A list of dicts with keys ``cat1``, ``cat2`` and ``score`` (cosine
+        similarity as a float), ordered from highest to lowest score.
+    """
+    trans_emb = model.encode([text])[0]
+    scores = util.pytorch_cos_sim(torch.tensor(trans_emb), torch.tensor(ref_embeddings)).flatten()
+    # Get top-k matches. topk() raises when k exceeds the number of scores,
+    # so clamp k, and run it once rather than once per output field.
+    k = min(top_k, scores.numel())
+    top = scores.topk(k)
     results = []
+    for idx, score in zip(top.indices.tolist(), top.values.tolist()):
         results.append({
+            "cat1": ref_data.iloc[idx]["Cat1EN"],
+            "cat2": ref_data.iloc[idx]["Cat2EN"],
+            "score": float(score)
         })
+    return results
+def classify_single(text: str):
+    """Classify ``text`` and return only the best match.
+
+    Returns:
+        A ``(cat1, cat2, score)`` tuple taken from the top result of
+        ``classify_transaction``.
+    """
+    best = classify_transaction(text, top_k=1)[0]
+    return best["cat1"], best["cat2"], best["score"]
 # ==================================================
+# 📂 Batch Mapping with Review
 # ==================================================
+def map_csv_with_review(file):
+    """Classify every row of an uploaded CSV and write the results to a temp CSV.
+
+    Args:
+        file: Uploaded-file object whose ``name`` attribute is the CSV path.
+            The CSV must contain a ``transaction`` column.
+
+    Returns:
+        A ``(results, csv_path, status)`` tuple, in the order the batch tab
+        wires its outputs (results table, download file, status text).
+        On failure ``results`` and ``csv_path`` are ``None`` and ``status``
+        carries the error message, so the message lands in the status box
+        instead of being fed to the table component.
+    """
+    if file is None:
+        return None, None, "❌ Error: Please upload a CSV file."
+    try:
+        df = pd.read_csv(file.name)
+        if "transaction" not in df.columns:
+            return None, None, "❌ Error: Missing column 'transaction'."
+        results = []
+        for idx, text in enumerate(df["transaction"]):
+            top_matches = classify_transaction(text, top_k=3)
+            best = top_matches[0]
+            # The runner-up is optional: fewer than two matches may come back.
+            alt = top_matches[1] if len(top_matches) > 1 else None
+            results.append({
+                "row_id": idx,
+                "transaction": text,
+                "cat1_pred": best["cat1"],
+                "cat2_pred": best["cat2"],
+                "confidence": round(best["score"], 3),
+                "cat1_alt1": alt["cat1"] if alt else "",
+                "cat2_alt1": alt["cat2"] if alt else "",
+                "confidence_alt1": round(alt["score"], 3) if alt else 0,
+                # Strictly above 0.8 is "High"; everything else needs review.
+                "status": "✅ High" if best["score"] > 0.8 else "⚠️ Review"
+            })
+        result_df = pd.DataFrame(results)
+        # Save temporary file
+        tmp_dir = tempfile.mkdtemp()
+        output_path = os.path.join(tmp_dir, "mapped_results.csv")
+        result_df.to_csv(output_path, index=False)
+        return result_df, output_path, f"✅ Processed {len(result_df)} transactions"
+    except Exception as e:
+        # UI boundary: report the failure in the status slot rather than crash.
+        return None, None, f"❌ Error: {str(e)}"
+# ==================================================
+# 🔧 Model Comparison (Fixed)
+# ==================================================
+def compare_models_fixed(hf_model_url, file):
+    """Compare the loaded model with another SetFit model on an uploaded CSV.
+
+    For each of the first 50 transactions, both models pick the reference row
+    with the highest cosine similarity; the two picks are reported side by
+    side together with whether they agree.
+
+    Args:
+        hf_model_url: Model identifier passed to ``SetFitModel.from_pretrained``
+            (surrounding whitespace is stripped).
+        file: Uploaded-file object whose ``name`` attribute is the CSV path.
+            The CSV must contain a ``transaction`` column.
+
+    Returns:
+        ``(summary, result_df)``; on any failure ``summary`` is an error
+        message and ``result_df`` is ``None``.
+    """
     try:
+        model_id = (hf_model_url or "").strip()
+        if not model_id or not file:
+            return "❌ Please provide both model URL and CSV file", None
         df = pd.read_csv(file.name)
         if "transaction" not in df.columns:
             return "❌ Missing column 'transaction' in CSV.", None
+        texts = df["transaction"][:50].tolist()  # Limit to 50 for speed
+        if not texts:
+            # Without this guard an empty CSV surfaces as an opaque
+            # "'match'" KeyError message from the summary step below.
+            return "❌ CSV contains no transactions to compare.", None
+        # Load models
+        local_model = model  # Use already loaded model
+        hf_model = SetFitModel.from_pretrained(model_id)
+        # Reference embeddings, converted to tensors once outside the loop.
+        # The local ones reuse ref_embeddings, which was produced by this same
+        # model from this same reference text.
+        local_refs = torch.tensor(ref_embeddings)
+        hf_refs = torch.tensor(hf_model.encode(ref_data["combined"].tolist()))
         # Compare predictions
+        results = []
+        for text in texts:
+            # Local prediction
+            trans_emb_local = local_model.encode([text])[0]
+            scores_local = util.pytorch_cos_sim(torch.tensor(trans_emb_local), local_refs).flatten()
+            best_idx_local = scores_local.argmax().item()
+            # HF prediction
+            trans_emb_hf = hf_model.encode([text])[0]
+            scores_hf = util.pytorch_cos_sim(torch.tensor(trans_emb_hf), hf_refs).flatten()
+            best_idx_hf = scores_hf.argmax().item()
+            match = "✅" if best_idx_local == best_idx_hf else "❌"
+            results.append({
+                "transaction": text,
+                "local_cat1": ref_data.iloc[best_idx_local]["Cat1EN"],
+                "local_cat2": ref_data.iloc[best_idx_local]["Cat2EN"],
+                "local_score": round(float(scores_local[best_idx_local]), 3),
+                "hf_cat1": ref_data.iloc[best_idx_hf]["Cat1EN"],
+                "hf_cat2": ref_data.iloc[best_idx_hf]["Cat2EN"],
+                "hf_score": round(float(scores_hf[best_idx_hf]), 3),
+                "match": match
+            })
+        result_df = pd.DataFrame(results)
+        matches = sum(1 for row in results if row["match"] == "✅")
+        match_rate = round(matches / len(results) * 100, 2)
+        summary = f"✅ Compared {len(result_df)} transactions\n📊 Match rate: {match_rate}% ({matches}/{len(result_df)})"
+        return summary, result_df
     except Exception as e:
+        # UI boundary: show the failure in the summary box rather than crash.
+        return f"❌ Error: {str(e)}", None
+# ==================================================
+# 📥 Export Corrections
+# ==================================================
+def export_corrections():
+    """Write all stored corrections to a timestamped CSV in a new temp directory.
+
+    Returns:
+        ``(csv_path, status_message)``. ``csv_path`` is ``None`` when no
+        corrections have been saved yet.
+    """
+    if not corrections_data:
+        return None, "⚠️ No corrections to export"
+    frame = pd.DataFrame(corrections_data)
+    target_dir = tempfile.mkdtemp()
+    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+    output_path = os.path.join(target_dir, f"corrections_{stamp}.csv")
+    frame.to_csv(output_path, index=False)
+    exported = len(corrections_data)
+    return output_path, f"✅ Exported {exported} corrections"
 # ==================================================
 # 🖥️ Gradio UI: Main App
 # ==================================================
+# Main UI: three tabs — single classification with manual correction,
+# batch CSV classification, and a viewer/exporter for saved corrections.
+with gr.Blocks(title="Transaction Category Classifier", theme=gr.themes.Soft()) as gradio_ui:
+    gr.Markdown("# 🧾 Transaction Category Classifier")
+    gr.Markdown("Classify transactions and review/correct predictions to improve the model.")
     with gr.Tab("🔹 Single Transaction"):
+        with gr.Row():
+            with gr.Column():
+                text_input = gr.Textbox(
+                    label="Transaction Description",
+                    placeholder="e.g., Plane ticket to Barcelona",
+                    lines=2
+                )
+                btn_submit = gr.Button("🔍 Classify", variant="primary")
+            with gr.Column():
+                cat1_out = gr.Textbox(label="Predicted Category 1")
+                cat2_out = gr.Textbox(label="Predicted Category 2")
+                score_out = gr.Number(label="Confidence Score")
+        gr.Markdown("### ✏️ Review & Correct")
+        with gr.Row():
+            correct_cat1 = gr.Dropdown(choices=unique_cat1, label="Correct Category 1")
+            correct_cat2 = gr.Dropdown(choices=unique_cat2, label="Correct Category 2")
+        btn_save_correction = gr.Button("💾 Save Correction")
+        correction_status = gr.Textbox(label="Status")
+        # Event handlers
+        btn_submit.click(
+            fn=classify_single,
+            inputs=text_input,
+            outputs=[cat1_out, cat2_out, score_out]
+        )
+        # The prediction boxes double as inputs here, so the saved record
+        # holds both what was predicted and what the user chose.
+        btn_save_correction.click(
+            fn=save_correction,
+            inputs=[text_input, cat1_out, cat2_out, correct_cat1, correct_cat2],
+            outputs=correction_status
+        )
     with gr.Tab("📂 Batch CSV Upload"):
+        gr.Markdown("Upload a CSV file with a 'transaction' column to classify multiple transactions.")
+        csv_input = gr.File(label="Upload CSV file", file_types=[".csv"])
+        btn_process = gr.Button("🚀 Process CSV", variant="primary")
+        process_status = gr.Textbox(label="Status")
+        csv_output = gr.DataFrame(label="Classification Results (scroll right for alternatives)")
+        download_file = gr.File(label="📥 Download Results CSV")
+        btn_process.click(
+            fn=map_csv_with_review,
+            inputs=csv_input,
+            outputs=[csv_output, download_file, process_status]
+        )
+        # Legend wording mirrors the batch classifier: a score strictly above
+        # 0.8 is "High", anything else is "Review", and the runner-up columns
+        # are filled for every row.
+        gr.Markdown("""
+        **Legend:**
+        - ✅ High: Confidence > 80%
+        - ⚠️ Review: Confidence ≤ 80% - please review
+        - Alternative (runner-up) predictions are listed for every row
+        """)
+    with gr.Tab("📊 View Corrections"):
+        gr.Markdown("### Review and export saved corrections")
+        btn_refresh = gr.Button("🔄 Refresh Corrections")
+        corrections_df = gr.DataFrame(label="Saved Corrections")
+        export_status = gr.Textbox(label="Export Status")
+        export_file = gr.File(label="📥 Download Corrections CSV")
+        btn_export = gr.Button("📤 Export All Corrections")
+        def show_corrections():
+            """Return the saved corrections as a table, or a one-row placeholder."""
+            if not corrections_data:
+                return pd.DataFrame({"message": ["No corrections yet"]})
+            return pd.DataFrame(corrections_data)
+        btn_refresh.click(fn=show_corrections, outputs=corrections_df)
+        btn_export.click(fn=export_corrections, outputs=[export_file, export_status])
 # ==================================================
+# 🔍 Gradio UI: Model Comparison Page
 # ==================================================
+# Separate page: takes a model ID and a CSV upload, then shows a text summary
+# and a per-transaction table produced by compare_models_fixed.
+with gr.Blocks(title="Model Comparison Tool", theme=gr.themes.Soft()) as compare_ui:
+    gr.Markdown("# 🔍 Model Comparison Tool")
+    gr.Markdown("Compare predictions between your local model and any HuggingFace model.")
+    with gr.Row():
+        hf_model_url = gr.Textbox(
+            label="HuggingFace Model ID",
+            placeholder="e.g., sentence-transformers/all-MiniLM-L6-v2",
+            info="Enter the model ID from HuggingFace"
+        )
+    file = gr.File(label="Upload test dataset (CSV with 'transaction' column)", file_types=[".csv"])
+    compare_btn = gr.Button("🔬 Compare Models", variant="primary")
+    output_text = gr.Textbox(label="Comparison Summary", lines=3)
+    output_table = gr.DataFrame(label="Detailed Comparison Results")
+    # Output order matches the (summary, result_df) pair the handler returns.
+    compare_btn.click(
+        fn=compare_models_fixed,
+        inputs=[hf_model_url, file],
+        outputs=[output_text, output_table]
+    )
 # ==================================================
 # 🌐 Mount Gradio inside FastAPI
 # ==================================================
+app = gr.mount_gradio_app(app, gradio_ui, path="/")
 app = gr.mount_gradio_app(app, compare_ui, path="/compare")
 # ==================================================
 class TransactionsRequest(BaseModel):
     transactions: List[str]
 @app.post("/map_categories")
 def map_categories(request: TransactionsRequest):
     results = []
     for text in request.transactions:
+        cat1, cat2, score = classify_single(text)
         results.append({
             "input_text": text,
             "best_Cat1": cat1,
         })
     return {"matches": results}
+@app.get("/corrections")
+def get_corrections():
+    """Return every stored correction together with the total count."""
+    total = len(corrections_data)
+    return {"corrections": corrections_data, "count": total}