EmissionFactor-fastAPI

Sleeping

App Files Community

yassine123Z committed on Oct 26, 2025

Commit

f820f5a

verified ·

1 Parent(s): 54d63e4

Update app.py

Browse files

Files changed (1) hide show

app.py +454 -78

app.py CHANGED Viewed

@@ -16,133 +16,509 @@ from datetime import datetime
 app = FastAPI(title="Transaction Category Mapper")
 # ==================================================
-# 🧠 Load Model
 # ==================================================
-model_path = "HEN10/setfit-particular-transaction-solon-embeddings-labels-large-kaggle-automatisation-v1"
-model = SetFitModel.from_pretrained(model_path)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
 # ==================================================
-# 🧩 Utility: Classify transaction text
 # ==================================================
-def classify_transaction(text, top_k=3):
-    embeddings = model.model_body.encode([text], convert_to_tensor=True, device=device)
-    label_embeddings = model.model_body.encode(model.labels, convert_to_tensor=True, device=device)
-    cos_scores = util.pytorch_cos_sim(embeddings, label_embeddings)[0]
-    top_results = torch.topk(cos_scores, k=top_k)
     results = []
-    for score, idx in zip(top_results.values, top_results.indices):
-        cat = model.labels[idx]
-        cat_split = cat.split(" > ")
-        cat1 = cat_split[0] if len(cat_split) > 0 else ""
-        cat2 = cat_split[1] if len(cat_split) > 1 else ""
-        results.append({"cat1": cat1, "cat2": cat2, "score": float(score)})
     return results
 # ==================================================
-# 📥 Batch Mapping for Review (Simplified Columns)
 # ==================================================
 def map_csv_for_review(file):
-    """Process CSV and return simplified results with editable prediction columns."""
     global batch_results
     try:
         if file is None:
             return None, "⚠️ Please upload a CSV file"
         df = pd.read_csv(file.name)
         if "transaction" not in df.columns:
-            return None, "❌ Missing column 'transaction'"
         results = []
         for idx, text in enumerate(df["transaction"]):
             if pd.isna(text) or str(text).strip() == "":
                 continue
-            top_match = classify_transaction(str(text), top_k=1)[0]
             results.append({
                 "ID": idx + 1,
-                "Transaction": str(text)[:100],
-                "Predicted_Cat1": top_match["cat1"],
-                "Predicted_Cat2": top_match["cat2"],
-                "Confidence": round(top_match["score"], 3),
-                "Status": "✅ OK" if top_match["score"] > 0.8 else "⚠️ Review"
             })
-        batch_results = pd.DataFrame(results)
-        return batch_results, f"✅ Processed {len(batch_results)} transactions!"
     except Exception as e:
         return None, f"❌ Error: {str(e)}"
 # ==================================================
-# 💾 Save Corrections & Generate Training Data
 # ==================================================
-def save_batch_corrections(updated_df):
-    """Save corrected table and prepare new training data."""
-    global batch_results
-    corrected = pd.DataFrame(updated_df)
-    corrected.to_csv("corrected_results.csv", index=False)
-    # Build training dataset (only corrected entries)
-    training_data = corrected[["Transaction", "Predicted_Cat1", "Predicted_Cat2"]].copy()
-    training_data = training_data.rename(columns={
-        "Transaction": "text",
-        "Predicted_Cat1": "label1",
-        "Predicted_Cat2": "label2"
-    })
-    training_data.to_csv("training_data.csv", index=False)
-    return (
-        f"💾 Saved {len(corrected)} reviewed transactions.",
-        "corrected_results.csv",
-        "training_data.csv"
-    )
 # ==================================================
-# 🌍 Gradio Interface
 # ==================================================
-with gr.Blocks(title="Transaction Category Mapper") as demo:
-    gr.Markdown("# 💼 Transaction Category Mapper")
-    gr.Markdown("Upload a file → review predictions → correct directly → save for fine-tuning.")
     with gr.Tab("📂 Batch Review & Correct"):
-        upload_file = gr.File(label="Upload your CSV file (must include 'transaction' column)")
-        btn_process = gr.Button("🚀 Process Transactions", variant="primary")
-        review_table = gr.Dataframe(
-            headers=["ID", "Transaction", "Predicted_Cat1", "Predicted_Cat2", "Confidence", "Status"],
             interactive=True,
-            wrap=True,
-            max_rows=50
         )
-        process_status = gr.Textbox(label="Status", lines=3, interactive=False)
         btn_process.click(
-            fn=map_csv_for_review,
-            inputs=upload_file,
             outputs=[review_table, process_status]
         )
-        gr.Markdown("---")
-        gr.Markdown("### 💾 Save and Export Results")
-        btn_save = gr.Button("💾 Save All Corrections", variant="primary")
-        btn_download_corrected = gr.File(label="📥 Download Corrected Results")
-        btn_download_training = gr.File(label="📥 Download Training Data (Corrections)")
-        save_status = gr.Textbox(label="Save Status", lines=3, interactive=False)
         btn_save.click(
             fn=save_batch_corrections,
             inputs=review_table,
             outputs=[save_status, btn_download_corrected, btn_download_training]
         )
 # ==================================================
-# 🚀 Launch App
 # ==================================================
-if __name__ == "__main__":
-    import uvicorn
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 app = FastAPI(title="Transaction Category Mapper")
 # ==================================================
+# 🧠 Load Main Model
 # ==================================================
+# Runs at import time: the model is loaded once here and shared by every
+# handler below through the module-level name `model`.
+print("Loading main model...")
+model = SetFitModel.from_pretrained("yassine123Z/EmissionFactor-mapper2-v2")
+print("Model loaded successfully!")
 # ==================================================
+# 📘 Reference Categories
 # ==================================================
+# Reference taxonomy: one row per (Cat1EN, Cat2EN) pair plus a short description.
+# The three lists are parallel (82 entries each) and must stay the same length,
+# otherwise pd.DataFrame raises at import time.
+ref_data = pd.DataFrame({
+    "Cat1EN": [
+        "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
+        "Purchase of goods","Purchase of goods","Purchase of goods","Purchase of goods",
+        "Purchase of goods","Purchase of goods","Purchase of materials","Purchase of materials",
+        "Purchase of materials","Purchase of materials","Purchase of materials","Purchase of materials",
+        "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
+        "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
+        "Purchase of services","Purchase of services","Purchase of services","Purchase of services",
+        "Purchase of services","Purchase of services","Food & beverages","Food & beverages",
+        "Food & beverages","Food & beverages","Food & beverages","Food & beverages",
+        "Food & beverages","Food & beverages","Food & beverages","Food & beverages",
+        "Heating and air conditioning","Heating and air conditioning","Fuels","Fuels","Fuels","Fuels",
+        "Fuels","Fuels",
+        "Mobility (freight)","Mobility (freight)","Mobility (freight)","Mobility (freight)",
+        "Mobility (freight)",
+        "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
+        "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
+        "Mobility (passengers)","Mobility (passengers)","Mobility (passengers)","Mobility (passengers)",
+        "Process and fugitive emissions","Process and fugitive emissions",
+        "Process and fugitive emissions",
+        "Waste treatment","Waste treatment","Waste treatment",
+        "Waste treatment","Waste treatment","Waste treatment","Waste treatment","Waste treatment",
+        "Waste treatment","Waste treatment","Waste treatment","Waste treatment",
+        "Use of electricity","Use of electricity","Use of electricity"
+    ],
+    "Cat2EN": [
+        "Sporting goods","Buildings","Office supplies","Water consumption",
+        "Household appliances","Electrical equipment","Machinery and equipment","Furniture",
+        "Textiles and clothing","Vehicles","Construction materials","Organic materials",
+        "Paper and cardboard","Plastics and rubber","Chemicals","Refrigerants and others",
+        "Equipment rental","Building rental","Furniture rental","Vehicle rental and maintenance",
+        "Information and cultural services","Catering services","Health services","Specialized craft services",
+        "Administrative / consulting services","Cleaning services","IT services","Logistics services",
+        "Marketing / advertising services","Technical services","Alcoholic beverages","Non-alcoholic beverages",
+        "Condiments","Desserts","Fruits and vegetables","Fats and oils","Prepared / cooked meals",
+        "Animal products","Cereal products","Dairy products","Heat and steam","Air conditioning and refrigeration",
+        "Fossil fuels","Mobile fossil fuels","Organic fuels","Gaseous fossil fuels","Liquid fossil fuels",
+        "Solid fossil fuels",
+        "Air transport","Ship transport","Truck transport","Combined transport",
+        "Train transport",
+        "Air transport","Coach / Urban bus","Ship transport","Combined transport",
+        "E-Bike","Accommodation / Events","Soft mobility","Motorcycle / Scooter","Train transport",
+        "Public transport","Car",
+        "Agriculture","Global warming potential","Industrial processes",
+        "Commercial and industrial","Wastewater","Electrical equipment","Households and similar",
+        "Metal","Organic materials","Paper and cardboard","Batteries and accumulators","Plastics",
+        "Fugitive process emissions","Textiles","Glass",
+        "Electricity for electric vehicles","Renewables","Standard"
+    ],
+    "DescriptionCat2EN": [
+        "Goods purchase - sports","Goods purchase - buildings","Goods purchase - office items","Goods purchase - water",
+        "Goods purchase - appliances","Goods purchase - electricals","Goods purchase - machinery","Goods purchase - furniture",
+        "Goods purchase - textiles","Goods purchase - vehicles","Material purchase - construction","Material purchase - organic",
+        "Material purchase - paper","Material purchase - plastics","Material purchase - chemicals","Material purchase - refrigerants",
+        "Service - equipment rental","Service - building rental","Service - furniture rental","Service - vehicles",
+        "Service - info/culture","Service - catering","Service - healthcare","Service - crafts",
+        "Service - admin/consulting","Service - cleaning","Service - IT","Service - logistics",
+        "Service - marketing","Service - technical","Beverages - alcoholic","Beverages - non-alcoholic",
+        "Food condiments","Food desserts","Food fruits & vegetables","Food fats & oils","Prepared meals",
+        "Animal-based food","Cereal-based food","Dairy products","Heating - heat & steam","Heating - cooling/refrigeration",
+        "Fuel - fossil","Fuel - mobile fossil","Fuel - organic","Fuel - gaseous","Fuel - liquid","Fuel - solid",
+        "Freight transport - air","Freight transport - ship","Freight transport - truck","Freight transport - combined",
+        "Freight transport - train",
+        "Passenger transport - air","Passenger transport - bus","Passenger transport - ship",
+        "Passenger transport - combined","Passenger transport - e-bike","Passenger transport - accommodation/events",
+        "Passenger transport - soft mobility","Passenger transport - scooter/motorbike","Passenger transport - train",
+        "Passenger transport - public","Passenger transport - car",
+        "Emissions - agriculture","Emissions - warming potential",
+        "Emissions - industry",
+        "Waste - commercial/industrial","Waste - wastewater","Waste - electricals",
+        "Waste - households","Waste - metals","Waste - organics","Waste - paper","Waste - batteries",
+        "Waste - plastics","Waste - fugitive","Waste - textiles","Waste - glass",
+        "Electricity - EVs","Electricity - renewables","Electricity - standard"
+    ]
+})
+# Text that gets embedded for each reference row: the three columns joined by single spaces.
+ref_data["combined"] = ref_data[["Cat1EN", "Cat2EN", "DescriptionCat2EN"]].agg(" ".join, axis=1)
+# Encoded once at import; row i of ref_embeddings corresponds to row i of ref_data.
+ref_embeddings = model.encode(ref_data["combined"].tolist())
+# Get unique categories for dropdowns
+# NOTE(review): neither list is referenced anywhere else in the visible code — confirm whether still needed.
+unique_cat1 = sorted(ref_data["Cat1EN"].unique().tolist())
+unique_cat2 = sorted(ref_data["Cat2EN"].unique().tolist())
+# ==================================================
+# 💾 Storage for batch data and corrections
+# ==================================================
+# Module-level state: lives only in this process's memory, so it is shared by
+# every request handled by the process and is lost when the process restarts.
+batch_results = None  # Store current batch for editing
+# Accumulated correction records (dicts) appended by save_batch_corrections.
+corrections_data = []
+# ==================================================
+# 🔍 Core Classification Logic
+# ==================================================
+def classify_transaction(text: str, top_k=3):
+    """Return the ``top_k`` reference categories most similar to ``text``.
+
+    The text is embedded with the main model and compared by cosine
+    similarity against the pre-computed ``ref_embeddings``.
+
+    Args:
+        text: Transaction description to classify.
+        top_k: Maximum number of matches to return (capped at the number of
+            reference rows).
+
+    Returns:
+        A list of ``{"cat1", "cat2", "score"}`` dicts ordered from best to
+        worst match; empty when ``text`` is blank or ``top_k`` is below 1.
+    """
+    if not text or text.strip() == "" or top_k < 1:
+        return []
+    trans_emb = model.encode([text])[0]
+    # as_tensor avoids an extra copy (and a warning if the embeddings are already tensors).
+    scores = util.pytorch_cos_sim(torch.as_tensor(trans_emb), torch.as_tensor(ref_embeddings)).flatten()
+    # A single topk call yields matching indices and scores; no need to rank twice.
+    top = scores.topk(min(top_k, len(scores)))
     results = []
+    for idx, score in zip(top.indices.tolist(), top.values.tolist()):
+        results.append({
+            "cat1": ref_data.iloc[idx]["Cat1EN"],
+            "cat2": ref_data.iloc[idx]["Cat2EN"],
+            "score": float(score)
+        })
     return results
+def classify_single(text: str):
+    """Classify one transaction and return ``(cat1, cat2, score)``.
+
+    For blank input or when no match is produced, the first element carries
+    a short message instead of a category, with ``""`` and ``0.0`` for the rest.
+    """
+    is_blank = not text or text.strip() == ""
+    if is_blank:
+        return "Please enter a transaction", "", 0.0
+    matches = classify_transaction(text, top_k=1)
+    if matches:
+        best = matches[0]
+        return best["cat1"], best["cat2"], best["score"]
+    return "No results", "", 0.0
 # ==================================================
+# 📂 Batch Mapping with Review Table
 # ==================================================
 def map_csv_for_review(file):
+    """Classify every row of an uploaded CSV and return an editable review table.
+
+    Args:
+        file: The upload from the UI: either an object exposing the path as
+            ``.name`` or the path itself as a string. ``None`` means nothing
+            was uploaded.
+
+    Returns:
+        ``(table, status_message)``. ``table`` is a DataFrame with columns
+        ID, Transaction, Cat1, Cat2, Confidence and Status, or ``None`` when
+        the input is missing/invalid or an error occurred. The same table is
+        stored in the module-level ``batch_results`` for later comparison.
+    """
     global batch_results
     try:
         if file is None:
             return None, "⚠️ Please upload a CSV file"
+        # Accept both a tempfile-like object (path in .name) and a plain path string.
+        csv_path = getattr(file, "name", file)
+        df = pd.read_csv(csv_path)
         if "transaction" not in df.columns:
+            return None, "❌ Error: Missing column 'transaction'"
         results = []
         for idx, text in enumerate(df["transaction"]):
             if pd.isna(text) or str(text).strip() == "":
                 continue
+            top_matches = classify_transaction(str(text), top_k=1)
+            if not top_matches:
+                continue
             results.append({
                 "ID": idx + 1,
+                "Transaction": str(text)[:80],  # Truncate long text
+                "Cat1": top_matches[0]["cat1"],
+                "Cat2": top_matches[0]["cat2"],
+                "Confidence": round(top_matches[0]["score"], 3),
+                "Status": "✅ OK" if top_matches[0]["score"] > 0.8 else "⚠️ Review",
             })
+        if not results:
+            return None, "❌ No valid transactions found"
+        batch_results = pd.DataFrame(results)
+        status_msg = f"✅ Processed {len(batch_results)} transactions. Review and edit the 'Cat1' and 'Cat2' columns directly, then click 'Save Corrections'."
+        return batch_results, status_msg
     except Exception as e:
         return None, f"❌ Error: {str(e)}"
 # ==================================================
+# 💾 Save Corrections from Edited Table
 # ==================================================
+def _normalize_id(value):
+    """Return a canonical string key for a row ID so 7, 7.0 and "7" compare equal."""
+    try:
+        return str(int(float(value)))
+    except (TypeError, ValueError, OverflowError):
+        return str(value)
+def save_batch_corrections(edited_df):
+    """Record user edits to the review table and write the result files.
+
+    A row counts as a correction when its ``Cat1`` or ``Cat2`` differs from
+    the prediction stored in ``batch_results`` for the same ``ID``. IDs are
+    compared through ``_normalize_id`` because the edited table may come back
+    with IDs typed differently (e.g. ``"7"`` or ``7.0``) from the integers
+    produced at prediction time; a raw comparison would then miss every
+    lookup and log each row as a correction with empty originals.
+
+    Args:
+        edited_df: The review table as returned by the UI (a DataFrame or
+            anything ``pd.DataFrame`` accepts).
+
+    Returns:
+        ``(status_message, corrected_csv_path, training_csv_path)``. The
+        training path is ``None`` when no row was corrected; both paths are
+        ``None`` when there is nothing to save or an error occurred.
+    """
+    global corrections_data, batch_results
+    try:
+        if edited_df is None or len(edited_df) == 0:
+            return "⚠️ No data to save", None, None
+        # Convert to DataFrame if needed
+        if not isinstance(edited_df, pd.DataFrame):
+            edited_df = pd.DataFrame(edited_df)
+        # Original predictions keyed by normalized ID, for comparison
+        original_predictions = {}
+        if batch_results is not None:
+            original_predictions = {
+                _normalize_id(row_id): {"cat1": cat1, "cat2": cat2}
+                for row_id, cat1, cat2 in zip(
+                    batch_results["ID"], batch_results["Cat1"], batch_results["Cat2"]
+                )
+            }
+        # Collect corrections (where edited differs from original prediction)
+        new_corrections = []
+        for _, row in edited_df.iterrows():
+            original = original_predictions.get(_normalize_id(row["ID"]), {})
+            original_cat1 = original.get("cat1", "")
+            original_cat2 = original.get("cat2", "")
+            edited_cat1 = row.get("Cat1", original_cat1)
+            edited_cat2 = row.get("Cat2", original_cat2)
+            if edited_cat1 != original_cat1 or edited_cat2 != original_cat2:
+                new_corrections.append({
+                    "timestamp": datetime.now().isoformat(),
+                    "transaction": row["Transaction"],
+                    "original_cat1": original_cat1,
+                    "original_cat2": original_cat2,
+                    "corrected_cat1": edited_cat1,
+                    "corrected_cat2": edited_cat2,
+                    "confidence": row.get("Confidence", 0)
+                })
+        corrections_count = len(new_corrections)
+        # Add to global corrections
+        corrections_data.extend(new_corrections)
+        tmp_dir = tempfile.mkdtemp()
+        # One stamp for both files so the pair written by a single save can be matched up.
+        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        # Save corrected results
+        corrected_file = os.path.join(tmp_dir, f"corrected_results_{stamp}.csv")
+        edited_df.to_csv(corrected_file, index=False)
+        # Save only corrections (training data)
+        training_file = None
+        if new_corrections:
+            training_file = os.path.join(tmp_dir, f"training_corrections_{stamp}.csv")
+            pd.DataFrame(new_corrections).to_csv(training_file, index=False)
+        status_msg = f"✅ Saved!\n📊 Total rows: {len(edited_df)}\n✏️ Corrections made: {corrections_count}\n💾 Total corrections in memory: {len(corrections_data)}"
+        return status_msg, corrected_file, training_file
+    except Exception as e:
+        return f"❌ Error saving: {str(e)}", None, None
+# ==================================================
+# 📥 Export All Corrections
+# ==================================================
+def export_all_corrections():
+    """Write every correction held in memory to a timestamped CSV.
+
+    Returns ``(csv_path, status_message)``; the path is ``None`` when no
+    corrections have been recorded yet.
+    """
+    total = len(corrections_data)
+    if total == 0:
+        return None, "⚠️ No corrections to export yet"
+    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+    output_path = os.path.join(tempfile.mkdtemp(), f"all_corrections_{stamp}.csv")
+    pd.DataFrame(corrections_data).to_csv(output_path, index=False)
+    return output_path, f"✅ Exported {total} total corrections for model training"
+def show_corrections():
+    """Return the accumulated corrections as a DataFrame, or a one-row placeholder when there are none."""
+    if corrections_data:
+        return pd.DataFrame(corrections_data)
+    placeholder = {"message": ["No corrections saved yet. Upload a CSV and make corrections!"]}
+    return pd.DataFrame(placeholder)
+# ==================================================
+# 🔧 Model Comparison
+# ==================================================
+def compare_models_fixed(hf_model_url, file):
+    """Compare the main model's predictions with another SetFit model on a CSV.
+
+    Both models rank the same reference rows (``ref_data``) by cosine
+    similarity. Only the first 50 rows of the ``transaction`` column are
+    considered, and blank cells among them are skipped.
+
+    Args:
+        hf_model_url: Hugging Face model ID of the model to compare against.
+        file: The uploaded CSV: an object exposing the path as ``.name`` or
+            the path itself as a string.
+
+    Returns:
+        ``(summary_text, results_dataframe)``. On any failure the first item
+        is an error message and the second is ``None``.
+    """
+    try:
+        if not hf_model_url or not hf_model_url.strip():
+            return "❌ Please provide a HuggingFace model ID", None
+        if file is None:
+            return "❌ Please upload a CSV file", None
+        # Accept both a tempfile-like object (path in .name) and a plain path string.
+        df = pd.read_csv(getattr(file, "name", file))
+        if "transaction" not in df.columns:
+            return "❌ Missing column 'transaction' in CSV", None
+        print(f"Loading comparison model: {hf_model_url}")
+        try:
+            hf_model = SetFitModel.from_pretrained(hf_model_url.strip())
+        except Exception as e:
+            return f"❌ Failed to load model '{hf_model_url}': {str(e)}", None
+        print("Encoding reference data...")
+        # The main model's reference embeddings already exist at module level
+        # (same model, same texts), so only the comparison model needs encoding.
+        # Tensors are built once here rather than on every loop iteration.
+        local_ref = torch.as_tensor(ref_embeddings)
+        hf_ref = torch.as_tensor(hf_model.encode(ref_data["combined"].tolist()))
+        results = []
+        max_samples = min(50, len(df))
+        print(f"Comparing {max_samples} samples...")
+        for text in df["transaction"][:max_samples]:
+            if pd.isna(text) or str(text).strip() == "":
+                continue
+            text = str(text)
+            trans_emb_local = model.encode([text])[0]
+            scores_local = util.pytorch_cos_sim(torch.as_tensor(trans_emb_local), local_ref).flatten()
+            best_idx_local = scores_local.argmax().item()
+            trans_emb_hf = hf_model.encode([text])[0]
+            scores_hf = util.pytorch_cos_sim(torch.as_tensor(trans_emb_hf), hf_ref).flatten()
+            best_idx_hf = scores_hf.argmax().item()
+            local_cat1 = ref_data.iloc[best_idx_local]["Cat1EN"]
+            local_cat2 = ref_data.iloc[best_idx_local]["Cat2EN"]
+            hf_cat1 = ref_data.iloc[best_idx_hf]["Cat1EN"]
+            hf_cat2 = ref_data.iloc[best_idx_hf]["Cat2EN"]
+            match = "✅" if (local_cat1 == hf_cat1 and local_cat2 == hf_cat2) else "❌"
+            results.append({
+                "Transaction": text[:50] + "..." if len(text) > 50 else text,
+                "Local_Cat1": local_cat1,
+                "Local_Cat2": local_cat2,
+                "Local_Conf": round(float(scores_local[best_idx_local]), 3),
+                "HF_Cat1": hf_cat1,
+                "HF_Cat2": hf_cat2,
+                "HF_Conf": round(float(scores_hf[best_idx_hf]), 3),
+                "Match": match
+            })
+        if not results:
+            return "❌ No valid transactions to compare", None
+        result_df = pd.DataFrame(results)
+        matches = (result_df["Match"] == "✅").sum()
+        match_rate = round(matches / len(result_df) * 100, 2)
+        summary = f"""✅ Comparison Complete!
+📊 Analyzed: {len(result_df)} transactions
+🎯 Match Rate: {match_rate}% ({matches}/{len(result_df)} matches)
+📝 Your Model: yassine123Z/EmissionFactor-mapper2-v2
+📝 Compared with: {hf_model_url}"""
+        return summary, result_df
+    except Exception as e:
+        import traceback
+        error_details = traceback.format_exc()
+        return f"❌ Error: {str(e)}\n\nDetails:\n{error_details}", None
 # ==================================================
+# 🖥️ Main Gradio UI
 # ==================================================
+# Builds the whole Gradio interface as `main_ui`; each tab wires its widgets to
+# one of the handler functions defined above.
+with gr.Blocks(title="Transaction Category Classifier", theme=gr.themes.Soft()) as main_ui:
+    gr.Markdown("# 🧾 Transaction Category Classifier")
+    gr.Markdown("Upload CSV → Review predictions in editable table → Make corrections → Save for training")
+    # Tab 1: classify a single free-text description via classify_single.
+    with gr.Tab("🔹 Single Transaction"):
+        with gr.Row():
+            with gr.Column():
+                text_input = gr.Textbox(
+                    label="Transaction Description",
+                    placeholder="e.g., Plane ticket to Barcelona",
+                    lines=2
+                )
+                btn_submit = gr.Button("🔍 Classify", variant="primary")
+            with gr.Column():
+                cat1_out = gr.Textbox(label="Predicted Category 1", interactive=False)
+                cat2_out = gr.Textbox(label="Predicted Category 2", interactive=False)
+                score_out = gr.Number(label="Confidence Score")
+        btn_submit.click(
+            fn=classify_single,
+            inputs=text_input,
+            outputs=[cat1_out, cat2_out, score_out]
+        )
+    # Tab 2: upload a CSV, edit the predicted categories in place, then save.
     with gr.Tab("📂 Batch Review & Correct"):
+        gr.Markdown("""
+        ## 📋 How to use:
+        1. **Upload CSV** with 'transaction' column
+        2. **Review & Edit** the table below - modify 'Cat1' and 'Cat2' columns directly
+        3. **Save** corrections for model training
+        """)
+        csv_input = gr.File(label="📁 Upload CSV file", file_types=[".csv"])
+        btn_process = gr.Button("🚀 Process & Load for Review", variant="primary", size="lg")
+        process_status = gr.Textbox(label="Status", lines=3, interactive=False)
+        gr.Markdown("### ✏️ Editable Results - Modify categories directly in the table:")
+        review_table = gr.DataFrame(
+            label="Review & Edit Predictions (Edit 'Cat1' and 'Cat2' columns directly)",
             interactive=True,
+            wrap=True
         )
+        gr.Markdown("**Instructions:** Click any cell in 'Cat1' or 'Cat2' to edit directly. Changes will be tracked as corrections.")
+        with gr.Row():
+            btn_save = gr.Button("💾 Save All Corrections", variant="primary", size="lg")
+            btn_download_corrected = gr.File(label="📥 Download Corrected Results")
+            btn_download_training = gr.File(label="📥 Download Training Data (Corrections Only)")
+        save_status = gr.Textbox(label="Save Status", lines=4, interactive=False)
+        # map_csv_for_review fills the editable table and the status box.
         btn_process.click(
+            fn=map_csv_for_review,
+            inputs=csv_input,
             outputs=[review_table, process_status]
         )
+        # The current (possibly edited) table contents are what get passed to save_batch_corrections.
         btn_save.click(
             fn=save_batch_corrections,
             inputs=review_table,
             outputs=[save_status, btn_download_corrected, btn_download_training]
         )
+    # Tab 3: view/export everything accumulated in corrections_data.
+    with gr.Tab("📊 All Corrections History"):
+        gr.Markdown("### 📜 View All Saved Corrections")
+        gr.Markdown("All corrections from all batches are stored here for model retraining.")
+        btn_refresh = gr.Button("🔄 Refresh List")
+        corrections_table = gr.DataFrame(label="All Corrections", interactive=False)
+        with gr.Row():
+            btn_export_all = gr.Button("📤 Export All Corrections", variant="primary")
+            export_all_file = gr.File(label="📥 Download All Corrections CSV")
+        export_status = gr.Textbox(label="Export Status", interactive=False)
+        btn_refresh.click(fn=show_corrections, outputs=corrections_table)
+        btn_export_all.click(fn=export_all_corrections, outputs=[export_all_file, export_status])
+    # Tab 4: side-by-side comparison against another model via compare_models_fixed.
+    with gr.Tab("🔬 Compare Models"):
+        gr.Markdown("## 🔍 Model Comparison Tool")
+        gr.Markdown("Compare your model with any HuggingFace SetFit model.")
+        hf_model_url = gr.Textbox(
+            label="HuggingFace Model ID",
+            placeholder="e.g., sentence-transformers/all-MiniLM-L6-v2"
+        )
+        compare_file = gr.File(label="Upload Test CSV", file_types=[".csv"])
+        compare_btn = gr.Button("🔬 Compare Models", variant="primary")
+        compare_summary = gr.Textbox(label="Comparison Summary", lines=6, interactive=False)
+        compare_results = gr.DataFrame(label="Detailed Results", interactive=False)
+        compare_btn.click(
+            fn=compare_models_fixed,
+            inputs=[hf_model_url, compare_file],
+            outputs=[compare_summary, compare_results]
+        )
 # ==================================================
+# 🧾 REST API Endpoints
+# Registered BEFORE the Gradio mount below: a mount at "/" matches every
+# path, so routes added after it would never be reached.
 # ==================================================
+class TransactionsRequest(BaseModel):
+    """Request body for /map_categories: the transaction texts to classify."""
+    transactions: List[str]
+@app.get("/health")
+def health_check():
+    """Report that the service is up, plus the number of corrections in memory."""
+    return {
+        "status": "healthy",
+        "model_loaded": model is not None,
+        "corrections_count": len(corrections_data)
+    }
+@app.post("/map_categories")
+def map_categories(request: TransactionsRequest):
+    """Return the best-matching category pair and similarity for each input text."""
+    results = []
+    for text in request.transactions:
+        cat1, cat2, score = classify_single(text)
+        results.append({
+            "input_text": text,
+            "best_Cat1": cat1,
+            "best_Cat2": cat2,
+            "similarity": score
+        })
+    return {"matches": results}
+@app.get("/corrections")
+def get_corrections():
+    """Return every correction currently held in memory, with a count."""
+    return {
+        "corrections": corrections_data,
+        "count": len(corrections_data)
+    }
+# ==================================================
+# 🌐 Mount Gradio App
+# Must come last so the catch-all mount at "/" does not shadow the API routes above.
+# ==================================================
+app = gr.mount_gradio_app(app, main_ui, path="/")
+print("✅ App initialized successfully!")
+print("📍 All features in one interface: /")
+print("🏥 Health Check: /health")
+print("🔌 API: /map_categories, /corrections")