Spaces:

subbunanepalli
/

XGB_VALIDATION

No application file

App Files Files Community

subbunanepalli committed on Jun 14, 2025

Commit

6ae2d38

verified ·

1 Parent(s): cd15aba

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -37

app.py CHANGED Viewed

@@ -1,23 +1,23 @@
 from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
 from typing import Optional
 import pandas as pd
 import joblib
 import os
-# === Initialize FastAPI app ===
 app = FastAPI()
-# === Load TF-IDF vectorizer, models, and encoders ===
 TFIDF_PATH = "models/tfidf_vectorizer.pkl"
 MODEL_PATH = "models/xgb_models.pkl"
 ENCODER_PATH = "models/label_encoders.pkl"
 tfidf_vectorizer = joblib.load(TFIDF_PATH)
 models = joblib.load(MODEL_PATH)
 label_encoders = joblib.load(ENCODER_PATH)
-# === Define Input Schema ===
 class TransactionData(BaseModel):
     Transaction_Id: str
     Hit_Seq: int
@@ -53,7 +53,7 @@ class TransactionData(BaseModel):
     Next_Review_Date: str
     Sanction_Description: str
     Checker_Notes: str
-    Sanction_Context: str
     Maker_Action: str
     Customer_ID: int
     Customer_Type: str
@@ -83,24 +83,34 @@ class TransactionData(BaseModel):
     Beneficial_Owner: str
     Sanctions_Exposure_History: bool
 class PredictionRequest(BaseModel):
     transaction_data: TransactionData
-class TextOnlyRequest(BaseModel):
-    text_input: str
-# === Root Health Check ===
 @app.get("/")
 async def root():
-    return {"status": "healthy", "message": "XGBoost TF-IDF Multi-Output API is running"}
-# === Predict using structured input ===
 @app.post("/predict")
 async def predict(request: PredictionRequest):
     try:
         input_data = pd.DataFrame([request.transaction_data.dict()])
-        # === Concatenate important fields to form a context ===
         text_input = f"""
         Transaction ID: {input_data['Transaction_Id'].iloc[0]}
         Origin: {input_data['Origin'].iloc[0]}
@@ -145,39 +155,17 @@ async def predict(request: PredictionRequest):
         Purpose of Transaction: {input_data['Purpose_Of_Transaction'].iloc[0]}
         Beneficial Owner: {input_data['Beneficial_Owner'].iloc[0]}
         """
         X_tfidf = tfidf_vectorizer.transform([text_input])
         response = {}
         for label, model in models.items():
             proba = model.predict_proba(X_tfidf)[0]
             pred_idx = proba.argmax()
             decoded_label = label_encoders[label].inverse_transform([pred_idx])[0]
-            response[label] = {
-                "prediction": decoded_label,
-                "probabilities": {
-                    label_encoders[label].classes_[i]: float(p)
-                    for i, p in enumerate(proba)
-                }
-            }
-        return response
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-# === Predict using raw text input ===
-@app.post("/predict_text")
-async def predict_from_text(request: TextOnlyRequest):
-    try:
-        X_tfidf = tfidf_vectorizer.transform([request.text_input])
-        response = {}
-        for label, model in models.items():
-            proba = model.predict_proba(X_tfidf)[0]
-            pred_idx = proba.argmax()
-            decoded_label = label_encoders[label].inverse_transform([pred_idx])[0]
             response[label] = {
                 "prediction": decoded_label,
                 "probabilities": {

 from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field, validator
 from typing import Optional
 import pandas as pd
 import joblib
 import os
 app = FastAPI()
+# === File paths ===
 TFIDF_PATH = "models/tfidf_vectorizer.pkl"
 MODEL_PATH = "models/xgb_models.pkl"
 ENCODER_PATH = "models/label_encoders.pkl"
+# === Load models ===
 tfidf_vectorizer = joblib.load(TFIDF_PATH)
 models = joblib.load(MODEL_PATH)
 label_encoders = joblib.load(ENCODER_PATH)
+# === Input schema ===
 class TransactionData(BaseModel):
     Transaction_Id: str
     Hit_Seq: int
     Next_Review_Date: str
     Sanction_Description: str
     Checker_Notes: str
+    Sanction_Context: str = Field(..., min_length=5)
     Maker_Action: str
     Customer_ID: int
     Customer_Type: str
     Beneficial_Owner: str
     Sanctions_Exposure_History: bool
+    @validator("Sanction_Context")
+    def context_must_not_be_blank(cls, v):  # complements min_length=5 on the field: rejects values that are only whitespace
+        if not v.strip():
+            raise ValueError("Sanction_Context must not be empty or whitespace.")
+        return v  # value is returned as received (not stripped)
 class PredictionRequest(BaseModel):
     transaction_data: TransactionData
 @app.get("/")
 async def root():
+    return {"status": "healthy", "message": "XGBoost TF-IDF API is running"}
+@app.post("/validate")
+async def validate(request: PredictionRequest):
+    """Only validate input against the PredictionRequest schema. No prediction is made."""
+    try:
+        _ = request.transaction_data  # attribute read only; nothing is computed from it
+        return {"status": "success", "message": "Input is valid."}
+    except Exception as e:  # NOTE(review): probably unreachable - the body appears to be schema-validated before this handler runs, so invalid input would not reach this 400 path; confirm
+        raise HTTPException(status_code=400, detail=str(e))
 @app.post("/predict")
 async def predict(request: PredictionRequest):
     try:
         input_data = pd.DataFrame([request.transaction_data.dict()])
+        # === Concatenate important fields to form a context ===
         text_input = f"""
         Transaction ID: {input_data['Transaction_Id'].iloc[0]}
         Origin: {input_data['Origin'].iloc[0]}
         Purpose of Transaction: {input_data['Purpose_Of_Transaction'].iloc[0]}
         Beneficial Owner: {input_data['Beneficial_Owner'].iloc[0]}
         """
+        # === TF-IDF vectorization ===
         X_tfidf = tfidf_vectorizer.transform([text_input])
         response = {}
+        # === Predict for each target ===
         for label, model in models.items():
             proba = model.predict_proba(X_tfidf)[0]
             pred_idx = proba.argmax()
             decoded_label = label_encoders[label].inverse_transform([pred_idx])[0]
             response[label] = {
                 "prediction": decoded_label,
                 "probabilities": {