subbunanepalli committed on
Commit
6ae2d38
·
verified ·
1 Parent(s): cd15aba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -37
app.py CHANGED
@@ -1,23 +1,23 @@
1
  from fastapi import FastAPI, HTTPException
2
- from pydantic import BaseModel
3
  from typing import Optional
4
  import pandas as pd
5
  import joblib
6
  import os
7
 
8
- # === Initialize FastAPI app ===
9
  app = FastAPI()
10
 
11
- # === Load TF-IDF vectorizer, models, and encoders ===
12
  TFIDF_PATH = "models/tfidf_vectorizer.pkl"
13
  MODEL_PATH = "models/xgb_models.pkl"
14
  ENCODER_PATH = "models/label_encoders.pkl"
15
 
 
16
  tfidf_vectorizer = joblib.load(TFIDF_PATH)
17
  models = joblib.load(MODEL_PATH)
18
  label_encoders = joblib.load(ENCODER_PATH)
19
 
20
- # === Define Input Schema ===
21
  class TransactionData(BaseModel):
22
  Transaction_Id: str
23
  Hit_Seq: int
@@ -53,7 +53,7 @@ class TransactionData(BaseModel):
53
  Next_Review_Date: str
54
  Sanction_Description: str
55
  Checker_Notes: str
56
- Sanction_Context: str
57
  Maker_Action: str
58
  Customer_ID: int
59
  Customer_Type: str
@@ -83,24 +83,34 @@ class TransactionData(BaseModel):
83
  Beneficial_Owner: str
84
  Sanctions_Exposure_History: bool
85
 
 
 
 
 
 
 
86
  class PredictionRequest(BaseModel):
87
  transaction_data: TransactionData
88
 
89
- class TextOnlyRequest(BaseModel):
90
- text_input: str
91
-
92
- # === Root Health Check ===
93
  @app.get("/")
94
  async def root():
95
- return {"status": "healthy", "message": "XGBoost TF-IDF Multi-Output API is running"}
 
 
 
 
 
 
 
 
 
96
 
97
- # === Predict using structured input ===
98
  @app.post("/predict")
99
  async def predict(request: PredictionRequest):
100
  try:
101
  input_data = pd.DataFrame([request.transaction_data.dict()])
102
 
103
- # === Concatenate important fields to form a context ===
104
  text_input = f"""
105
  Transaction ID: {input_data['Transaction_Id'].iloc[0]}
106
  Origin: {input_data['Origin'].iloc[0]}
@@ -145,39 +155,17 @@ async def predict(request: PredictionRequest):
145
  Purpose of Transaction: {input_data['Purpose_Of_Transaction'].iloc[0]}
146
  Beneficial Owner: {input_data['Beneficial_Owner'].iloc[0]}
147
  """
148
-
 
149
  X_tfidf = tfidf_vectorizer.transform([text_input])
150
  response = {}
151
 
 
152
  for label, model in models.items():
153
  proba = model.predict_proba(X_tfidf)[0]
154
  pred_idx = proba.argmax()
155
  decoded_label = label_encoders[label].inverse_transform([pred_idx])[0]
156
- response[label] = {
157
- "prediction": decoded_label,
158
- "probabilities": {
159
- label_encoders[label].classes_[i]: float(p)
160
- for i, p in enumerate(proba)
161
- }
162
- }
163
 
164
- return response
165
-
166
- except Exception as e:
167
- raise HTTPException(status_code=500, detail=str(e))
168
-
169
-
170
- # === Predict using raw text input ===
171
- @app.post("/predict_text")
172
- async def predict_from_text(request: TextOnlyRequest):
173
- try:
174
- X_tfidf = tfidf_vectorizer.transform([request.text_input])
175
- response = {}
176
-
177
- for label, model in models.items():
178
- proba = model.predict_proba(X_tfidf)[0]
179
- pred_idx = proba.argmax()
180
- decoded_label = label_encoders[label].inverse_transform([pred_idx])[0]
181
  response[label] = {
182
  "prediction": decoded_label,
183
  "probabilities": {
 
1
  from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel, Field, validator
3
  from typing import Optional
4
  import pandas as pd
5
  import joblib
6
  import os
7
 
 
8
  app = FastAPI()
9
 
10
+ # === File paths ===
11
  TFIDF_PATH = "models/tfidf_vectorizer.pkl"
12
  MODEL_PATH = "models/xgb_models.pkl"
13
  ENCODER_PATH = "models/label_encoders.pkl"
14
 
15
+ # === Load models ===
16
  tfidf_vectorizer = joblib.load(TFIDF_PATH)
17
  models = joblib.load(MODEL_PATH)
18
  label_encoders = joblib.load(ENCODER_PATH)
19
 
20
+ # === Input schema ===
21
  class TransactionData(BaseModel):
22
  Transaction_Id: str
23
  Hit_Seq: int
 
53
  Next_Review_Date: str
54
  Sanction_Description: str
55
  Checker_Notes: str
56
+ Sanction_Context: str = Field(..., min_length=5)
57
  Maker_Action: str
58
  Customer_ID: int
59
  Customer_Type: str
 
83
  Beneficial_Owner: str
84
  Sanctions_Exposure_History: bool
85
 
86
+ @validator("Sanction_Context")
87
+ def context_must_not_be_blank(cls, v):
88
+ if not v.strip():
89
+ raise ValueError("Sanction_Context must not be empty or whitespace.")
90
+ return v
91
+
92
  class PredictionRequest(BaseModel):
93
  transaction_data: TransactionData
94
 
 
 
 
 
95
  @app.get("/")
96
  async def root():
97
+ return {"status": "healthy", "message": "XGBoost TF-IDF API is running"}
98
+
99
+ @app.post("/validate")
100
+ async def validate(request: PredictionRequest):
101
+ """Only validate input. No prediction is made."""
102
+ try:
103
+ _ = request.transaction_data
104
+ return {"status": "success", "message": "Input is valid."}
105
+ except Exception as e:
106
+ raise HTTPException(status_code=400, detail=str(e))
107
 
 
108
  @app.post("/predict")
109
  async def predict(request: PredictionRequest):
110
  try:
111
  input_data = pd.DataFrame([request.transaction_data.dict()])
112
 
113
+ # === Concatenate important fields to form a context ===
114
  text_input = f"""
115
  Transaction ID: {input_data['Transaction_Id'].iloc[0]}
116
  Origin: {input_data['Origin'].iloc[0]}
 
155
  Purpose of Transaction: {input_data['Purpose_Of_Transaction'].iloc[0]}
156
  Beneficial Owner: {input_data['Beneficial_Owner'].iloc[0]}
157
  """
158
+
159
+ # === TF-IDF vectorization ===
160
  X_tfidf = tfidf_vectorizer.transform([text_input])
161
  response = {}
162
 
163
+ # === Predict for each target ===
164
  for label, model in models.items():
165
  proba = model.predict_proba(X_tfidf)[0]
166
  pred_idx = proba.argmax()
167
  decoded_label = label_encoders[label].inverse_transform([pred_idx])[0]
 
 
 
 
 
 
 
168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  response[label] = {
170
  "prediction": decoded_label,
171
  "probabilities": {