subbunanepalli committed on
Commit
6b1d291
·
verified ·
1 Parent(s): 92d8426

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -0
app.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel, Field, validator
3
+ from typing import Optional
4
+ import pandas as pd
5
+ import joblib
6
+
7
app = FastAPI()

# Locations of the serialized artifacts. These are relative paths, so they are
# resolved against the working directory the server process is started from.
TFIDF_PATH = "models/tfidf_vectorizer.pkl"
MODEL_PATH = "models/logreg_model.pkl"
ENCODER_PATH = "models/label_encoders.pkl"

# Load everything once at import time so the request handlers share the same
# objects instead of re-reading the files on every call.
# NOTE: joblib.load unpickles its input, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
tfidf_vectorizer = joblib.load(TFIDF_PATH)
models = joblib.load(MODEL_PATH)  # iterated below via .items(): label -> classifier
label_encoders = joblib.load(ENCODER_PATH)  # indexed below by the same label keys
17
+
18
+ # === Input schema ===
19
class TransactionData(BaseModel):
    """Request payload describing one screened transaction.

    Every field is required except ``Payment_Sender_Name`` and
    ``Payment_Reciever_Name``, which default to an empty string. Field names
    are the JSON keys clients send, so they must not be renamed (including
    the ``Reciever`` spelling).

    Only a subset of these fields is rendered into the text that ``/predict``
    passes to the vectorizer; the remaining ones are validated but not read
    by that handler.
    """

    Transaction_Id: str
    Hit_Seq: int
    Hit_Id_List: str
    Origin: str
    Designation: str
    Keywords: str
    Name: str
    SWIFT_Tag: str
    Currency: str
    Entity: str
    Message: str
    City: str
    Country: str
    State: str
    Hit_Type: str
    Record_Matching_String: str
    WatchList_Match_String: str
    Payment_Sender_Name: Optional[str] = ""
    Payment_Reciever_Name: Optional[str] = ""
    Swift_Message_Type: str
    Text_Sanction_Data: str
    Matched_Sanctioned_Entity: str
    Is_Match: int
    Red_Flag_Reason: str
    Risk_Level: str
    Risk_Score: float
    Risk_Score_Description: str
    CDD_Level: str
    PEP_Status: str
    Value_Date: str
    Last_Review_Date: str
    Next_Review_Date: str
    Sanction_Description: str
    Checker_Notes: str
    Sanction_Context: str
    Maker_Action: str
    Customer_ID: int
    Customer_Type: str
    Industry: str
    Transaction_Date_Time: str
    Transaction_Type: str
    Transaction_Channel: str
    Originating_Bank: str
    Beneficiary_Bank: str
    Geographic_Origin: str
    Geographic_Destination: str
    Match_Score: float
    Match_Type: str
    Sanctions_List_Version: str
    Screening_Date_Time: str
    Risk_Category: str
    Risk_Drivers: str
    Alert_Status: str
    Investigation_Outcome: str
    Case_Owner_Analyst: str
    Escalation_Level: str
    Escalation_Date: str
    Regulatory_Reporting_Flags: bool
    Audit_Trail_Timestamp: str
    Source_Of_Funds: str
    Purpose_Of_Transaction: str
    Beneficial_Owner: str
    Sanctions_Exposure_History: bool
83
+
84
class PredictionRequest(BaseModel):
    """Request body wrapper: the transaction is sent under ``transaction_data``."""

    transaction_data: TransactionData
86
+
87
@app.get("/")
def root():
    """Health-check endpoint; returns a fixed status payload."""
    return {"status": "healthy", "message": "TF-IDF + LogReg API running"}
90
+
91
@app.post("/validate")
def validate_input(request: PredictionRequest):
    """Confirm that the request body parses as a ``PredictionRequest``.

    The handler does no work of its own: declaring ``request`` with the
    model type is what triggers schema validation, so reaching the return
    statement means the payload was accepted.
    """
    # NOTE(review): the leading space in the message may be residue of a
    # character lost upstream; kept as-is so the response is unchanged.
    return {"message": " Input is valid"}
94
+
95
def _build_text_input(tx) -> str:
    """Render the labelled, line-per-field document that gets vectorized for *tx*.

    Values are read straight from the validated model; no intermediate
    DataFrame is needed to format a single record.
    """
    # NOTE(review): the template's original leading whitespace could not be
    # recovered; this only matters if the fitted vectorizer is sensitive to
    # whitespace (e.g. a character-level analyzer) - confirm.
    return f"""
    Transaction ID: {tx.Transaction_Id}
    Origin: {tx.Origin}
    Designation: {tx.Designation}
    Keywords: {tx.Keywords}
    Name: {tx.Name}
    SWIFT Tag: {tx.SWIFT_Tag}
    Currency: {tx.Currency}
    Entity: {tx.Entity}
    Message: {tx.Message}
    City: {tx.City}
    Country: {tx.Country}
    State: {tx.State}
    Hit Type: {tx.Hit_Type}
    Record Matching String: {tx.Record_Matching_String}
    WatchList Match String: {tx.WatchList_Match_String}
    Payment Sender: {tx.Payment_Sender_Name}
    Payment Receiver: {tx.Payment_Reciever_Name}
    Swift Message Type: {tx.Swift_Message_Type}
    Text Sanction Data: {tx.Text_Sanction_Data}
    Matched Sanctioned Entity: {tx.Matched_Sanctioned_Entity}
    Red Flag Reason: {tx.Red_Flag_Reason}
    Risk Level: {tx.Risk_Level}
    Risk Score: {tx.Risk_Score}
    CDD Level: {tx.CDD_Level}
    PEP Status: {tx.PEP_Status}
    Sanction Description: {tx.Sanction_Description}
    Checker Notes: {tx.Checker_Notes}
    Sanction Context: {tx.Sanction_Context}
    Maker Action: {tx.Maker_Action}
    Customer Type: {tx.Customer_Type}
    Industry: {tx.Industry}
    Transaction Type: {tx.Transaction_Type}
    Transaction Channel: {tx.Transaction_Channel}
    Geographic Origin: {tx.Geographic_Origin}
    Geographic Destination: {tx.Geographic_Destination}
    Risk Category: {tx.Risk_Category}
    Risk Drivers: {tx.Risk_Drivers}
    Alert Status: {tx.Alert_Status}
    Investigation Outcome: {tx.Investigation_Outcome}
    Source of Funds: {tx.Source_Of_Funds}
    Purpose of Transaction: {tx.Purpose_Of_Transaction}
    Beneficial Owner: {tx.Beneficial_Owner}
    """


def _predict_all_labels(text: str) -> dict:
    """Vectorize *text* and run every model in ``models`` on it.

    Returns a dict keyed by label name; each value holds the decoded
    ``"prediction"`` and a ``"probabilities"`` mapping of class -> float.
    """
    features = tfidf_vectorizer.transform([text])
    results = {}

    for label, model in models.items():
        encoder = label_encoders[label]
        # One document was vectorized, so take the single row of probabilities.
        proba = model.predict_proba(features)[0]
        best_idx = proba.argmax()
        # NOTE(review): column i of predict_proba is paired with
        # encoder.classes_[i]; that presumes the model was fitted on every
        # encoder class - confirm against the training code.
        results[label] = {
            "prediction": encoder.inverse_transform([best_idx])[0],
            "probabilities": {
                encoder.classes_[i]: float(p) for i, p in enumerate(proba)
            },
        }

    return results


@app.post("/predict")
def predict(request: PredictionRequest):
    """Predict every configured label for the submitted transaction.

    Raises:
        HTTPException: status 500 with the underlying error text if text
            rendering, vectorization, or any model call fails.
    """
    try:
        return _predict_all_labels(_build_text_input(request.transaction_data))
    except Exception as e:
        # Chain the cause so the original traceback is kept for debugging.
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/test")
def test_model():
    """Smoke-test the loaded artifacts with a fixed sample sentence.

    Raises:
        HTTPException: status 500 with the underlying error text if
            vectorization or any model call fails.
    """
    try:
        sample_text = "Transaction flagged due to suspicious entity and origin. Needs compliance check."
        return {
            "sample_input": sample_text,
            "predictions": _predict_all_labels(sample_text),
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e