ganeshkonapalli committed on
Commit
aaa245d
·
verified ·
1 Parent(s): 0ab4a86

Update validate.py

Browse files
Files changed (1) hide show
  1. validate.py +75 -72
validate.py CHANGED
@@ -1,75 +1,78 @@
1
- import pandas as pd
2
- import pickle
3
- import os
4
- from sklearn.metrics import classification_report
5
- from sklearn.model_selection import train_test_split
6
 
7
- # === File Paths ===
8
- DATA_PATH = "data.csv" # Change this to your actual CSV
9
- TEXT_COLUMNS = [
10
- "Transaction_Id", "Origin", "Designation", "Keywords", "Name", "SWIFT_Tag", "Currency",
11
- "Entity", "Message", "City", "Country", "State", "Hit_Type", "Record_Matching_String",
12
- "WatchList_Match_String", "Payment_Sender_Name", "Payment_Reciever_Name", "Swift_Message_Type",
13
- "Text_Sanction_Data", "Matched_Sanctioned_Entity", "Red_Flag_Reason", "Risk_Level",
14
- "Risk_Score", "CDD_Level", "PEP_Status", "Sanction_Description", "Checker_Notes",
15
- "Sanction_Context", "Maker_Action", "Customer_Type", "Industry", "Transaction_Type",
16
- "Transaction_Channel", "Geographic_Origin", "Geographic_Destination", "Risk_Category",
17
- "Risk_Drivers", "Alert_Status", "Investigation_Outcome", "Source_Of_Funds",
18
- "Purpose_Of_Transaction", "Beneficial_Owner"
19
- ]
20
-
21
- LABEL_COLUMNS = [
22
- "Red_Flag_Reason", "Maker_Action", "Escalation_Level",
23
- "Risk_Category", "Risk_Drivers", "Investigation_Outcome"
24
- ]
25
-
26
- VECTORIZER_PATH = "tfidf_vectorizer (2).pkl"
27
- MODELS_PATH = "xgb_model.pkl"
28
- ENCODERS_PATH = "label_encoders (5).pkl"
29
-
30
- RANDOM_STATE = 42
31
- TEST_SIZE = 0.2
32
-
33
- # === Utils ===
34
- def load_pickle(path):
35
- with open(path, "rb") as f:
36
- return pickle.load(f)
37
-
38
- # === Main ===
39
- def validate():
40
- print("📥 Loading data...")
41
- df = pd.read_csv(DATA_PATH)
42
- df.dropna(subset=["Sanction_Context"] + LABEL_COLUMNS, inplace=True)
43
-
44
- print("🧠 Loading vectorizer, models, encoders...")
45
- tfidf = load_pickle(VECTORIZER_PATH)
46
- models = load_pickle(MODELS_PATH)
47
- label_encoders = load_pickle(ENCODERS_PATH)
48
-
49
- print("📊 TF-IDF transforming text...")
50
- def concat_text(row):
51
- return "\n".join([str(row[col]) for col in TEXT_COLUMNS if col in row and pd.notna(row[col])])
52
-
53
- df["combined_text"] = df.apply(concat_text, axis=1)
54
- X = tfidf.transform(df["combined_text"])
55
-
56
- for label in LABEL_COLUMNS:
57
- print(f"\n🔍 Validating: {label}")
58
- y_raw = df[label]
59
- encoder = label_encoders[label]
60
- y_encoded = encoder.transform(y_raw)
61
-
62
- _, X_test, _, y_test = train_test_split(
63
- X, y_encoded, test_size=TEST_SIZE, random_state=RANDOM_STATE
64
- )
65
-
66
- model = models[label]
67
- y_pred = model.predict(X_test)
68
-
69
- print(classification_report(
70
- encoder.inverse_transform(y_test),
71
- encoder.inverse_transform(y_pred)
72
- ))
73
 
74
  if __name__ == "__main__":
75
- validate()
 
 
 
 
 
 
1
+ from pydantic import ValidationError
2
+ from app import TransactionData # Reuse model from app.py
3
+ import json
 
 
4
 
5
+ # Example JSON input (update as needed)
6
+ sample_input = {
7
+ "Transaction_Id": "T123456789",
8
+ "Hit_Seq": 1,
9
+ "Hit_Id_List": "HIT001",
10
+ "Origin": "USA",
11
+ "Designation": "Manager",
12
+ "Keywords": "Payment",
13
+ "Name": "John Doe",
14
+ "SWIFT_Tag": "TAG001",
15
+ "Currency": "USD",
16
+ "Entity": "CompanyX",
17
+ "Message": "This is a test message",
18
+ "City": "New York",
19
+ "Country": "USA",
20
+ "State": "NY",
21
+ "Hit_Type": "Name Match",
22
+ "Record_Matching_String": "Matching String Example",
23
+ "WatchList_Match_String": "Watchlist Match Example",
24
+ "Payment_Sender_Name": "Alice",
25
+ "Payment_Reciever_Name": "Bob",
26
+ "Swift_Message_Type": "MT103",
27
+ "Text_Sanction_Data": "Sanction text here",
28
+ "Matched_Sanctioned_Entity": "Entity XYZ",
29
+ "Is_Match": 1,
30
+ "Red_Flag_Reason": "High Risk",
31
+ "Risk_Level": "High",
32
+ "Risk_Score": 85.0,
33
+ "Risk_Score_Description": "High Risk Score",
34
+ "CDD_Level": "Enhanced",
35
+ "PEP_Status": "No",
36
+ "Value_Date": "2023-10-01",
37
+ "Last_Review_Date": "2023-11-01",
38
+ "Next_Review_Date": "2024-11-01",
39
+ "Sanction_Description": "Some sanctions",
40
+ "Checker_Notes": "Verified manually",
41
+ "Sanction_Context": "Transaction under review",
42
+ "Maker_Action": "Escalated",
43
+ "Customer_ID": 1001,
44
+ "Customer_Type": "Corporate",
45
+ "Industry": "Finance",
46
+ "Transaction_Date_Time": "2023-10-01T12:00:00",
47
+ "Transaction_Type": "Transfer",
48
+ "Transaction_Channel": "Online",
49
+ "Originating_Bank": "Bank A",
50
+ "Beneficiary_Bank": "Bank B",
51
+ "Geographic_Origin": "USA",
52
+ "Geographic_Destination": "UK",
53
+ "Match_Score": 0.95,
54
+ "Match_Type": "Fuzzy",
55
+ "Sanctions_List_Version": "v2023.10",
56
+ "Screening_Date_Time": "2023-10-01T10:00:00",
57
+ "Risk_Category": "Fraud",
58
+ "Risk_Drivers": "Transaction Volume",
59
+ "Alert_Status": "Open",
60
+ "Investigation_Outcome": "Pending",
61
+ "Case_Owner_Analyst": "Analyst1",
62
+ "Escalation_Level": "Level 2",
63
+ "Escalation_Date": "2023-10-02",
64
+ "Regulatory_Reporting_Flags": True,
65
+ "Audit_Trail_Timestamp": "2023-10-01T10:15:00",
66
+ "Source_Of_Funds": "Internal",
67
+ "Purpose_Of_Transaction": "Loan Repayment",
68
+ "Beneficial_Owner": "OwnerX",
69
+ "Sanctions_Exposure_History": False
70
+ }
71
 
72
  if __name__ == "__main__":
73
+ try:
74
+ validated = TransactionData(**sample_input)
75
+ print(" Input is valid.")
76
+ except ValidationError as e:
77
+ print(" Validation failed:")
78
+ print(e.json())