ganeshkonapalli committed on
Commit
05e8dcd
·
verified ·
1 Parent(s): db1821b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -15,7 +15,7 @@ LABEL_COLUMNS = [
15
  "Risk_Category", "Risk_Drivers", "Investigation_Outcome"
16
  ]
17
  TEXT_COLUMN = "Sanction_Context"
18
- MODEL_DIR = "/tmp" # or "/data" if preferred
19
  MODEL_PATH = os.path.join(MODEL_DIR, "logreg_model.pkl")
20
  TFIDF_PATH = os.path.join(MODEL_DIR, "tfidf_vectorizer.pkl")
21
  ENCODERS_PATH = os.path.join(MODEL_DIR, "label_encoders.pkl")
@@ -23,10 +23,9 @@ ENCODERS_PATH = os.path.join(MODEL_DIR, "label_encoders.pkl")
23
  # --- FastAPI App ---
24
  app = FastAPI()
25
 
26
- # --- Input Schema ---
27
  class TransactionData(BaseModel):
28
  Sanction_Context: str
29
- # Add all required metadata fields here if needed
30
 
31
  class PredictionRequest(BaseModel):
32
  transaction_data: TransactionData
@@ -44,19 +43,23 @@ def train_model(input: DataPathInput):
44
  df = pd.read_csv(input.data_path)
45
  df.dropna(subset=[TEXT_COLUMN] + LABEL_COLUMNS, inplace=True)
46
 
 
47
  label_encoders = {}
48
  for col in LABEL_COLUMNS:
49
  le = LabelEncoder()
50
  df[col] = le.fit_transform(df[col])
51
  label_encoders[col] = le
52
 
 
53
  tfidf = TfidfVectorizer(max_features=1000, ngram_range=(1, 2), stop_words="english")
54
  X_vec = tfidf.fit_transform(df[TEXT_COLUMN])
55
  y = df[LABEL_COLUMNS]
56
 
 
57
  model = MultiOutputClassifier(LogisticRegression(max_iter=1000))
58
  model.fit(X_vec, y)
59
 
 
60
  joblib.dump(model, MODEL_PATH)
61
  joblib.dump(tfidf, TFIDF_PATH)
62
  joblib.dump(label_encoders, ENCODERS_PATH)
@@ -100,7 +103,7 @@ def test_model(input: DataPathInput):
100
  }
101
  decoded_preds.append(decoded)
102
 
103
- return {"predictions": decoded_preds[:5]}
104
  except Exception as e:
105
  raise HTTPException(status_code=500, detail=str(e))
106
 
 
15
  "Risk_Category", "Risk_Drivers", "Investigation_Outcome"
16
  ]
17
  TEXT_COLUMN = "Sanction_Context"
18
+ MODEL_DIR = "/tmp"
19
  MODEL_PATH = os.path.join(MODEL_DIR, "logreg_model.pkl")
20
  TFIDF_PATH = os.path.join(MODEL_DIR, "tfidf_vectorizer.pkl")
21
  ENCODERS_PATH = os.path.join(MODEL_DIR, "label_encoders.pkl")
 
23
  # --- FastAPI App ---
24
  app = FastAPI()
25
 
26
+ # --- Schemas ---
27
  class TransactionData(BaseModel):
28
  Sanction_Context: str
 
29
 
30
  class PredictionRequest(BaseModel):
31
  transaction_data: TransactionData
 
43
  df = pd.read_csv(input.data_path)
44
  df.dropna(subset=[TEXT_COLUMN] + LABEL_COLUMNS, inplace=True)
45
 
46
+ # Label Encoding
47
  label_encoders = {}
48
  for col in LABEL_COLUMNS:
49
  le = LabelEncoder()
50
  df[col] = le.fit_transform(df[col])
51
  label_encoders[col] = le
52
 
53
+ # TF-IDF
54
  tfidf = TfidfVectorizer(max_features=1000, ngram_range=(1, 2), stop_words="english")
55
  X_vec = tfidf.fit_transform(df[TEXT_COLUMN])
56
  y = df[LABEL_COLUMNS]
57
 
58
+ # Train Model
59
  model = MultiOutputClassifier(LogisticRegression(max_iter=1000))
60
  model.fit(X_vec, y)
61
 
62
+ # Save
63
  joblib.dump(model, MODEL_PATH)
64
  joblib.dump(tfidf, TFIDF_PATH)
65
  joblib.dump(label_encoders, ENCODERS_PATH)
 
103
  }
104
  decoded_preds.append(decoded)
105
 
106
+ return {"predictions": decoded_preds[:5]} # limit output
107
  except Exception as e:
108
  raise HTTPException(status_code=500, detail=str(e))
109