# Spaces:
# Sleeping
# Sleeping
"""Central configuration constants: file paths, column names, and settings.

Importing this module has one side effect: the model output directory
(``MODEL_SAVE_DIR``) is created if it does not already exist.
"""

import os

# --- Paths ---
# Absolute path of the directory that contains this file; every other path
# below is built from it, so the values do not depend on the current
# working directory.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))

# Path to dataset
DATA_PATH = os.path.join(
    BASE_DIR, "data", "synthetic_transactions_samples_5000.csv"
)

# Directory to save models (created at import time; exist_ok=True makes
# repeated imports harmless).
MODEL_SAVE_DIR = os.path.join(BASE_DIR, "models")
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# Save paths for Logistic Regression model + artifacts
MODEL_PATH = os.path.join(MODEL_SAVE_DIR, "logreg_model.pkl")  # Logistic Regression model
TFIDF_VECTORIZER_PATH = os.path.join(MODEL_SAVE_DIR, "tfidf_vectorizer.pkl")
LABEL_ENCODERS_PATH = os.path.join(MODEL_SAVE_DIR, "label_encoders.pkl")

# --- Text & Label Columns ---
# Name of the text column.
TEXT_COLUMN = "Sanction_Context"

# Names of the label columns.
# NOTE(review): presumably one target per column, and all of these are
# expected to exist in the CSV at DATA_PATH; confirm against the training code.
LABEL_COLUMNS = [
    "Red_Flag_Reason",
    "Maker_Action",
    "Escalation_Level",
    "Risk_Category",
    "Risk_Drivers",
    "Investigation_Outcome",
]

# --- TF-IDF Settings ---
# NOTE(review): these look like the arguments for a TF-IDF vectorizer
# (max_features / ngram_range / stop_words); confirm where they are consumed.
TFIDF_MAX_FEATURES = 5000
NGRAM_RANGE = (1, 2)
USE_STOPWORDS = True  # English stopwords will be removed if True

# --- Train/Test Split ---
RANDOM_STATE = 42
TEST_SIZE = 0.2