Spaces:
Sleeping
Sleeping
# Central configuration constants: file paths, dataset column names,
# hyperparameters, device selection, and pretrained model identifiers.
# NOTE: importing this module has a side effect -- it creates the model,
# predictions, and tokenizer directories if they do not already exist.
import os

import torch

# --- Paths ---
# Input CSV; the path points into a Kaggle input mount.
DATA_PATH = '/kaggle/input/synthesis-data/synthetic_transactions_samples_5000.csv'
# NOTE(review): the tokenizer directory name mentions RoBERTa regardless of
# which model MODEL_NAME selects below -- confirm this is intended.
TOKENIZER_PATH = './tokenizer_roberta/'
LABEL_ENCODERS_PATH = './label_encoders.pkl'
MODEL_SAVE_DIR = './saved_models/'
PREDICTIONS_SAVE_DIR = './predictions/'

# --- Data Columns ---
# Name of the column holding the input text.
TEXT_COLUMN = "Sanction_Context"
# Names of the label columns (one entry per target field).
LABEL_COLUMNS = [
    "Red_Flag_Reason",
    "Maker_Action",
    "Escalation_Level",
    "Risk_Category",
    "Risk_Drivers",
    "Investigation_Outcome",
]
# No metadata columns are configured.
METADATA_COLUMNS = []

# --- Model Hyperparameters ---
MAX_LEN = 128
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
NUM_EPOCHS = 3
DROPOUT_RATE = 0.3

# --- Device Configuration ---
# Use CUDA when torch reports it as available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Model Names ---
BERT_MODEL_NAME = 'bert-base-uncased'
ROBERTA_MODEL_NAME = 'roberta-base'
DEBERTA_MODEL_NAME = 'microsoft/deberta-base'

# --- TF-IDF ---
TFIDF_MAX_FEATURES = 5000

# --- Strategy Definitions ---
# Per-field strategy settings keyed by label column name. Only four of the
# six LABEL_COLUMNS have an entry. How these settings are consumed is not
# visible in this file.
FIELD_STRATEGIES = {
    "Maker_Action": {
        "loss": "focal_loss",
        "enhancements": ["action_templates", "context_prompt_tuning"],
    },
    "Risk_Category": {
        "enhancements": ["numerical_metadata", "transaction_patterns"],
    },
    "Escalation_Level": {
        "enhancements": ["class_balancing", "policy_keyword_patterns"],
    },
    "Investigation_Outcome": {
        "type": "classification_or_generation",
    },
}

# --- Ensure directories exist ---
# exist_ok=True makes these calls safe to repeat on re-import / re-run.
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
os.makedirs(PREDICTIONS_SAVE_DIR, exist_ok=True)
os.makedirs(TOKENIZER_PATH, exist_ok=True)

# Set the active model for training/deployment
MODEL_NAME = ROBERTA_MODEL_NAME