Spaces:

point9
/

Deberta_model_Test

Sleeping

App Files Files Community

subbunanepalli committed on Jun 11

Commit

ccba496

verified ·

1 Parent(s): 356cf69

Upload 4 files

Browse files

Files changed (4) hide show

config.py +61 -0
deberta_model.py +22 -0
label_encoders.pkl +3 -0
requirements.txt +8 -0

config.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import torch
+import os
+# --- Paths --- (entries starting with './' are relative, so they resolve against the process working directory)
+DATA_PATH = '/kaggle/input/synthesis-data/synthetic_transactions_samples_5000.csv'  # NOTE(review): absolute Kaggle-style input path; presumably absent outside that environment — confirm
+TOKENIZER_PATH = './tokenizer/'  # directory; created at import time at the bottom of this module
+LABEL_ENCODERS_PATH = './label_encoders.pkl'  # presumably a pickle of fitted label encoders — only unpickle trusted files
+MODEL_SAVE_DIR = './saved_models/'  # directory; created at import time at the bottom of this module
+PREDICTIONS_SAVE_DIR = './predictions/'  # directory; created at import time at the bottom of this module
+# --- Data Columns --- (column names are matched as exact strings; presumably against the CSV at DATA_PATH — verify)
+TEXT_COLUMN = "Sanction_Context"  # presumably the free-text input column — verify against the data loader
+LABEL_COLUMNS = [  # six target column names; NOTE(review): order presumably matches the order of the model heads — confirm
+    "Red_Flag_Reason",
+    "Maker_Action",
+    "Escalation_Level",
+    "Risk_Category",
+    "Risk_Drivers",
+    "Investigation_Outcome"
+]
+METADATA_COLUMNS = []  # empty: no metadata columns are configured
+# --- Model Hyperparameters --- (only DROPOUT_RATE is consumed by the model code visible in this commit)
+MAX_LEN = 128  # presumably the maximum tokenized sequence length — TODO confirm where it is applied
+BATCH_SIZE = 16
+LEARNING_RATE = 2e-5
+NUM_EPOCHS = 3
+DROPOUT_RATE = 0.3  # passed to nn.Dropout in deberta_model.py
+# --- Device Configuration --- (evaluated once, when this module is first imported)
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # CUDA when torch reports it available, otherwise CPU
+# --- Model Names --- (checkpoint identifiers; presumably Hugging Face Hub ids)
+BERT_MODEL_NAME = 'bert-base-uncased'
+ROBERTA_MODEL_NAME = 'roberta-base'
+DEBERTA_MODEL_NAME = 'microsoft/deberta-base'  # loaded by DebertaModel.from_pretrained in deberta_model.py
+# --- TF-IDF ---
+TFIDF_MAX_FEATURES = 5000  # presumably a vocabulary-size cap for a TF-IDF vectorizer — not used in the code shown here
+# --- Optional Strategy Definitions --- (keyed by a subset of LABEL_COLUMNS; nothing visible in this commit reads this mapping)
+FIELD_STRATEGIES = {
+    "Maker_Action": {
+        "loss": "focal_loss",
+        "enhancements": ["action_templates", "context_prompt_tuning"]
+    },
+    "Risk_Category": {
+        "enhancements": ["numerical_metadata", "transaction_patterns"]
+    },
+    "Escalation_Level": {
+        "enhancements": ["class_balancing", "policy_keyword_patterns"]
+    },
+    "Investigation_Outcome": {
+        "type": "classification_or_generation"
+    }
+}
+# Ensure save directories exist (runs at import time; exist_ok=True keeps repeated imports from raising on existing directories)
+os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
+os.makedirs(PREDICTIONS_SAVE_DIR, exist_ok=True)
+os.makedirs(TOKENIZER_PATH, exist_ok=True)  # NOTE(review): creates an empty tokenizer directory when none exists — confirm loaders tolerate that

deberta_model.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import torch
+import torch.nn as nn
+from transformers import DebertaModel
+from config import DROPOUT_RATE, DEBERTA_MODEL_NAME
+class DebertaMultiOutputModel(nn.Module):
+    tokenizer_name = DEBERTA_MODEL_NAME
+    def __init__(self, num_labels):
+        super(DebertaMultiOutputModel, self).__init__()
+        self.deberta = DebertaModel.from_pretrained(DEBERTA_MODEL_NAME)
+        self.dropout = nn.Dropout(DROPOUT_RATE)
+        self.classifiers = nn.ModuleList([
+            nn.Linear(self.deberta.config.hidden_size, n_classes) for n_classes in num_labels
+        ])
+    def forward(self, input_ids, attention_mask):
+        last_hidden_state = self.deberta(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
+        pooled_output = last_hidden_state[:, 0]  # [CLS] token representation
+        pooled_output = self.dropout(pooled_output)
+        return [classifier(pooled_output) for classifier in self.classifiers]

label_encoders.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be834abbaaa80f915d0a0015f541a17ae6fda5c75d9485cb23c6a7b7bb7b7c97
+size 2047

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi==0.104.1
+uvicorn==0.24.0
+pydantic==2.4.2
+transformers==4.35.2
+torch==2.1.1
+numpy==1.24.3
+pandas==2.1.3
+scikit-learn==1.3.2