Josephaimodels committed on
Commit
a21d146
·
verified ·
1 Parent(s): e91fc1b

Upload 3 files

Browse files
Files changed (3) hide show
  1. LICENSE +7 -0
  2. bert_inference.py +250 -0
  3. deberta_inference.py +250 -0
LICENSE ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Copyright 2025 Joseph Jarusevicius
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
bert_inference.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import pickle
4
+ import torch
5
+ import torch.nn.functional as F
6
+ import numpy as np
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+ from sklearn.isotonic import IsotonicRegression
9
+ from sklearn.linear_model import LogisticRegression
10
+ import warnings
11
+ warnings.filterwarnings('ignore')
12
+
13
# Fix numpy compatibility issue
# When 'numpy._core' has not been loaded, register the older 'numpy.core'
# modules under the new names so imports of 'numpy._core' resolve.
if 'numpy._core' not in sys.modules:
    import numpy.core
    sys.modules.update({
        'numpy._core': numpy.core,
        'numpy._core._multiarray_umath': numpy.core._multiarray_umath,
    })
18
+
19
# Configuration
MODEL_NAME = "microsoft/xtremedistil-l6-h256-uncased"

# The checkpoint directory is requested interactively as soon as this module
# is executed or imported; the calibrator pickle is expected inside it.
CHECKPOINT_PATH = input("Please enter the path to the BERT model directory: ").strip()
CALIBRATOR_FILE = os.path.join(CHECKPOINT_PATH, "calibrators.pkl")

# Tokenizer truncation length and number of texts per forward pass.
MAX_LENGTH = 512
BATCH_SIZE = 16

# Run on the GPU when one is available, otherwise on the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
26
+
27
+ # ============================================================================
28
+ # CALIBRATION CLASSES (needed for unpickling)
29
+ # ============================================================================
30
+
31
class TemperatureScaling:
    """Calibrator that divides logits by a single scalar temperature.

    The class name is looked up by name when the calibrator pickle is
    loaded, so it must not be renamed.
    """

    def __init__(self):
        # A temperature of 1.0 leaves the logits unchanged.
        self.temperature = 1.0

    def transform(self, logits):
        """Return ``logits`` divided by ``self.temperature``.

        The result is still in logit space; no softmax is applied here.
        """
        rescaled = logits / self.temperature
        return rescaled
36
+
37
class PlattScaling:
    """Calibrator that feeds the softmax class-1 probability to a logistic regression.

    The class name is looked up by name when the calibrator pickle is
    loaded, so it must not be renamed.
    """

    def __init__(self):
        self.calibrator = LogisticRegression()
        # Guards transform(); a fresh instance starts out unfitted.
        self.fitted = False

    def transform(self, logits):
        """Convert raw logits into calibrated class probabilities.

        Args:
            logits: 2-D array-like of logits; column 1 is used as the score.

        Returns:
            Whatever ``self.calibrator.predict_proba`` returns for the
            class-1 softmax probabilities.

        Raises:
            ValueError: if ``self.fitted`` is false.
        """
        if not self.fitted:
            raise ValueError("Calibrator not fitted")
        softmax_probs = torch.softmax(torch.tensor(logits), dim=-1).numpy()
        # The score is passed as a single-column 2-D feature matrix.
        positive_column = softmax_probs[:, 1].reshape(-1, 1)
        return self.calibrator.predict_proba(positive_column)
48
+
49
class IsotonicCalibration:
    """Calibrator that remaps the softmax class-1 probability with isotonic regression.

    The class name is looked up by name when the calibrator pickle is
    loaded, so it must not be renamed.
    """

    def __init__(self):
        self.calibrator = IsotonicRegression(out_of_bounds='clip')
        # Guards transform(); a fresh instance starts out unfitted.
        self.fitted = False

    def transform(self, logits):
        """Convert raw logits into a two-column array of calibrated probabilities.

        Args:
            logits: 2-D array-like of logits; column 1 is used as the score.

        Returns:
            Array of shape (n_samples, 2): column 1 holds the calibrated
            class-1 score and column 0 its complement.

        Raises:
            ValueError: if ``self.fitted`` is false.
        """
        if not self.fitted:
            raise ValueError("Calibrator not fitted")
        softmax_probs = torch.softmax(torch.tensor(logits), dim=-1).numpy()
        positive_raw = softmax_probs[:, 1]
        positive_calibrated = self.calibrator.transform(positive_raw)

        # Both columns are filled explicitly; the default float64 dtype is kept.
        two_column = np.empty((len(positive_raw), 2))
        two_column[:, 0] = 1 - positive_calibrated
        two_column[:, 1] = positive_calibrated
        return two_column
63
+
64
class MixNMatchCalibration:
    """Temperature scaling followed by per-bin calibration of the class-1 probability.

    Fitted state (normally restored from a pickle):
      * ``temperature``     - scalar the logits are divided by.
      * ``bin_boundaries``  - sequence of ``n_bins + 1`` bin edges.
      * ``bin_calibrators`` - maps a bin index to ``(cal_type, cal_data)``;
        ``'isotonic'`` calls ``cal_data.predict`` and ``'mean'`` multiplies
        the probabilities by ``cal_data``.
      * ``bin_sample_counts`` - stored only; not read by the code in this class.

    The class name is looked up by name when the calibrator pickle is
    loaded, so it must not be renamed.
    """

    def __init__(self, n_bins=15, bin_strategy='quantile'):
        self.n_bins = n_bins
        self.bin_strategy = bin_strategy
        self.temperature = 1.0
        self.bin_boundaries = None
        self.bin_calibrators = {}
        self.bin_sample_counts = {}

    def _get_bin_mask(self, probs, bin_idx):
        """Return a boolean mask of the entries of ``probs`` that fall in bin ``bin_idx``.

        Bins are half-open ``[lower, upper)`` except the last one, which
        also includes its upper edge.
        """
        lower = self.bin_boundaries[bin_idx]
        upper = self.bin_boundaries[bin_idx + 1]
        at_or_above_lower = probs >= lower
        if bin_idx == self.n_bins - 1:
            return at_or_above_lower & (probs <= upper)
        return at_or_above_lower & (probs < upper)

    def transform(self, logits):
        """Convert raw logits into a two-column array of calibrated probabilities.

        Args:
            logits: 2-D array-like of logits; column 1 is the calibrated class.

        Returns:
            float64 array of shape (n_samples, 2): column 1 holds the
            calibrated class-1 probability and column 0 its complement.

        Raises:
            ValueError: if ``bin_boundaries`` has not been set, or a bin
                calibrator has a type other than 'isotonic' or 'mean'.
        """
        if self.bin_boundaries is None:
            raise ValueError("Calibrator not fitted")

        scaled_logits = np.asarray(logits) / self.temperature
        probs = torch.softmax(torch.tensor(scaled_logits), dim=-1).numpy()
        class1_probs = probs[:, 1]

        # Start from the uncalibrated values so that probabilities falling
        # outside every bin (below the first edge or above the last one) pass
        # through unchanged instead of being silently reported as 0.0.
        calibrated_probs = class1_probs.copy()

        for i in range(self.n_bins):
            bin_mask = self._get_bin_mask(class1_probs, i)
            if not np.any(bin_mask):
                continue
            bin_entry = self.bin_calibrators.get(i)
            if bin_entry is None:
                # No calibrator for this bin: keep the uncalibrated values.
                continue
            cal_type, cal_data = bin_entry
            bin_probs = class1_probs[bin_mask]
            if cal_type == 'isotonic':
                calibrated_bin_probs = cal_data.predict(bin_probs)
            elif cal_type == 'mean':
                calibrated_bin_probs = bin_probs * cal_data
            else:
                # Previously this fell through and reused a stale (or
                # unbound) result from an earlier bin.
                raise ValueError(
                    f"Unknown calibrator type {cal_type!r} for bin {i}"
                )
            calibrated_probs[bin_mask] = np.clip(calibrated_bin_probs, 0, 1)

        result = np.zeros((len(calibrated_probs), 2))
        result[:, 1] = calibrated_probs
        result[:, 0] = 1 - calibrated_probs
        return result
106
+
107
+ # ============================================================================
108
+ # MODEL LOADING AND PREDICTION
109
+ # ============================================================================
110
+
111
def load_model_and_calibrators():
    """Load the classifier, its tokenizer and the 'mixnmatch' calibrator.

    Reads the module-level CHECKPOINT_PATH, CALIBRATOR_FILE and DEVICE.

    Returns:
        tuple: ``(model, tokenizer, calibrator)`` where the model has been
        moved to DEVICE and put in eval mode, and the calibrator is
        ``cal_data['calibrators']['mixnmatch']`` from the pickle file.

    Raises:
        OSError: if the calibrator file cannot be opened.
        KeyError: if the pickle lacks the 'calibrators' / 'mixnmatch' keys.
    """
    print(f"Loading BERT model from: {CHECKPOINT_PATH}")
    tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_PATH)
    model = AutoModelForSequenceClassification.from_pretrained(CHECKPOINT_PATH)
    model = model.to(DEVICE)
    model.eval()
    print("BERT model loaded successfully!")

    # Load calibrators with compatibility handling
    print(f"Loading calibrators from: {CALIBRATOR_FILE}")

    # The calibration classes are resolved against this module by name,
    # whatever module name was recorded in the pickle.
    current_module = sys.modules[__name__]
    calibration_classes = frozenset((
        'TemperatureScaling',
        'PlattScaling',
        'IsotonicCalibration',
        'MixNMatchCalibration',
    ))
    new_core = 'numpy._core'
    legacy_core = 'numpy.core'

    class CompatibleUnpickler(pickle.Unpickler):
        """Unpickler that redirects calibration classes and numpy core modules."""

        def find_class(self, module, name):
            if name in calibration_classes:
                return getattr(current_module, name)
            # Redirect 'numpy._core' and every submodule of it (not just two
            # exact names) to the 'numpy.core' spelling; if that cannot be
            # resolved, fall back to the name stored in the pickle.
            if module == new_core or module.startswith(new_core + '.'):
                legacy_module = legacy_core + module[len(new_core):]
                try:
                    return super().find_class(legacy_module, name)
                except (ImportError, AttributeError):
                    pass
            return super().find_class(module, name)

    # SECURITY NOTE: unpickling can execute arbitrary code. The path comes
    # from user input, so only point this at calibrator files you trust.
    try:
        with open(CALIBRATOR_FILE, 'rb') as f:
            cal_data = CompatibleUnpickler(f).load()
    except OSError:
        # A missing or unreadable file would fail the same way on retry.
        raise
    except Exception as exc:
        # Best-effort fallback to the stock unpickler; unlike a bare
        # ``except`` this no longer swallows KeyboardInterrupt/SystemExit.
        print(f"Compatibility unpickler failed ({exc!r}); retrying with pickle.load")
        with open(CALIBRATOR_FILE, 'rb') as f:
            cal_data = pickle.load(f)

    # Use only mixnmatch calibration
    calibrator = cal_data['calibrators']['mixnmatch']

    print("Using calibration: mixnmatch")

    return model, tokenizer, calibrator
151
+
152
def predict_batch(model, tokenizer, calibrator, texts):
    """Classify ``texts`` and return calibrated and uncalibrated probabilities.

    Args:
        model: sequence-classification model, called with ``input_ids`` and
            ``attention_mask`` and expected to expose ``.logits``.
        tokenizer: tokenizer matching ``model``.
        calibrator: object whose ``transform(logits)`` returns either a 1-D
            array of class-1 probabilities or a 2-D probability array.
        texts: iterable of strings; a single string is treated as one text.

    Returns:
        dict with numpy arrays under the keys 'predictions', 'probabilities',
        'confidence', 'uncalibrated_probs' and 'calibration_shift'
        (calibrated minus uncalibrated top-class probability). For empty
        input every array is empty.
    """
    # A bare string would otherwise be sliced into chunks of characters.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    if not texts:
        # np.vstack([]) raises, so return well-formed empty results instead.
        # Two columns are used because the calibrated output is binary.
        return {
            'predictions': np.zeros(0, dtype=np.intp),
            'probabilities': np.zeros((0, 2)),
            'confidence': np.zeros(0),
            'uncalibrated_probs': np.zeros((0, 2)),
            'calibration_shift': np.zeros(0),
        }

    all_logits = []

    model.eval()
    with torch.no_grad():
        for start in range(0, len(texts), BATCH_SIZE):
            encoding = tokenizer(
                texts[start:start + BATCH_SIZE],
                truncation=True,
                padding=True,
                max_length=MAX_LENGTH,
                return_tensors='pt',
            )
            outputs = model(
                input_ids=encoding['input_ids'].to(DEVICE),
                attention_mask=encoding['attention_mask'].to(DEVICE),
            )
            all_logits.append(outputs.logits.cpu().numpy())

    logits = np.vstack(all_logits)

    # Get uncalibrated probabilities
    uncalibrated_probs = torch.softmax(torch.tensor(logits), dim=-1).numpy()

    # Apply calibration; a 1-D result is taken to be class-1 probabilities
    # and expanded to two columns.
    calibrated_output = calibrator.transform(logits)
    if len(calibrated_output.shape) == 1:
        calibrated_probs = np.zeros((len(calibrated_output), 2))
        calibrated_probs[:, 1] = calibrated_output
        calibrated_probs[:, 0] = 1 - calibrated_output
    else:
        calibrated_probs = calibrated_output

    # Get predictions and confidence
    predictions = np.argmax(calibrated_probs, axis=1)
    confidence = np.max(calibrated_probs, axis=1)

    # Calibration shift: change in top-class probability due to calibration.
    calibration_shift = confidence - np.max(uncalibrated_probs, axis=1)

    return {
        'predictions': predictions,
        'probabilities': calibrated_probs,
        'confidence': confidence,
        'uncalibrated_probs': uncalibrated_probs,
        'calibration_shift': calibration_shift,
    }
206
+
207
def process_texts(texts):
    """Load the model, tokenizer and calibrator, then classify ``texts``.

    Everything is loaded again on each call. Returns the result dict
    produced by ``predict_batch``.
    """
    model, tokenizer, calibrator = load_model_and_calibrators()
    return predict_batch(
        model=model,
        tokenizer=tokenizer,
        calibrator=calibrator,
        texts=texts,
    )
212
+
213
+ # Example usage
214
if __name__ == "__main__":
    # Demo run: classify a fixed set of example messages and print one
    # report per message.
    sample_texts = [
        # Social Engineering - Manipulative, but flagged wrong
        "URGENT: Your account will be suspended in 2 hours due to suspicious activity. Click this link immediately to verify your identity or lose access forever. -IT Security Team",

        # Social Engineering - Non-manipulative
        "Hi, we noticed some unusual login attempts on your account. For your security, please log into your account through our official website when convenient to review your recent activity. If you have concerns, contact our support team at [official number]. -IT Security Team",

        # Social Engineering - no context to determine if manipulative
        "Hey! It's Sarah from accounting. I'm working from home and can't access the expense system. Can you quickly send me your login details so I can process your reimbursement today? Thanks!",

        # Social Engineering - Non-manipulative
        "Hi, this is Sarah from accounting. I'm having technical issues with the expense system. Could you please submit your reimbursement request through the official portal, or I can walk you through it when I'm back in the office tomorrow?",

        # Emotional Manipulation - Manipulative
        "I guess you don't really care about our friendship since you never make time for me anymore. I've been there for you through everything, but apparently that doesn't matter. Fine, I'll just stop trying.",

        # Emotional Manipulation - Non-manipulative
        "I miss spending time together and I'm feeling a bit disconnected lately. I understand you're busy, but I'd love to catch up when you have some free time. Would you be interested in planning something together?",

        # Emotional Manipulation - Manipulative
        "You're being way too sensitive about this. You always overreact to everything - I was just joking around. Maybe you should work on not taking things so personally all the time.",

        # Emotional Manipulation - Non-manipulative
        "I can see that what I said upset you, and that wasn't my intention. I was trying to be playful, but I can understand how it came across differently. I'm sorry for hurting your feelings.",
    ]

    print("Processing sample texts with BERT model...")
    results = process_texts(sample_texts)

    # Walk the per-text result arrays in lockstep with the inputs.
    report_rows = zip(
        sample_texts,
        results['predictions'],
        results['confidence'],
        results['probabilities'],
        results['calibration_shift'],
    )
    for text, label, conf, class_probs, shift in report_rows:
        print(f"\nText: {text}")
        print(f"Prediction: {label}")
        print(f"Confidence: {conf:.4f}")
        print(f"Probabilities: Class 0: {class_probs[0]:.4f}, Class 1: {class_probs[1]:.4f}")
        print(f"Calibration Shift: {shift:.4f}")
deberta_inference.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import pickle
4
+ import torch
5
+ import torch.nn.functional as F
6
+ import numpy as np
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+ from sklearn.isotonic import IsotonicRegression
9
+ from sklearn.linear_model import LogisticRegression
10
+ import warnings
11
+ warnings.filterwarnings('ignore')
12
+
13
# Fix numpy compatibility issue
# When 'numpy._core' has not been loaded, register the older 'numpy.core'
# modules under the new names so imports of 'numpy._core' resolve.
if 'numpy._core' not in sys.modules:
    import numpy.core
    sys.modules.update({
        'numpy._core': numpy.core,
        'numpy._core._multiarray_umath': numpy.core._multiarray_umath,
    })
18
+
19
# Configuration
MODEL_NAME = "microsoft/deberta-v3-xsmall"

# The checkpoint directory is requested interactively as soon as this module
# is executed or imported; the calibrator pickle is expected inside it.
CHECKPOINT_PATH = input("Please enter the path to the DeBERTa model directory: ").strip()
CALIBRATOR_FILE = os.path.join(CHECKPOINT_PATH, "calibrators.pkl")

# Tokenizer truncation length and number of texts per forward pass.
MAX_LENGTH = 512
BATCH_SIZE = 16

# Run on the GPU when one is available, otherwise on the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
26
+
27
+ # ============================================================================
28
+ # CALIBRATION CLASSES (needed for unpickling)
29
+ # ============================================================================
30
+
31
class TemperatureScaling:
    """Calibrator that divides logits by a single scalar temperature.

    The class name is looked up by name when the calibrator pickle is
    loaded, so it must not be renamed.
    """

    def __init__(self):
        # A temperature of 1.0 leaves the logits unchanged.
        self.temperature = 1.0

    def transform(self, logits):
        """Return ``logits`` divided by ``self.temperature``.

        The result is still in logit space; no softmax is applied here.
        """
        rescaled = logits / self.temperature
        return rescaled
36
+
37
class PlattScaling:
    """Calibrator that feeds the softmax class-1 probability to a logistic regression.

    The class name is looked up by name when the calibrator pickle is
    loaded, so it must not be renamed.
    """

    def __init__(self):
        self.calibrator = LogisticRegression()
        # Guards transform(); a fresh instance starts out unfitted.
        self.fitted = False

    def transform(self, logits):
        """Convert raw logits into calibrated class probabilities.

        Args:
            logits: 2-D array-like of logits; column 1 is used as the score.

        Returns:
            Whatever ``self.calibrator.predict_proba`` returns for the
            class-1 softmax probabilities.

        Raises:
            ValueError: if ``self.fitted`` is false.
        """
        if not self.fitted:
            raise ValueError("Calibrator not fitted")
        softmax_probs = torch.softmax(torch.tensor(logits), dim=-1).numpy()
        # The score is passed as a single-column 2-D feature matrix.
        positive_column = softmax_probs[:, 1].reshape(-1, 1)
        return self.calibrator.predict_proba(positive_column)
48
+
49
class IsotonicCalibration:
    """Calibrator that remaps the softmax class-1 probability with isotonic regression.

    The class name is looked up by name when the calibrator pickle is
    loaded, so it must not be renamed.
    """

    def __init__(self):
        self.calibrator = IsotonicRegression(out_of_bounds='clip')
        # Guards transform(); a fresh instance starts out unfitted.
        self.fitted = False

    def transform(self, logits):
        """Convert raw logits into a two-column array of calibrated probabilities.

        Args:
            logits: 2-D array-like of logits; column 1 is used as the score.

        Returns:
            Array of shape (n_samples, 2): column 1 holds the calibrated
            class-1 score and column 0 its complement.

        Raises:
            ValueError: if ``self.fitted`` is false.
        """
        if not self.fitted:
            raise ValueError("Calibrator not fitted")
        softmax_probs = torch.softmax(torch.tensor(logits), dim=-1).numpy()
        positive_raw = softmax_probs[:, 1]
        positive_calibrated = self.calibrator.transform(positive_raw)

        # Both columns are filled explicitly; the default float64 dtype is kept.
        two_column = np.empty((len(positive_raw), 2))
        two_column[:, 0] = 1 - positive_calibrated
        two_column[:, 1] = positive_calibrated
        return two_column
63
+
64
class MixNMatchCalibration:
    """Temperature scaling followed by per-bin calibration of the class-1 probability.

    Fitted state (normally restored from a pickle):
      * ``temperature``     - scalar the logits are divided by.
      * ``bin_boundaries``  - sequence of ``n_bins + 1`` bin edges.
      * ``bin_calibrators`` - maps a bin index to ``(cal_type, cal_data)``;
        ``'isotonic'`` calls ``cal_data.predict`` and ``'mean'`` multiplies
        the probabilities by ``cal_data``.
      * ``bin_sample_counts`` - stored only; not read by the code in this class.

    The class name is looked up by name when the calibrator pickle is
    loaded, so it must not be renamed.
    """

    def __init__(self, n_bins=15, bin_strategy='quantile'):
        self.n_bins = n_bins
        self.bin_strategy = bin_strategy
        self.temperature = 1.0
        self.bin_boundaries = None
        self.bin_calibrators = {}
        self.bin_sample_counts = {}

    def _get_bin_mask(self, probs, bin_idx):
        """Return a boolean mask of the entries of ``probs`` that fall in bin ``bin_idx``.

        Bins are half-open ``[lower, upper)`` except the last one, which
        also includes its upper edge.
        """
        lower = self.bin_boundaries[bin_idx]
        upper = self.bin_boundaries[bin_idx + 1]
        at_or_above_lower = probs >= lower
        if bin_idx == self.n_bins - 1:
            return at_or_above_lower & (probs <= upper)
        return at_or_above_lower & (probs < upper)

    def transform(self, logits):
        """Convert raw logits into a two-column array of calibrated probabilities.

        Args:
            logits: 2-D array-like of logits; column 1 is the calibrated class.

        Returns:
            float64 array of shape (n_samples, 2): column 1 holds the
            calibrated class-1 probability and column 0 its complement.

        Raises:
            ValueError: if ``bin_boundaries`` has not been set, or a bin
                calibrator has a type other than 'isotonic' or 'mean'.
        """
        if self.bin_boundaries is None:
            raise ValueError("Calibrator not fitted")

        scaled_logits = np.asarray(logits) / self.temperature
        probs = torch.softmax(torch.tensor(scaled_logits), dim=-1).numpy()
        class1_probs = probs[:, 1]

        # Start from the uncalibrated values so that probabilities falling
        # outside every bin (below the first edge or above the last one) pass
        # through unchanged instead of being silently reported as 0.0.
        calibrated_probs = class1_probs.copy()

        for i in range(self.n_bins):
            bin_mask = self._get_bin_mask(class1_probs, i)
            if not np.any(bin_mask):
                continue
            bin_entry = self.bin_calibrators.get(i)
            if bin_entry is None:
                # No calibrator for this bin: keep the uncalibrated values.
                continue
            cal_type, cal_data = bin_entry
            bin_probs = class1_probs[bin_mask]
            if cal_type == 'isotonic':
                calibrated_bin_probs = cal_data.predict(bin_probs)
            elif cal_type == 'mean':
                calibrated_bin_probs = bin_probs * cal_data
            else:
                # Previously this fell through and reused a stale (or
                # unbound) result from an earlier bin.
                raise ValueError(
                    f"Unknown calibrator type {cal_type!r} for bin {i}"
                )
            calibrated_probs[bin_mask] = np.clip(calibrated_bin_probs, 0, 1)

        result = np.zeros((len(calibrated_probs), 2))
        result[:, 1] = calibrated_probs
        result[:, 0] = 1 - calibrated_probs
        return result
106
+
107
+ # ============================================================================
108
+ # MODEL LOADING AND PREDICTION
109
+ # ============================================================================
110
+
111
def load_model_and_calibrators():
    """Load the classifier, its tokenizer and the 'mixnmatch' calibrator.

    Reads the module-level CHECKPOINT_PATH, CALIBRATOR_FILE and DEVICE.

    Returns:
        tuple: ``(model, tokenizer, calibrator)`` where the model has been
        moved to DEVICE and put in eval mode, and the calibrator is
        ``cal_data['calibrators']['mixnmatch']`` from the pickle file.

    Raises:
        OSError: if the calibrator file cannot be opened.
        KeyError: if the pickle lacks the 'calibrators' / 'mixnmatch' keys.
    """
    print(f"Loading DeBERTa model from: {CHECKPOINT_PATH}")
    tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_PATH)
    model = AutoModelForSequenceClassification.from_pretrained(CHECKPOINT_PATH)
    model = model.to(DEVICE)
    model.eval()
    print("DeBERTa model loaded successfully!")

    # Load calibrators with compatibility handling
    print(f"Loading calibrators from: {CALIBRATOR_FILE}")

    # The calibration classes are resolved against this module by name,
    # whatever module name was recorded in the pickle.
    current_module = sys.modules[__name__]
    calibration_classes = frozenset((
        'TemperatureScaling',
        'PlattScaling',
        'IsotonicCalibration',
        'MixNMatchCalibration',
    ))
    new_core = 'numpy._core'
    legacy_core = 'numpy.core'

    class CompatibleUnpickler(pickle.Unpickler):
        """Unpickler that redirects calibration classes and numpy core modules."""

        def find_class(self, module, name):
            if name in calibration_classes:
                return getattr(current_module, name)
            # Redirect 'numpy._core' and every submodule of it (not just two
            # exact names) to the 'numpy.core' spelling; if that cannot be
            # resolved, fall back to the name stored in the pickle.
            if module == new_core or module.startswith(new_core + '.'):
                legacy_module = legacy_core + module[len(new_core):]
                try:
                    return super().find_class(legacy_module, name)
                except (ImportError, AttributeError):
                    pass
            return super().find_class(module, name)

    # SECURITY NOTE: unpickling can execute arbitrary code. The path comes
    # from user input, so only point this at calibrator files you trust.
    try:
        with open(CALIBRATOR_FILE, 'rb') as f:
            cal_data = CompatibleUnpickler(f).load()
    except OSError:
        # A missing or unreadable file would fail the same way on retry.
        raise
    except Exception as exc:
        # Best-effort fallback to the stock unpickler; unlike a bare
        # ``except`` this no longer swallows KeyboardInterrupt/SystemExit.
        print(f"Compatibility unpickler failed ({exc!r}); retrying with pickle.load")
        with open(CALIBRATOR_FILE, 'rb') as f:
            cal_data = pickle.load(f)

    # Use only mixnmatch calibration
    calibrator = cal_data['calibrators']['mixnmatch']

    print("Using calibration: mixnmatch")

    return model, tokenizer, calibrator
151
+
152
def predict_batch(model, tokenizer, calibrator, texts):
    """Classify ``texts`` and return calibrated and uncalibrated probabilities.

    Args:
        model: sequence-classification model, called with ``input_ids`` and
            ``attention_mask`` and expected to expose ``.logits``.
        tokenizer: tokenizer matching ``model``.
        calibrator: object whose ``transform(logits)`` returns either a 1-D
            array of class-1 probabilities or a 2-D probability array.
        texts: iterable of strings; a single string is treated as one text.

    Returns:
        dict with numpy arrays under the keys 'predictions', 'probabilities',
        'confidence', 'uncalibrated_probs' and 'calibration_shift'
        (calibrated minus uncalibrated top-class probability). For empty
        input every array is empty.
    """
    # A bare string would otherwise be sliced into chunks of characters.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    if not texts:
        # np.vstack([]) raises, so return well-formed empty results instead.
        # Two columns are used because the calibrated output is binary.
        return {
            'predictions': np.zeros(0, dtype=np.intp),
            'probabilities': np.zeros((0, 2)),
            'confidence': np.zeros(0),
            'uncalibrated_probs': np.zeros((0, 2)),
            'calibration_shift': np.zeros(0),
        }

    all_logits = []

    model.eval()
    with torch.no_grad():
        for start in range(0, len(texts), BATCH_SIZE):
            encoding = tokenizer(
                texts[start:start + BATCH_SIZE],
                truncation=True,
                padding=True,
                max_length=MAX_LENGTH,
                return_tensors='pt',
            )
            outputs = model(
                input_ids=encoding['input_ids'].to(DEVICE),
                attention_mask=encoding['attention_mask'].to(DEVICE),
            )
            all_logits.append(outputs.logits.cpu().numpy())

    logits = np.vstack(all_logits)

    # Get uncalibrated probabilities
    uncalibrated_probs = torch.softmax(torch.tensor(logits), dim=-1).numpy()

    # Apply calibration; a 1-D result is taken to be class-1 probabilities
    # and expanded to two columns.
    calibrated_output = calibrator.transform(logits)
    if len(calibrated_output.shape) == 1:
        calibrated_probs = np.zeros((len(calibrated_output), 2))
        calibrated_probs[:, 1] = calibrated_output
        calibrated_probs[:, 0] = 1 - calibrated_output
    else:
        calibrated_probs = calibrated_output

    # Get predictions and confidence
    predictions = np.argmax(calibrated_probs, axis=1)
    confidence = np.max(calibrated_probs, axis=1)

    # Calibration shift: change in top-class probability due to calibration.
    calibration_shift = confidence - np.max(uncalibrated_probs, axis=1)

    return {
        'predictions': predictions,
        'probabilities': calibrated_probs,
        'confidence': confidence,
        'uncalibrated_probs': uncalibrated_probs,
        'calibration_shift': calibration_shift,
    }
206
+
207
def process_texts(texts):
    """Load the model, tokenizer and calibrator, then classify ``texts``.

    Everything is loaded again on each call. Returns the result dict
    produced by ``predict_batch``.
    """
    model, tokenizer, calibrator = load_model_and_calibrators()
    return predict_batch(
        model=model,
        tokenizer=tokenizer,
        calibrator=calibrator,
        texts=texts,
    )
212
+
213
+ # Example usage
214
if __name__ == "__main__":
    # Demo run: classify a fixed set of example messages and print one
    # report per message.
    sample_texts = [
        # Social Engineering - Manipulative, but flagged wrong
        "URGENT: Your account will be suspended in 2 hours due to suspicious activity. Click this link immediately to verify your identity or lose access forever. -IT Security Team",

        # Social Engineering - Non-manipulative
        "Hi, we noticed some unusual login attempts on your account. For your security, please log into your account through our official website when convenient to review your recent activity. If you have concerns, contact our support team at [official number]. -IT Security Team",

        # Social Engineering - no context to determine if manipulative
        "Hey! It's Sarah from accounting. I'm working from home and can't access the expense system. Can you quickly send me your login details so I can process your reimbursement today? Thanks!",

        # Social Engineering - Non-manipulative
        "Hi, this is Sarah from accounting. I'm having technical issues with the expense system. Could you please submit your reimbursement request through the official portal, or I can walk you through it when I'm back in the office tomorrow?",

        # Emotional Manipulation - Manipulative
        "I guess you don't really care about our friendship since you never make time for me anymore. I've been there for you through everything, but apparently that doesn't matter. Fine, I'll just stop trying.",

        # Emotional Manipulation - Non-manipulative
        "I miss spending time together and I'm feeling a bit disconnected lately. I understand you're busy, but I'd love to catch up when you have some free time. Would you be interested in planning something together?",

        # Emotional Manipulation - Manipulative
        "You're being way too sensitive about this. You always overreact to everything - I was just joking around. Maybe you should work on not taking things so personally all the time.",

        # Emotional Manipulation - Non-manipulative
        "I can see that what I said upset you, and that wasn't my intention. I was trying to be playful, but I can understand how it came across differently. I'm sorry for hurting your feelings.",
    ]

    print("Processing sample texts with DeBERTa model...")
    results = process_texts(sample_texts)

    # Walk the per-text result arrays in lockstep with the inputs.
    report_rows = zip(
        sample_texts,
        results['predictions'],
        results['confidence'],
        results['probabilities'],
        results['calibration_shift'],
    )
    for text, label, conf, class_probs, shift in report_rows:
        print(f"\nText: {text}")
        print(f"Prediction: {label}")
        print(f"Confidence: {conf:.4f}")
        print(f"Probabilities: Class 0: {class_probs[0]:.4f}, Class 1: {class_probs[1]:.4f}")
        print(f"Calibration Shift: {shift:.4f}")