File size: 9,871 Bytes
31f0e50 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 | """
Verification Script for Task 3.2: Scam Classification with IndicBERT
This script verifies all acceptance criteria:
- AC-1.2.1: Achieves >90% accuracy on test dataset
- AC-1.2.2: False positive rate <5%
- AC-1.2.3: Inference time <500ms per message
- AC-1.2.4: Handles messages up to 5000 characters
- AC-1.2.5: Returns calibrated confidence scores (not just 0/1)
"""
import sys
import time
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.path.insert(0, '.')
from app.models.detector import ScamDetector, reset_detector_cache
# Test datasets
SCAM_MESSAGES = [
# English scams
"Congratulations! You won ₹10 lakh. Share OTP to claim.",
"Your account will be suspended. Send money to unblock.",
"You have won a lottery prize of 5 crore rupees!",
"This is police. You are under arrest. Pay fine immediately.",
"Your bank account is blocked. Verify by sending OTP.",
"Urgent! Claim your prize now before it expires.",
"Send ₹500 to this UPI to win ₹50000.",
"Your credit card is suspended. Call now to reactivate.",
"Dear customer, your KYC is incomplete. Update immediately.",
"You won iPhone 15! Click link to claim now.",
# Hindi scams
"आप गिरफ्तार हो जाएंगे। तुरंत UPI पर पैसे भेजें।",
"आपने लॉटरी जीती है! इनाम लेने के लिए OTP भेजें।",
"आपका खाता ब्लॉक हो जाएगा। तुरंत वेरिफाई करें।",
"पुलिस यहाँ है। जुर्माना भरो या गिरफ्तार हो जाओगे।",
# Hinglish scams
"Aapne jeeta hai 10 lakh! OTP share karo jaldi.",
"Bank account block ho jayega. Turant call karo.",
]
LEGITIMATE_MESSAGES = [
"Hi, how are you? Let's meet for coffee tomorrow.",
"Your order #12345 has been shipped.",
"Reminder: Your dentist appointment is tomorrow at 3 PM.",
"Thanks for your payment. Receipt attached.",
"Happy birthday! Have a great day.",
"Meeting rescheduled to next Monday at 10 AM.",
"The weather is nice today.",
"Can you please send me the document?",
"नमस्ते! कैसे हो? कल मिलते हैं।",
"आपका ऑर्डर डिलीवर हो गया है।",
"Thank you for your feedback.",
"See you at the party tonight!",
"Your booking is confirmed for tomorrow.",
"Please find the invoice attached.",
"Happy to help with any questions.",
]
def verify_accuracy():
"""AC-1.2.1: Achieves >90% accuracy on test dataset"""
detector = ScamDetector(load_model=False)
# Test scam detection
scam_correct = 0
for msg in SCAM_MESSAGES:
result = detector.detect(msg)
if result["scam_detected"]:
scam_correct += 1
scam_accuracy = scam_correct / len(SCAM_MESSAGES)
return {
"id": "AC-1.2.1",
"description": "Scam detection accuracy >90%",
"accuracy": scam_accuracy,
"correct": scam_correct,
"total": len(SCAM_MESSAGES),
"passed": scam_accuracy >= 0.90,
}
def verify_false_positive_rate():
"""AC-1.2.2: False positive rate <5%"""
detector = ScamDetector(load_model=False)
false_positives = 0
for msg in LEGITIMATE_MESSAGES:
result = detector.detect(msg)
if result["scam_detected"]:
false_positives += 1
fp_rate = false_positives / len(LEGITIMATE_MESSAGES)
return {
"id": "AC-1.2.2",
"description": "False positive rate <5%",
"fp_rate": fp_rate,
"false_positives": false_positives,
"total": len(LEGITIMATE_MESSAGES),
"passed": fp_rate < 0.05,
}
def verify_inference_time():
"""AC-1.2.3: Inference time <500ms per message"""
detector = ScamDetector(load_model=False)
test_messages = [
("English scam", "You won 10 lakh! Send OTP now!"),
("Hindi scam", "आप जीत गए हैं! OTP भेजें तुरंत!"),
("Legitimate", "Hi, how are you doing today?"),
]
results = []
all_passed = True
for name, msg in test_messages:
start = time.time()
detector.detect(msg)
elapsed_ms = (time.time() - start) * 1000
passed = elapsed_ms < 500
if not passed:
all_passed = False
results.append({"name": name, "elapsed_ms": elapsed_ms, "passed": passed})
return {
"id": "AC-1.2.3",
"description": "Inference time <500ms",
"results": results,
"passed": all_passed,
}
def verify_long_message_handling():
"""AC-1.2.4: Handles messages up to 5000 characters"""
detector = ScamDetector(load_model=False)
# Create 5000+ character message
long_message = "You won a lottery prize! Send OTP now. " * 150 # ~5850 chars
start = time.time()
result = detector.detect(long_message)
elapsed_ms = (time.time() - start) * 1000
success = (
isinstance(result, dict) and
"scam_detected" in result and
elapsed_ms < 500
)
return {
"id": "AC-1.2.4",
"description": "Handles messages up to 5000 chars",
"message_length": len(long_message),
"elapsed_ms": elapsed_ms,
"passed": success,
}
def verify_calibrated_confidence():
"""AC-1.2.5: Returns calibrated confidence scores (not just 0/1)"""
detector = ScamDetector(load_model=False)
all_confidences = set()
for msg in SCAM_MESSAGES + LEGITIMATE_MESSAGES:
result = detector.detect(msg)
all_confidences.add(round(result["confidence"], 2))
# Check that we have varied confidence scores
has_variation = len(all_confidences) > 3
# Check high confidence for clear scam
high_conf_result = detector.detect(
"Congratulations! You won ₹10 lakh lottery prize! Send OTP immediately to claim!"
)
# Check low confidence for legitimate
low_conf_result = detector.detect("Hi, how are you?")
proper_calibration = (
high_conf_result["confidence"] > 0.8 and
low_conf_result["confidence"] < 0.3
)
return {
"id": "AC-1.2.5",
"description": "Calibrated confidence scores",
"unique_scores": len(all_confidences),
"has_variation": has_variation,
"proper_calibration": proper_calibration,
"passed": has_variation and proper_calibration,
}
def main():
print("=" * 60)
print("Task 3.2: Scam Classification - Acceptance Criteria Verification")
print("=" * 60)
print()
# Reset cache
reset_detector_cache()
# Run verifications
accuracy_result = verify_accuracy()
fp_result = verify_false_positive_rate()
time_result = verify_inference_time()
long_msg_result = verify_long_message_handling()
calibration_result = verify_calibrated_confidence()
# Print accuracy results
print(f"AC-1.2.1: Scam detection accuracy")
print(f" Accuracy: {accuracy_result['accuracy']:.0%} ({accuracy_result['correct']}/{accuracy_result['total']})")
print(f" Required: >90%")
status = "PASS" if accuracy_result["passed"] else "FAIL"
print(f" Status: {status}")
print()
# Print false positive results
print(f"AC-1.2.2: False positive rate")
print(f" FP Rate: {fp_result['fp_rate']:.0%} ({fp_result['false_positives']}/{fp_result['total']})")
print(f" Required: <5%")
status = "PASS" if fp_result["passed"] else "FAIL"
print(f" Status: {status}")
print()
# Print inference time results
print(f"AC-1.2.3: Inference time")
for r in time_result["results"]:
status = "PASS" if r["passed"] else "FAIL"
print(f" {r['name']}: {r['elapsed_ms']:.2f}ms [{status}]")
print(f" Required: <500ms")
status = "PASS" if time_result["passed"] else "FAIL"
print(f" Status: {status}")
print()
# Print long message results
print(f"AC-1.2.4: Long message handling")
print(f" Message length: {long_msg_result['message_length']} chars")
print(f" Processing time: {long_msg_result['elapsed_ms']:.2f}ms")
print(f" Required: Handle up to 5000 chars")
status = "PASS" if long_msg_result["passed"] else "FAIL"
print(f" Status: {status}")
print()
# Print calibration results
print(f"AC-1.2.5: Calibrated confidence scores")
print(f" Unique confidence values: {calibration_result['unique_scores']}")
print(f" Has variation: {calibration_result['has_variation']}")
print(f" Proper calibration: {calibration_result['proper_calibration']}")
status = "PASS" if calibration_result["passed"] else "FAIL"
print(f" Status: {status}")
print()
# Summary
print("=" * 60)
print("SUMMARY")
print("=" * 60)
all_results = [accuracy_result, fp_result, time_result, long_msg_result, calibration_result]
all_passed = all(r["passed"] for r in all_results)
for r in all_results:
status = "PASS" if r["passed"] else "FAIL"
print(f" {r['id']}: {r['description']} - {status}")
print()
if all_passed:
print("ALL ACCEPTANCE CRITERIA PASSED")
return 0
else:
print("SOME ACCEPTANCE CRITERIA FAILED")
return 1
if __name__ == "__main__":
sys.exit(main())
|