Digvijay05 committed on
Commit
e301e28
·
verified ·
1 Parent(s): 971c841

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "bos_token_id": null,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "dtype": "float32",
11
+ "eos_token_id": null,
12
+ "hidden_dim": 3072,
13
+ "initializer_range": 0.02,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "distilbert",
16
+ "n_heads": 12,
17
+ "n_layers": 6,
18
+ "pad_token_id": 0,
19
+ "qa_dropout": 0.1,
20
+ "seq_classif_dropout": 0.2,
21
+ "sinusoidal_pos_embds": false,
22
+ "tie_weights_": true,
23
+ "tie_word_embeddings": true,
24
+ "transformers_version": "5.2.0",
25
+ "use_cache": false,
26
+ "vocab_size": 30522
27
+ }
confusion_matrix.png ADDED
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdfd635c0df927e6f5c1038da96f8cf4535803724bf1b949c399464ccb5b615f
3
+ size 267832560
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "is_local": false,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 512,
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "BertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
training_metrics.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_stats": {
3
+ "total_samples": 8438,
4
+ "train_size": 7594,
5
+ "val_size": 844,
6
+ "class_distribution_raw": {
7
+ "0": 0.7250533301730268,
8
+ "1": 0.2749466698269732
9
+ }
10
+ },
11
+ "training_metrics": {
12
+ "best_f1": 0.989247311827957,
13
+ "best_model_accuracy": 0.9940758293838863
14
+ },
15
+ "calibration": {
16
+ "optimal_threshold": 0.0027917588595300913,
17
+ "calibrated_precision": 0.9508196721311475,
18
+ "calibrated_recall": 1.0,
19
+ "class_weights_used": {
20
+ "0": 0.689604445897352,
21
+ "1": 1.8185344827586207
22
+ }
23
+ },
24
+ "robustness": [
25
+ {
26
+ "type": "URL Obfuscation",
27
+ "text": "Win $1000 fast! Click hxxp://scammy-link[.]com",
28
+ "probability": 0.9997965693473816,
29
+ "predicted": 1,
30
+ "expected": 1,
31
+ "passed": true
32
+ },
33
+ {
34
+ "type": "URL Obfuscation",
35
+ "text": "Your account is locked. Verify at bit.ly/secure-login-392",
36
+ "probability": 0.9997977614402771,
37
+ "predicted": 1,
38
+ "expected": 1,
39
+ "passed": true
40
+ },
41
+ {
42
+ "type": "Numeric Replacement",
43
+ "text": "W1NNER! Y0u have b33n select3d for a pr1ze.",
44
+ "probability": 0.9936283230781555,
45
+ "predicted": 1,
46
+ "expected": 1,
47
+ "passed": true
48
+ },
49
+ {
50
+ "type": "Mixed Case",
51
+ "text": "cOnGrAtUlAtIoNs, yOu WoN a FrEe iPhOnE!",
52
+ "probability": 0.8983756899833679,
53
+ "predicted": 1,
54
+ "expected": 1,
55
+ "passed": true
56
+ },
57
+ {
58
+ "type": "Short SMS",
59
+ "text": "Call me now",
60
+ "probability": 0.00036344866384752095,
61
+ "predicted": 0,
62
+ "expected": 1,
63
+ "passed": false
64
+ },
65
+ {
66
+ "type": "Short SMS",
67
+ "text": "Hey",
68
+ "probability": 0.0003704441187437624,
69
+ "predicted": 0,
70
+ "expected": 0,
71
+ "passed": true
72
+ }
73
+ ],
74
+ "deployment": {
75
+ "model_size_mb": 255.41,
76
+ "cpu_latency_estimate": "~10-30ms / sequence",
77
+ "gpu_latency_estimate": "~2-5ms / sequence",
78
+ "recommendation": "CPU (FastAPI wrapped, ONNX quantization recommended)"
79
+ },
80
+ "model_path": "./cipher_distilbert_detection"
81
+ }