khalidsyfullah committed on
Commit
5c48a7c
·
verified ·
1 Parent(s): 5335dde

TinyBanglaClickbaitBERT: Distilled multi-task Bengali clickbait detection model

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tiny_bangla_clickbait_bert.onnx.data filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: bn
3
+ tags:
4
+ - bengali
5
+ - clickbait-detection
6
+ - knowledge-distillation
7
+ - tinybert
8
+ - multi-task
9
+ license: mit
10
+ ---
11
+
12
+ # TinyBanglaClickbaitBERT
13
+
14
+ A compact Bengali clickbait detection model distilled from BanglaClickbaitBERT.
15
+
16
+ ## Model Details
17
+ - **Architecture**: 4-layer Transformer (384-d, 6 heads)
18
+ - **Parameters**: 20.1M (vs 110.2M teacher → 5.5x compression)
19
+ - **Tasks**: Binary (clickbait/non-clickbait) + 11-class multiclass
20
+
21
+ ## Performance
22
+
23
+ | Task | Student F1 | Teacher F1 | Retention |
24
+ |------|-----------|-----------|-----------|
25
+ | Binary (macro) | 0.8543 | 0.8849 | 96.5% |
26
+ | Multiclass (macro) | 0.4201 | 0.5088 | 82.6% |
27
+
28
+ ## Speed and Size
29
+ - Teacher: 7.9 ms | Student: 3.4 ms | **2.3x speedup**
30
+ - Teacher: 440.9 MB | Student: 80.3 MB | **5.5x compression**
31
+
32
+ ## Usage
33
+ ```python
34
+ import torch
35
+ checkpoint = torch.load("tiny_bangla_clickbait_bert.pt")
36
+ # See label_config.json for class mappings
37
+ ```
38
+
39
+ ## Distilled from
40
+ [khalidsyfullah/bangla-clickbait-multitask](https://huggingface.co/khalidsyfullah/bangla-clickbait-multitask)
best_student.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e800c7361c88fba63208d53bc00f292778f6f4519858748c189673547f98165b
3
+ size 80346719
label_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "binary_classes": [
3
+ "clickbait",
4
+ "not_clickbait"
5
+ ],
6
+ "multi_classes": [
7
+ "analytical",
8
+ "celebrity_click",
9
+ "curiosity_gap",
10
+ "emotional",
11
+ "factual",
12
+ "fake_news",
13
+ "general",
14
+ "informative",
15
+ "misleading",
16
+ "reportage",
17
+ "sensational"
18
+ ],
19
+ "hierarchy_map": {
20
+ "2": 0,
21
+ "7": 1,
22
+ "1": 0,
23
+ "9": 1,
24
+ "3": 0,
25
+ "5": 0,
26
+ "4": 1,
27
+ "10": 0,
28
+ "8": 0,
29
+ "6": 1,
30
+ "0": 1
31
+ },
32
+ "num_binary_labels": 2,
33
+ "num_multi_labels": 11
34
+ }
tiny_bangla_clickbait_bert.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1dfabb3835572befe6f2f8da15c661c411e082fceb0af397c9a23b1c6ee34e7
3
+ size 534942
tiny_bangla_clickbait_bert.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c474e7422e0d64fd46c7627b1bfc9e230084ef87e4dfc6c10d8c32bf97f93f
3
+ size 78577664
tiny_bangla_clickbait_bert.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fc402a84ce85e0b47a3f82441aec86dfc823c5123091dc5c660b995bb3bf7da
3
+ size 80348933
tiny_quantized.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ef1189857205ee4a236233245cc13e0ed95ad7cdebde80d3dbb754235ece31
3
+ size 57655510
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": false,
5
+ "full_tokenizer_file": null,
6
+ "is_local": false,
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 1000000000000000019884624838656,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": false,
13
+ "tokenizer_class": "TokenizersBackend",
14
+ "unk_token": "[UNK]"
15
+ }