mojad121 committed on
Commit
8ac24b7
·
verified ·
1 Parent(s): eb5f9f2

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +55 -0
  2. config.json +39 -0
  3. label_config.json +22 -0
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +14 -0
README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Sentinel-D DistilBERT Intent Classifier v1
2
+
3
+ ## Model Details
4
+ - **Base Model**: distilbert-base-uncased
5
+ - **Task**: Sequence Classification (4 classes)
6
+ - **Training Date**: 2026-03-04T03:36:30.927788
7
+ - **Classes**: VERSION_PIN, API_MIGRATION, MONKEY_PATCH, FULL_REFACTOR
8
+
9
+ ## Intent Classes
10
+ 1. **VERSION_PIN**: Pinning or locking dependency versions
11
+ 2. **API_MIGRATION**: Migrating between API versions or protocols
12
+ 3. **MONKEY_PATCH**: Quick/temporary fixes via runtime patching
13
+ 4. **FULL_REFACTOR**: Complete rewriting or structural redesign
14
+
15
+ ## Usage
16
+
17
+ ```python
18
+ from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
19
+ import torch
20
+ import json
21
+
22
+ tokenizer = DistilBertTokenizer.from_pretrained("./distilbert-intent-classifier-v1")
23
+ model = DistilBertForSequenceClassification.from_pretrained("./distilbert-intent-classifier-v1")
24
+
25
+ text = "I updated my package.json to lock the Express version to 4.18.0"
26
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
27
+
28
+ with torch.no_grad():
29
+ outputs = model(**inputs)
30
+ logits = outputs.logits
31
+ predicted_class_id = logits.argmax(-1).item()
32
+
33
+ # Map ID back to label
34
+ label_config = json.load(open("label_config.json"))
35
+ predicted_label = label_config["id_to_label"][str(predicted_class_id)]
36
+ print(f"Predicted Intent: {predicted_label}")
37
+ ```
38
+
39
+ ## Files
40
+ - `pytorch_model.bin`: Fine-tuned model weights
41
+ - `config.json`: Model configuration
42
+ - `vocab.txt`: Tokenizer vocabulary
43
+ - `label_config.json`: Intent class mappings
44
+ - `README.md`: This file
45
+
46
+ ## Training Configuration
47
+ - Epochs: 6
48
+ - Batch Size: 16
49
+ - Learning Rate: Dynamic tuning from 1e-05 to 5e-05
50
+ - Optimizer: AdamW with weighted cross-entropy loss
51
+ - Class Imbalance Handling: Random oversampling + weighted loss
52
+
53
+ ## Performance Targets
54
+ - Accuracy: >= 0.80
55
+ - Macro F1: >= 0.80
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "bos_token_id": null,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "dtype": "float32",
11
+ "eos_token_id": null,
12
+ "hidden_dim": 3072,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "label2id": {
21
+ "LABEL_0": 0,
22
+ "LABEL_1": 1,
23
+ "LABEL_2": 2,
24
+ "LABEL_3": 3
25
+ },
26
+ "max_position_embeddings": 512,
27
+ "model_type": "distilbert",
28
+ "n_heads": 12,
29
+ "n_layers": 6,
30
+ "pad_token_id": 0,
31
+ "qa_dropout": 0.1,
32
+ "seq_classif_dropout": 0.2,
33
+ "sinusoidal_pos_embds": false,
34
+ "tie_weights_": true,
35
+ "tie_word_embeddings": true,
36
+ "transformers_version": "5.2.0",
37
+ "use_cache": false,
38
+ "vocab_size": 30522
39
+ }
label_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "label_to_id": {
3
+ "VERSION_PIN": 0,
4
+ "API_MIGRATION": 1,
5
+ "MONKEY_PATCH": 2,
6
+ "FULL_REFACTOR": 3
7
+ },
8
+ "id_to_label": {
9
+ "0": "VERSION_PIN",
10
+ "1": "API_MIGRATION",
11
+ "2": "MONKEY_PATCH",
12
+ "3": "FULL_REFACTOR"
13
+ },
14
+ "intent_classes": [
15
+ "VERSION_PIN",
16
+ "API_MIGRATION",
17
+ "MONKEY_PATCH",
18
+ "FULL_REFACTOR"
19
+ ],
20
+ "model_name": "distilbert-base-uncased",
21
+ "training_date": "2026-03-04T03:36:30.926809"
22
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "is_local": false,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 512,
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "DistilBertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }