cisco-ehsan committed on
Commit
e493cdf
·
verified ·
1 Parent(s): a58ba75

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +76 -5
README.md CHANGED
@@ -12,10 +12,81 @@ It is built on top of **SecureBERT 2.0**.
12
  ## Usage Example
13
  ```python
14
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
15
- tokenizer = AutoTokenizer.from_pretrained('CiscoAITeam/SecureBERT2.0-code-vuln-detection')
16
- model = AutoModelForSequenceClassification.from_pretrained('CiscoAITeam/SecureBERT2.0-code-vuln-detection')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  ```
 
18
 
19
- ## Notes
20
- - The model was fine-tuned for vulnerability classification in code.
21
- - Ensure that the tokenizer matches the one used during fine-tuning.
 
 
 
 
 
 
12
  ## Usage Example
13
  ```python
14
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
15
+ import torch
16
+
17
+ # Hugging Face model ID (or path to a local model folder)
18
+ model_dir = "CiscoAITeam/SecureBERT2.0-code-vuln-detection"
19
+
20
+ # Load tokenizer and model
21
+ tokenizer = AutoTokenizer.from_pretrained(model_dir)
22
+ model = AutoModelForSequenceClassification.from_pretrained(model_dir)
23
+
24
+ # Put model in evaluation mode
25
+ model.eval()
26
+
27
+ # Example input code snippet (string)
28
+ example_code = """
29
+ static void FUNC_0(WmallDecodeCtx *VAR_0, int VAR_1, int VAR_2, int16_t VAR_3, int16_t VAR_4)
30
+ {
31
+ int16_t icoef;
32
+ int VAR_5 = VAR_0->cdlms[VAR_1][VAR_2].VAR_5;
33
+ int16_t range = 1 << (VAR_0->bits_per_sample - 1);
34
+ int VAR_6 = VAR_0->bits_per_sample > 16 ? 4 : 2;
35
+ if (VAR_3 > VAR_4) {
36
+ for (icoef = 0; icoef < VAR_0->cdlms[VAR_1][VAR_2].order; icoef++)
37
+ VAR_0->cdlms[VAR_1][VAR_2].coefs[icoef] +=
38
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[icoef + VAR_5];
39
+ } else {
40
+ for (icoef = 0; icoef < VAR_0->cdlms[VAR_1][VAR_2].order; icoef++)
41
+ VAR_0->cdlms[VAR_1][VAR_2].coefs[icoef] -=
42
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[icoef];
43
+ }
44
+ VAR_0->cdlms[VAR_1][VAR_2].VAR_5--;
45
+ VAR_0->cdlms[VAR_1][VAR_2].lms_prevvalues[VAR_5] = av_clip(VAR_3, -range, range - 1);
46
+ if (VAR_3 > VAR_4)
47
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[VAR_5] = VAR_0->update_speed[VAR_1];
48
+ else if (VAR_3 < VAR_4)
49
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[VAR_5] = -VAR_0->update_speed[VAR_1];
50
+
51
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[VAR_5 + VAR_0->cdlms[VAR_1][VAR_2].order >> 4] >>= 2;
52
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[VAR_5 + VAR_0->cdlms[VAR_1][VAR_2].order >> 3] >>= 1;
53
+
54
+ if (VAR_0->cdlms[VAR_1][VAR_2].VAR_5 == 0) {
55
+
56
+ memcpy(VAR_0->cdlms[VAR_1][VAR_2].lms_prevvalues + VAR_0->cdlms[VAR_1][VAR_2].order,
57
+ VAR_0->cdlms[VAR_1][VAR_2].lms_prevvalues,
58
+ VAR_6 * VAR_0->cdlms[VAR_1][VAR_2].order);
59
+ memcpy(VAR_0->cdlms[VAR_1][VAR_2].lms_updates + VAR_0->cdlms[VAR_1][VAR_2].order,
60
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates,
61
+ VAR_6 * VAR_0->cdlms[VAR_1][VAR_2].order);
62
+ VAR_0->cdlms[VAR_1][VAR_2].VAR_5 = VAR_0->cdlms[VAR_1][VAR_2].order;
63
+ }
64
+ }
65
+
66
+
67
+
68
+ """
69
+
70
+ # Tokenize input
71
+ inputs = tokenizer(example_code, return_tensors="pt", truncation=True, padding=True)
72
+
73
+ # Run model
74
+ with torch.no_grad():
75
+ outputs = model(**inputs)
76
+ logits = outputs.logits
77
+
78
+ # Get predicted class
79
+ predicted_class = torch.argmax(logits, dim=-1).item()
80
+
81
+ print(f"Predicted class ID: {predicted_class}")
82
  ```
83
+ Reference:
84
 
85
+ ```
86
+ @article{aghaei2025securebert,
87
+ title={SecureBERT 2.0: Advanced Language Model for Cybersecurity Intelligence},
88
+ author={Aghaei, Ehsan and Jain, Sarthak and Arun, Prashanth and Sambamoorthy, Arjun},
89
+ journal={arXiv preprint arXiv:2510.00240},
90
+ year={2025}
91
+ }
92
+ ```