cisco-ehsan committed on
Commit
e493cdf
·
verified ·
1 Parent(s): a58ba75

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +76 -5
README.md CHANGED
@@ -12,10 +12,81 @@ It is built on top of **SecureBERT 2.0**.
12
  ## Usage Example
13
  ```python
14
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
15
- tokenizer = AutoTokenizer.from_pretrained('CiscoAITeam/SecureBERT2.0-code-vuln-detection')
16
- model = AutoModelForSequenceClassification.from_pretrained('CiscoAITeam/SecureBERT2.0-code-vuln-detection')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  ```
 
18
 
19
- ## Notes
20
- - The model was fine-tuned for vulnerability classification in code.
21
- - Ensure that the tokenizer matches the one used during fine-tuning.
 
 
 
 
 
 
12
  ## Usage Example
13
  ```python
14
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
15
+ import torch
16
+
17
+ # Hugging Face model ID (or path to a local model folder)
18
+ model_dir = "CiscoAITeam/SecureBERT2.0-code-vuln-detection"
19
+
20
+ # Load tokenizer and model
21
+ tokenizer = AutoTokenizer.from_pretrained(model_dir)
22
+ model = AutoModelForSequenceClassification.from_pretrained(model_dir)
23
+
24
+ # Put model in evaluation mode
25
+ model.eval()
26
+
27
+ # Example input code snippet (string)
28
+ example_code = """
29
+ static void FUNC_0(WmallDecodeCtx *VAR_0, int VAR_1, int VAR_2, int16_t VAR_3, int16_t VAR_4)
30
+ {
31
+ int16_t icoef;
32
+ int VAR_5 = VAR_0->cdlms[VAR_1][VAR_2].VAR_5;
33
+ int16_t range = 1 << (VAR_0->bits_per_sample - 1);
34
+ int VAR_6 = VAR_0->bits_per_sample > 16 ? 4 : 2;
35
+ if (VAR_3 > VAR_4) {
36
+ for (icoef = 0; icoef < VAR_0->cdlms[VAR_1][VAR_2].order; icoef++)
37
+ VAR_0->cdlms[VAR_1][VAR_2].coefs[icoef] +=
38
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[icoef + VAR_5];
39
+ } else {
40
+ for (icoef = 0; icoef < VAR_0->cdlms[VAR_1][VAR_2].order; icoef++)
41
+ VAR_0->cdlms[VAR_1][VAR_2].coefs[icoef] -=
42
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[icoef];
43
+ }
44
+ VAR_0->cdlms[VAR_1][VAR_2].VAR_5--;
45
+ VAR_0->cdlms[VAR_1][VAR_2].lms_prevvalues[VAR_5] = av_clip(VAR_3, -range, range - 1);
46
+ if (VAR_3 > VAR_4)
47
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[VAR_5] = VAR_0->update_speed[VAR_1];
48
+ else if (VAR_3 < VAR_4)
49
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[VAR_5] = -VAR_0->update_speed[VAR_1];
50
+
51
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[VAR_5 + VAR_0->cdlms[VAR_1][VAR_2].order >> 4] >>= 2;
52
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates[VAR_5 + VAR_0->cdlms[VAR_1][VAR_2].order >> 3] >>= 1;
53
+
54
+ if (VAR_0->cdlms[VAR_1][VAR_2].VAR_5 == 0) {
55
+
56
+ memcpy(VAR_0->cdlms[VAR_1][VAR_2].lms_prevvalues + VAR_0->cdlms[VAR_1][VAR_2].order,
57
+ VAR_0->cdlms[VAR_1][VAR_2].lms_prevvalues,
58
+ VAR_6 * VAR_0->cdlms[VAR_1][VAR_2].order);
59
+ memcpy(VAR_0->cdlms[VAR_1][VAR_2].lms_updates + VAR_0->cdlms[VAR_1][VAR_2].order,
60
+ VAR_0->cdlms[VAR_1][VAR_2].lms_updates,
61
+ VAR_6 * VAR_0->cdlms[VAR_1][VAR_2].order);
62
+ VAR_0->cdlms[VAR_1][VAR_2].VAR_5 = VAR_0->cdlms[VAR_1][VAR_2].order;
63
+ }
64
+ }
65
+
66
+
67
+
68
+ """
69
+
70
+ # Tokenize input
71
+ inputs = tokenizer(example_code, return_tensors="pt", truncation=True, padding=True)
72
+
73
+ # Run model
74
+ with torch.no_grad():
75
+ outputs = model(**inputs)
76
+ logits = outputs.logits
77
+
78
+ # Get predicted class
79
+ predicted_class = torch.argmax(logits, dim=-1).item()
80
+
81
+ print(f"Predicted class ID: {predicted_class}")
82
  ```
83
+ Reference:
84
 
85
+ ```
86
+ @article{aghaei2025securebert,
87
+ title={SecureBERT 2.0: Advanced Language Model for Cybersecurity Intelligence},
88
+ author={Aghaei, Ehsan and Jain, Sarthak and Arun, Prashanth and Sambamoorthy, Arjun},
89
+ journal={arXiv preprint arXiv:2510.00240},
90
+ year={2025}
91
+ }
92
+ ```