Spaces:

CIRCL
/

CWE-PARENT-patch-Vulnerability-Patch-Classification-Roberta-Base

Sleeping

App Files Community

elselse committed on Sep 1, 2025

Commit

bcd6411

verified ·

1 Parent(s): c364fcb

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -40

app.py CHANGED Viewed

@@ -1,46 +1,58 @@
 import gradio as gr
-import torch
 import json
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-model_path = "CIRCL/cwe-parent-vulnerability-classification-roberta-base"
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForSequenceClassification.from_pretrained(model_path)
-model.eval()
-with open("child_to_parent_mapping.json", "r") as f:
-    child_to_ancestor = json.load(f)
-id2label = model.config.id2label
-def extract_commit_text_hg_style(input_text):
-    return input_text.strip()
-def predict_ancestors(input_text):
-    text = extract_commit_text_hg_style(input_text)
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=512)
-    with torch.no_grad():
-        outputs = model(**inputs)
-        logits = outputs.logits
-        probs = torch.softmax(logits, dim=-1)
-    topk = torch.topk(probs, k=5)
-    top_ids = topk.indices[0].tolist()
-    top_scores = topk.values[0].tolist()
-    results = []
-    for i, (idx, score) in enumerate(zip(top_ids, top_scores), 1):
-        cwe_child = id2label[str(idx)]
-        ancestor = child_to_ancestor.get(cwe_child, "N/A")
-        results.append(f"{i}. CWE-{cwe_child} (ancestor: CWE-{ancestor}) - {score:.4f}")
-    return "\n".join(results)
-gr.Interface(
-    fn=predict_ancestors,
-    inputs=gr.Textbox(label="Commit message or patch (e.g., 'hg')"),
-    outputs=gr.Textbox(label="Top 5 Predicted CWE Ancestors"),
-    title="CWE Ancestor Predictor",
-    description="Entrez un message de commit ou un patch. Le modèle prédit les 5 CWE enfants les plus probables et affiche leurs ancêtres."
-).launch()

 import gradio as gr
 import json
+from transformers import pipeline
+import torch
+import random
+import numpy as np
+# Seed every RNG imported by this module and request deterministic torch
+# algorithms (presumably so repeated runs on the same input give the same scores).
+torch.manual_seed(42)
+random.seed(42)
+np.random.seed(42)
+torch.use_deterministic_algorithms(True)
+# Load Hugging Face model (text classification).
+# top_k=None asks the pipeline for the score of every label, not just the best one.
+classifier = pipeline(
+    task="text-classification",
+    model="CIRCL/cwe-parent-vulnerability-classification-roberta-base",
+    top_k=None,
+)
+# Inference only: keep the underlying model in evaluation mode.
+classifier.model.eval()
+# Load child-to-parent mapping. The encoding is explicit because JSON text is
+# UTF-8, while open() would otherwise fall back to the platform's locale default.
+with open("child_to_parent_mapping.json", "r", encoding="utf-8") as f:
+    child_to_parent = json.load(f)
+def predict_cwe(commit_message: str) -> dict:
+    """Return the five highest-scoring labels for a commit message.
+
+    Args:
+        commit_message: Free-form commit text handed to ``classifier``.
+
+    Returns:
+        A dict mapping each of the (at most) five best labels to its score,
+        rounded to four decimals and inserted in descending score order.
+    """
+    # For a single input the pipeline result is indexed with [0] to reach
+    # the list of per-label {"label": ..., "score": ...} dicts.
+    predictions = classifier(commit_message)[0]
+    ranked = sorted(predictions, key=lambda item: item["score"], reverse=True)
+    # NOTE(review): ``child_to_parent`` is loaded at module level but is not
+    # applied here, so the model's own labels are returned unmapped. Confirm
+    # whether mapping to parent CWEs is still intended.
+    return {item["label"]: round(float(item["score"]), 4) for item in ranked[:5]}
+# Gradio UI
+_DESCRIPTION = (
+    "This tool uses a fine-tuned model to predict CWE categories from Git commit messages. "
+    "Predicted child CWEs are mapped to their parent CWEs if applicable."
+)
+# Sample commit messages offered in the UI; each row holds the value for the single textbox.
+_EXAMPLES = [
+    ["Fixed buffer overflow in input parsing"],
+    ["SQL injection possible in login flow"],
+    ["Improved input validation to prevent XSS"],
+]
+_message_box = gr.Textbox(lines=3, placeholder="Enter your commit message here...")
+_top_labels = gr.Label(num_top_classes=5)
+demo = gr.Interface(
+    fn=predict_cwe,
+    inputs=_message_box,
+    outputs=_top_labels,
+    title="CWE Prediction from Commit Message",
+    description=_DESCRIPTION,
+    examples=_EXAMPLES,
+)
+# Launch only when this file is run as a script, not when it is imported.
+if __name__ == "__main__":
+    demo.launch()