Spaces:

CIRCL
/

CWE-PARENT-patch-Vulnerability-Patch-Classification-Roberta-Base

Runtime error

App Files Files Community

elselse committed on Sep 1, 2025

Commit

4c74307

verified ·

1 Parent(s): 0bfd012

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -65

app.py CHANGED Viewed

@@ -1,70 +1,53 @@
 import gradio as gr
-import json
-from transformers import pipeline
 import torch
-import random
-import numpy as np
-torch.manual_seed(42)
-random.seed(42)
-np.random.seed(42)
-torch.use_deterministic_algorithms(True)
-# Load Hugging Face model (text classification)
-classifier = pipeline(
-    task="text-classification",
-    model="CIRCL/cwe-parent-vulnerability-classification-roberta-base",
-    top_k=None
-)
-classifier.model.eval()
-# Load child-to-parent mapping
 with open("child_to_parent_mapping.json", "r") as f:
-    child_to_parent = json.load(f)
-def predict_cwe(commit_message: str):
-    """
-    Predict CWE(s) from a commit message and map to parent CWEs if applicable.
-    """
-    results = classifier(commit_message)[0]
-    sorted_results = sorted(results, key=lambda x: x["score"], reverse=True)
-    threshold = 0.2
-    filtered_results = [item for item in sorted_results if item["score"] >= threshold]
-    parent_scores = {}
-    for item in filtered_results:
-        label = item["label"].replace("CWEEEE_", "")
-        score = float(item["score"])
-        parent_label = child_to_parent.get(label, label)
-        if parent_label in parent_scores:
-            parent_scores[parent_label] += score
-        else:
-            parent_scores[parent_label] = score
-    # Sort by score descending and round
-    sorted_parent_scores = sorted(parent_scores.items(), key=lambda x: x[1], reverse=True)
-    return {f"CWE-{k}": round(v, 4) for k, v in sorted_parent_scores[:5]}
-# Gradio UI
-demo = gr.Interface(
-    fn=predict_cwe,
-    inputs=gr.Textbox(lines=3, placeholder="Enter your commit message here..."),
-    outputs=gr.Label(num_top_classes=5),
-    title="CWE Prediction from Commit Message",
-    description="This tool uses a fine-tuned model to predict CWE categories from Git commit messages. "
-                "Predicted child CWEs are mapped to their parent CWEs if applicable.",
-    examples=[
-        ["Fixed buffer overflow in input parsing"],
-        ["SQL injection possible in login flow"],
-        ["Improved input validation to prevent XSS"],
-    ]
-)
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import torch
+import json
+import base64
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+# Hub repository id of the fine-tuned classifier; from_pretrained resolves it
+# against the Hugging Face Hub (or the local cache).
+model_path = "CIRCL/cwe-parent-vulnerability-classification-roberta-base"
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForSequenceClassification.from_pretrained(model_path)
+# Inference only: switch off dropout and other training-time behaviour.
+model.eval()
+# Lookup table shipped next to app.py; it is queried with the predicted label.
 with open("child_to_parent_mapping.json", "r") as f:
+    child_to_ancestor = json.load(f)
+# model_path is a Hub repo id, not a directory on disk, so opening
+# f"{model_path}/config.json" raises FileNotFoundError. The configuration is
+# already attached to the loaded model, so read the label map from there.
+config = model.config.to_dict()
+# Keys are stringified so that id2label[str(idx)] lookups keep working.
+id2label = {str(idx): label for idx, label in config["id2label"].items()}
+# Extraction helper that stands in for a formatted-input step
+def extract_commit_text_hg_style(input_text):
+    """Return *input_text* with leading and trailing whitespace removed.
+
+    A real patch or commit could be parsed here; for now the raw input is
+    used as-is apart from the stripping.
+    """
+    cleaned = input_text.strip()
+    return cleaned
+# Gradio prediction callback
+def predict_ancestors(input_text):
+    """Rank the most probable CWE labels for a commit message or patch.
+
+    Args:
+        input_text: Raw text entered in the Gradio textbox.
+
+    Returns:
+        A newline-separated string with one line per prediction (at most
+        five), formatted as
+        "<rank>. CWE-<label> (ancestor: CWE-<ancestor>) - <probability>".
+        The ancestor is "N/A" when the label has no entry in
+        child_to_ancestor.
+    """
+    text = extract_commit_text_hg_style(input_text)
+    # A single sequence needs no padding; truncation still caps it at 512 tokens.
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        logits = model(**inputs).logits
+        probs = torch.softmax(logits, dim=-1)
+    # Never request more entries than the classifier has labels.
+    topk = torch.topk(probs, k=min(5, probs.shape[-1]))
+    top_ids = topk.indices[0].tolist()
+    top_scores = topk.values[0].tolist()
+    results = []
+    for rank, (idx, score) in enumerate(zip(top_ids, top_scores), 1):
+        cwe_child = id2label[str(idx)]
+        ancestor = child_to_ancestor.get(cwe_child, "N/A")
+        results.append(f"{rank}. CWE-{cwe_child} (ancestor: CWE-{ancestor}) - {score:.4f}")
+    # The output component is a Textbox, so return one string, not a list.
+    return "\n".join(results)
+# Gradio interface
+gr.Interface(
+    fn=predict_ancestors,
+    inputs=gr.Textbox(label="Commit message or patch (e.g., 'hg')"),
+    # The legacy gr.outputs namespace is deprecated in Gradio 3 and absent from
+    # Gradio 4; use the top-level component, as the input above already does.
+    outputs=gr.Textbox(label="Top 5 Predicted CWE Ancestors"),
+    title="CWE Ancestor Predictor",
+    description="Entrez un message de commit ou un patch. Le modèle prédit les 5 CWE enfants les plus probables et affiche leurs ancêtres."
+).launch()