Add Code Security Analyzer Gradio app
Browse files
app.py
ADDED
|
@@ -0,0 +1,535 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Code Security Risk Analyzer - Gradio UI
|
| 3 |
+
Analyzes code for OWASP Top 10, CWE vulnerabilities.
|
| 4 |
+
Outputs structured security report with vulnerability details, severity, and fixes.
|
| 5 |
+
"""
|
| 6 |
+
import json
|
| 7 |
+
import re
|
| 8 |
+
import torch
|
| 9 |
+
import gradio as gr
|
| 10 |
+
from transformers import (
|
| 11 |
+
AutoTokenizer,
|
| 12 |
+
AutoModelForSequenceClassification,
|
| 13 |
+
T5ForConditionalGeneration,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
# ============================================================
|
| 17 |
+
# Label Mappings
|
| 18 |
+
# ============================================================
|
| 19 |
+
# Label vocabulary for the classifier.  analyze_code() pairs these names with
# the classifier's output probabilities by position and reads index 0 as the
# "safe" score, so the order of this list is significant.
TARGET_CWES = [
    "safe",
    "CWE-20", "CWE-22", "CWE-78", "CWE-79", "CWE-89",
    "CWE-94", "CWE-119", "CWE-125", "CWE-190", "CWE-200",
    "CWE-264", "CWE-269", "CWE-276", "CWE-284", "CWE-287",
    "CWE-310", "CWE-327", "CWE-330", "CWE-352", "CWE-362",
    "CWE-399", "CWE-401", "CWE-416", "CWE-434", "CWE-476",
    "CWE-502", "CWE-601", "CWE-787", "CWE-798", "CWE-918",
]
|
| 26 |
+
|
| 27 |
+
# Human-readable title for every label in TARGET_CWES (used as the heading of
# each finding in the report).
CWE_NAMES = {
    "safe": "Safe Code",
    # Input handling / injection
    "CWE-20": "Improper Input Validation",
    "CWE-22": "Path Traversal",
    "CWE-78": "OS Command Injection",
    "CWE-79": "Cross-Site Scripting (XSS)",
    "CWE-89": "SQL Injection",
    "CWE-94": "Code Injection",
    # Memory / arithmetic
    "CWE-119": "Buffer Overflow",
    "CWE-125": "Out-of-bounds Read",
    "CWE-190": "Integer Overflow",
    # Exposure, permissions, authentication
    "CWE-200": "Information Exposure",
    "CWE-264": "Permissions/Privileges/Access Controls",
    "CWE-269": "Improper Privilege Management",
    "CWE-276": "Incorrect Default Permissions",
    "CWE-284": "Improper Access Control",
    "CWE-287": "Improper Authentication",
    # Cryptography
    "CWE-310": "Cryptographic Issues",
    "CWE-327": "Broken Crypto Algorithm",
    "CWE-330": "Insufficient Randomness",
    # Everything else, in label order
    "CWE-352": "Cross-Site Request Forgery (CSRF)",
    "CWE-362": "Race Condition",
    "CWE-399": "Resource Management Errors",
    "CWE-401": "Memory Leak",
    "CWE-416": "Use After Free",
    "CWE-434": "Unrestricted File Upload",
    "CWE-476": "NULL Pointer Dereference",
    "CWE-502": "Insecure Deserialization",
    "CWE-601": "Open Redirect",
    "CWE-787": "Out-of-bounds Write",
    "CWE-798": "Hardcoded Credentials",
    "CWE-918": "Server-Side Request Forgery (SSRF)",
}
|
| 60 |
+
|
| 61 |
+
# OWASP Top 10 (2021) category shown for each CWE in the report.  The table is
# written per category so each category string appears once; it is flattened
# into the CWE -> category lookup below.  CWEs missing here are reported as
# "N/A" by analyze_code().
_OWASP_CATEGORY_MEMBERS = (
    (
        "A01:2021 - Broken Access Control",
        ("CWE-22", "CWE-200", "CWE-264", "CWE-276", "CWE-284", "CWE-352", "CWE-601", "CWE-269"),
    ),
    (
        "A02:2021 - Cryptographic Failures",
        ("CWE-310", "CWE-327", "CWE-330"),
    ),
    (
        "A03:2021 - Injection",
        (
            "CWE-20", "CWE-78", "CWE-79", "CWE-89", "CWE-94", "CWE-119",
            "CWE-125", "CWE-190", "CWE-416", "CWE-476", "CWE-401", "CWE-787",
        ),
    ),
    (
        "A04:2021 - Insecure Design",
        ("CWE-434", "CWE-362", "CWE-399"),
    ),
    (
        "A07:2021 - Identification & Auth Failures",
        ("CWE-287", "CWE-798"),
    ),
    (
        "A08:2021 - Software & Data Integrity Failures",
        ("CWE-502",),
    ),
    (
        "A10:2021 - Server-Side Request Forgery",
        ("CWE-918",),
    ),
)

CWE_TO_OWASP = {
    cwe_id: category
    for category, members in _OWASP_CATEGORY_MEMBERS
    for cwe_id in members
}
|
| 93 |
+
|
| 94 |
+
# CWE id -> (severity label, base severity score out of 100).  analyze_code()
# prints the label and multiplies the score by the detection confidence.
SEVERITY_MAP = {
    "CWE-89": ("Critical", 95),
    "CWE-78": ("Critical", 93),
    "CWE-94": ("Critical", 92),
    "CWE-502": ("Critical", 90),
    "CWE-918": ("Critical", 88),
    "CWE-798": ("Critical", 87),
    "CWE-119": ("High", 85),
    "CWE-787": ("High", 84),
    "CWE-416": ("High", 83),
    "CWE-79": ("High", 80),
    "CWE-22": ("High", 78),
    "CWE-287": ("High", 77),
    "CWE-284": ("High", 76),
    "CWE-434": ("High", 75),
    "CWE-125": ("Medium", 70),
    "CWE-190": ("Medium", 68),
    "CWE-352": ("Medium", 67),
    "CWE-476": ("Medium", 65),
    "CWE-362": ("Medium", 63),
    "CWE-20": ("Medium", 60),
    "CWE-264": ("Medium", 58),
    "CWE-269": ("Medium", 57),
    "CWE-310": ("Medium", 65),
    "CWE-327": ("Medium", 62),
    "CWE-330": ("Medium", 55),
    "CWE-399": ("Low", 45),
    "CWE-401": ("Low", 42),
    "CWE-200": ("Low", 40),
    "CWE-276": ("Low", 38),
    "CWE-601": ("Medium", 55),
}
|
| 111 |
+
|
| 112 |
+
# Plain-language Markdown explanation printed under "Why This Is Dangerous"
# for each detected CWE.  analyze_code() substitutes a generic sentence for
# any CWE without an entry.
EXPLANATIONS = {
    "CWE-89": (
        "**SQL Injection** means an attacker can manipulate your database queries by injecting malicious SQL code through user inputs. "
        "This could let them steal, modify, or delete ALL your data. "
        "Imagine someone typing `'; DROP TABLE users; --` into a login form."
    ),
    "CWE-79": (
        "**Cross-Site Scripting (XSS)** lets attackers inject malicious JavaScript into your web pages. "
        "When other users visit the page, the script runs in their browser - stealing cookies, session tokens, or redirecting them to fake sites."
    ),
    "CWE-78": (
        "**OS Command Injection** means user input is being passed directly to system commands. "
        "An attacker could run ANY command on your server - install malware, steal files, or take complete control."
    ),
    "CWE-94": (
        "**Code Injection** allows attackers to inject and execute arbitrary code in your application. "
        "Functions like `eval()`, `exec()`, or dynamic code compilation with untrusted input are the usual culprits."
    ),
    "CWE-119": (
        "**Buffer Overflow** happens when your code writes data beyond the allocated memory buffer. "
        "Attackers can exploit this to crash your program, corrupt data, or even execute malicious code."
    ),
    "CWE-125": (
        "**Out-of-bounds Read** means your code reads memory outside the intended buffer. "
        "This can leak sensitive data like passwords, encryption keys, or other users' data from memory."
    ),
    "CWE-190": (
        "**Integer Overflow** occurs when an arithmetic operation produces a value too large for the data type. "
        "This can cause crashes, infinite loops, or be chained with buffer overflows for code execution."
    ),
    "CWE-200": (
        "**Information Exposure** means sensitive data (API keys, passwords, internal paths, stack traces) "
        "is being leaked to unauthorized parties through error messages, logs, or responses."
    ),
    "CWE-264": (
        "**Improper Access Control** means users can access resources or perform actions they shouldn't be authorized for. "
        "Missing permission checks let attackers escalate privileges."
    ),
    "CWE-287": (
        "**Authentication Bypass** means the login/identity verification can be circumvented. "
        "Attackers could access any account without knowing the password."
    ),
    "CWE-310": (
        "**Cryptographic Issues** - you're using weak, broken, or improperly configured encryption. "
        "Data you think is protected may be easily decryptable by attackers."
    ),
    "CWE-352": (
        "**CSRF** tricks authenticated users into performing unwanted actions on your site. "
        "An attacker's page could make a user unknowingly transfer money, change their email, or delete their account."
    ),
    "CWE-362": (
        "**Race Condition** means two operations compete for the same resource without proper synchronization. "
        "Attackers can exploit the timing window to bypass security checks."
    ),
    "CWE-416": (
        "**Use After Free** - memory is being used after it's been freed. "
        "Attackers can manipulate the freed memory to execute arbitrary code or crash the application."
    ),
    "CWE-434": (
        "**Unrestricted File Upload** lets attackers upload malicious files (like web shells) to your server. "
        "They could then execute code remotely and take full control."
    ),
    "CWE-476": (
        "**NULL Pointer Dereference** - your code tries to use a pointer/reference that's NULL. "
        "This crashes the program and can be exploited for denial-of-service attacks."
    ),
    "CWE-502": (
        "**Insecure Deserialization** means untrusted data is being deserialized without validation. "
        "Attackers can craft malicious serialized objects that execute code when deserialized."
    ),
    "CWE-601": (
        "**Open Redirect** lets attackers redirect users from your trusted site to a malicious one. "
        "This is commonly used in phishing attacks to steal credentials."
    ),
    "CWE-787": (
        "**Out-of-bounds Write** - data is written outside the intended memory buffer. "
        "This is a severe vulnerability that often leads to remote code execution."
    ),
    "CWE-798": (
        "**Hardcoded Credentials** - passwords, API keys, or tokens are embedded directly in the source code. "
        "Anyone with access to the code (or the compiled binary) can extract them."
    ),
    "CWE-918": (
        "**SSRF** lets attackers make your server send requests to internal systems. "
        "They could scan your network, access internal APIs, or read cloud metadata to steal credentials."
    ),
    "CWE-22": (
        "**Path Traversal** means user input is used in file paths without sanitization. "
        "Attackers can use `../` sequences to access any file on the server - config files, passwords, source code."
    ),
    "CWE-269": (
        "**Privilege Escalation** - a user can gain higher privileges than intended. "
        "A regular user might become an admin, accessing sensitive operations and data."
    ),
    "CWE-276": (
        "**Incorrect Permissions** - files or resources have permissions that are too permissive. "
        "Sensitive files might be world-readable, exposing secrets."
    ),
    "CWE-327": (
        "**Broken Cryptography** - you're using algorithms like MD5 or SHA1 that are cryptographically broken. "
        "Attackers can forge hashes or decrypt data."
    ),
    "CWE-330": (
        "**Insufficient Randomness** - security-critical random values (tokens, keys, IDs) are predictable. "
        "Attackers can guess session tokens or API keys."
    ),
    "CWE-399": (
        "**Resource Management Issues** - improper handling of system resources "
        "can lead to denial of service through resource exhaustion."
    ),
    "CWE-401": (
        "**Memory Leak** - memory is allocated but never freed. "
        "Over time, the application uses more and more memory until it crashes."
    ),
    "CWE-20": (
        "**Improper Input Validation** - user input isn't properly checked before use. "
        "This is the root cause of many other vulnerabilities like injection and overflow attacks."
    ),
    "CWE-284": (
        "**Broken Access Control** - authorization checks are missing or incorrectly implemented. "
        "Users can access other users' data or admin functionality."
    ),
}
|
| 144 |
+
|
| 145 |
+
# ============================================================
|
| 146 |
+
# Model Loading
|
| 147 |
+
# ============================================================
|
| 148 |
+
CLASSIFIER_ID = "ayshajavd/graphcodebert-vuln-classifier"
FIXER_ID = "ayshajavd/codet5p-vuln-fixer"

# Both models are loaded when the module is imported.  Each one is first
# requested from its fine-tuned checkpoint; if anything goes wrong the public
# base checkpoint is loaded instead so the app can still start.  The *_LOADED
# flags record which variant is active and are surfaced in the report.
print("Loading classifier...")
try:
    cls_tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_ID)
    cls_model = AutoModelForSequenceClassification.from_pretrained(CLASSIFIER_ID)
    cls_model.eval()
    CLASSIFIER_LOADED = True
    print("Classifier loaded successfully")
except Exception as e:
    print(f"Classifier not available yet: {e}")
    cls_tokenizer = AutoTokenizer.from_pretrained("microsoft/graphcodebert-base")
    cls_model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/graphcodebert-base",
        # One output per label; derived from TARGET_CWES so the classification
        # head cannot drift out of sync with the label list.
        num_labels=len(TARGET_CWES),
        problem_type="multi_label_classification",
    )
    cls_model.eval()
    CLASSIFIER_LOADED = False
    print("Loaded base GraphCodeBERT as fallback")

print("Loading fix generator...")
try:
    fix_tokenizer = AutoTokenizer.from_pretrained(FIXER_ID)
    fix_model = T5ForConditionalGeneration.from_pretrained(FIXER_ID)
    fix_model.eval()
    FIXER_LOADED = True
    print("Fix generator loaded successfully")
except Exception as e:
    print(f"Fix generator not available yet: {e}")
    fix_tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5p-220m")
    fix_model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5p-220m")
    fix_model.eval()
    FIXER_LOADED = False
    print("Loaded base CodeT5+ as fallback")
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def detect_language(code: str) -> str:
    """Guess the programming language of a snippet from keyword heuristics.

    Only the first 500 characters are inspected, case-insensitively.  The
    checks run in a fixed order and the first match wins, so the order below
    matters.

    Args:
        code: Source text to inspect.

    Returns:
        One of "PHP", "Go", "C++", "C", "Java", "Rust", "JavaScript",
        "Python", or "Unknown" when no heuristic matches.
    """
    code_lower = code[:500].lower()

    if "<?php" in code_lower:
        return "PHP"

    if "package main" in code_lower and "func " in code_lower:
        return "Go"

    if "#include" in code_lower:
        if "class " in code_lower or "std::" in code_lower or "cout" in code_lower:
            return "C++"
        return "C"

    if (
        "import java" in code_lower
        or "public class" in code_lower
        or "public static void main" in code_lower
    ):
        return "Java"

    # Rust must be tested before JavaScript: "let mut" also satisfies the
    # JavaScript pattern's "let " alternative, so with the JavaScript check
    # first this branch could never be reached.
    if "fn " in code_lower and "let mut" in code_lower:
        return "Rust"

    if re.search(r'\b(const |let |var |function |=>|require\(|module\.exports)', code_lower):
        return "JavaScript"

    if re.search(r'\b(def |import |from |class |self\.|print\()', code_lower):
        return "Python"

    return "Unknown"
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def generate_fix(code: str, language: str) -> str:
    """Ask the fix model for a rewritten version of ``code``.

    The prompt is ``"fix <language>: "`` (language lower-cased) followed by
    the code, truncated to 512 tokens.  Decoding uses 5-beam search with
    early stopping and no repeated 3-grams, capped at 512 tokens.

    Args:
        code: The source snippet to repair.
        language: Language name; it is lower-cased into the prompt prefix.

    Returns:
        The first generated sequence decoded to text without special tokens.
    """
    prompt = f"fix {language.lower()}: " + code
    encoded = fix_tokenizer(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    beam_search_options = {
        "max_length": 512,
        "num_beams": 5,
        "early_stopping": True,
        "no_repeat_ngram_size": 3,
    }

    # Inference only: no gradients are needed.
    with torch.no_grad():
        candidates = fix_model.generate(encoded.input_ids, **beam_search_options)

    best_candidate = candidates[0]
    return fix_tokenizer.decode(best_candidate, skip_special_tokens=True)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
# Severity used for a detected CWE that has no SEVERITY_MAP entry.  A single
# default keeps the overall score and the per-finding table consistent.
_DEFAULT_SEVERITY = ("Medium", 50)

# Attack-chain stages in narrative order: (CWE ids that enable the stage,
# text printed for it).
_ATTACK_CHAIN_STAGES = (
    (
        ("CWE-20", "CWE-89", "CWE-79", "CWE-78", "CWE-94"),
        "**Initial Access** - Exploit input validation weakness to inject malicious payload",
    ),
    (
        ("CWE-264", "CWE-269", "CWE-284", "CWE-287"),
        "**Privilege Escalation** - Bypass access controls to gain elevated permissions",
    ),
    (
        ("CWE-200", "CWE-22", "CWE-125"),
        "**Data Exfiltration** - Read sensitive files or memory to extract secrets",
    ),
    (
        ("CWE-119", "CWE-416", "CWE-787", "CWE-502"),
        "**Remote Code Execution** - Exploit memory corruption or deserialization for code execution",
    ),
)


def _risk_level(score: int) -> str:
    """Translate a 0-100 risk score into the label shown in the report."""
    if score >= 80:
        return "Critical"
    if score >= 60:
        return "High"
    if score >= 40:
        return "Medium"
    return "Low"


def _attack_chain_section(detected: list) -> list:
    """Return the report lines of the "Attack Chain Analysis" section.

    ``detected`` is the confidence-sorted list of ``(cwe, confidence)`` pairs.
    """
    lines = [
        "---\n",
        "## Attack Chain Analysis\n",
        "Multiple vulnerabilities can be chained together for a more severe attack:\n",
    ]

    found = {cwe for cwe, _ in detected}
    # Keep only the stages enabled by at least one finding, numbered from 1.
    active_stages = [text for members, text in _ATTACK_CHAIN_STAGES if found.intersection(members)]
    chain_steps = [f"{number}. {text}" for number, text in enumerate(active_stages, 1)]

    if chain_steps:
        lines.append("\n".join(chain_steps))
    else:
        vuln_names = [CWE_NAMES.get(cwe, cwe) for cwe, _ in detected[:3]]
        lines.append(
            f"The combination of **{' + '.join(vuln_names)}** increases the attack surface. "
            "An attacker could exploit one vulnerability to amplify the impact of another."
        )
    return lines


def analyze_code(code: str) -> str:
    """Main analysis function - returns formatted security report.

    The snippet is classified (truncated to 512 tokens); every non-"safe"
    label whose sigmoid probability exceeds 0.3 is reported as a finding,
    ordered by confidence.  When there are findings the report also contains
    per-finding details, an attack-chain section (two or more findings) and a
    generated fix.

    Args:
        code: Source text pasted by the user.

    Returns:
        A Markdown report, or a short prompt when ``code`` is empty or only
        whitespace.  Fix-generation errors are reported inside the Markdown
        rather than raised.
    """
    if not code or not code.strip():
        return "Please paste some code to analyze."

    language = detect_language(code)

    # Classify
    inputs = cls_tokenizer(
        code,
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding=True,
    )
    with torch.no_grad():
        logits = cls_model(**inputs).logits
    # Exactly one snippet is submitted, so take row 0 explicitly; this always
    # yields a 1-D vector of per-label probabilities.
    probs = torch.sigmoid(logits)[0].numpy()

    # Get detected vulnerabilities (threshold 0.3 for sensitivity)
    threshold = 0.3
    detected = [
        (cwe, float(prob))
        for cwe, prob in zip(TARGET_CWES, probs)
        if cwe != "safe" and prob > threshold
    ]
    # Sort by confidence
    detected.sort(key=lambda finding: finding[1], reverse=True)

    # "safe" is the first entry of TARGET_CWES.
    safe_prob = float(probs[0])

    # Build report
    model_status = "Trained Model" if CLASSIFIER_LOADED else "Base Model (untrained - results are for demo only)"
    fix_status = "Trained" if FIXER_LOADED else "Base Model"
    report = [
        "# Code Security Analysis Report\n",
        f"**Language Detected:** {language}",
        f"**Model Status:** {model_status}",
        f"**Fix Generator:** {fix_status}\n",
    ]

    if not detected:
        overall_score = max(0, int(100 * safe_prob))
        risk_score = 100 - overall_score
        report.append("## No Vulnerabilities Detected")
        # Label the score with the same bands as the findings branch instead
        # of a fixed "Low", which contradicted high scores such as 90/100.
        report.append(f"**Overall Risk Score:** {risk_score}/100 ({_risk_level(risk_score)} Risk)")
        report.append(f"**Safe Code Confidence:** {safe_prob:.1%}\n")
        report.append(
            "The analyzed code appears to be safe based on our detection model. "
            "However, always review code manually and use additional static analysis tools."
        )
        return "\n".join(report)

    # Calculate overall risk score: worst severity scaled by mean confidence.
    max_severity = max(SEVERITY_MAP.get(cwe, _DEFAULT_SEVERITY)[1] for cwe, _ in detected)
    avg_confidence = sum(confidence for _, confidence in detected) / len(detected)
    overall_risk = min(100, int(max_severity * avg_confidence * 1.2))
    risk_level = _risk_level(overall_risk)

    report.append(f"## {len(detected)} Vulnerability(ies) Detected\n")
    report.append(f"**Overall Risk Score:** {overall_risk}/100 ({risk_level})")
    report.append(f"**Safe Code Probability:** {safe_prob:.1%}\n")
    report.append("---\n")

    # Detail each vulnerability
    for idx, (cwe, confidence) in enumerate(detected, 1):
        name = CWE_NAMES.get(cwe, cwe)
        owasp = CWE_TO_OWASP.get(cwe, "N/A")
        severity, score = SEVERITY_MAP.get(cwe, _DEFAULT_SEVERITY)
        explanation = EXPLANATIONS.get(cwe, "This vulnerability could pose a security risk to your application.")
        exploit_likelihood = min(100, int(confidence * score))

        report.extend(
            [
                f"### {idx}. {name}",
                "| Property | Value |",
                "|----------|-------|",
                f"| **CWE ID** | {cwe} |",
                f"| **OWASP Category** | {owasp} |",
                f"| **Severity** | {severity} ({score}/100) |",
                f"| **Detection Confidence** | {confidence:.1%} |",
                f"| **Exploit Likelihood** | {exploit_likelihood}% |",
                f"\n**Why This Is Dangerous:**\n{explanation}\n",
            ]
        )

    # Attack chain analysis
    if len(detected) > 1:
        report.extend(_attack_chain_section(detected))

    # Generate fix
    report.append("\n---\n")
    report.append("## Suggested Secure Fix\n")
    try:
        fix = generate_fix(code, language)
    except Exception as exc:
        # Best effort: a failed generation must not discard the findings.
        report.append(f"*Fix generation failed: {exc}. Please review manually.*\n")
    else:
        if fix and fix.strip():
            report.append(f"```{language.lower()}\n{fix}\n```\n")
        else:
            report.append("*Fix generation returned empty result. Please review manually.*\n")

    report.append("---\n")
    report.append(
        "*This report was generated by an AI model. "
        "Always verify findings with manual code review and additional security tools.*"
    )

    return "\n".join(report)
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
# ============================================================
|
| 369 |
+
# Example Code Snippets
|
| 370 |
+
# ============================================================
|
| 371 |
+
# Sample snippets offered in the UI.  Each is kept in its own named constant
# and wrapped in a one-element list below, because gr.Examples expects one
# row per example with one value per input component.

# Python: SQL built with an f-string and a plaintext password comparison.
_EXAMPLE_PY_SQL_INJECTION = """import sqlite3

def get_user(username):
    conn = sqlite3.connect('users.db')
    cursor = conn.cursor()
    query = f"SELECT * FROM users WHERE username = '{username}'"
    cursor.execute(query)
    return cursor.fetchone()

def login(request):
    username = request.form['username']
    password = request.form['password']
    user = get_user(username)
    if user and user[2] == password:
        return "Login successful"
    return "Login failed"
"""

# C: unbounded strcpy into a fixed-size stack buffer.
_EXAMPLE_C_BUFFER_OVERFLOW = """#include <stdio.h>
#include <string.h>

void process_input(char *user_input) {
    char buffer[64];
    strcpy(buffer, user_input);
    printf("Processed: %s\\n", buffer);
}

int main(int argc, char *argv[]) {
    if (argc > 1) {
        process_input(argv[1]);
    }
    return 0;
}
"""

# JavaScript (Express): reflected query parameter and concatenated SQL.
_EXAMPLE_JS_XSS_AND_SQLI = """const express = require('express');
const app = express();

app.get('/search', (req, res) => {
    const query = req.query.q;
    res.send(`<h1>Search Results for: ${query}</h1>
              <p>No results found for "${query}"</p>`);
});

app.get('/profile/:id', (req, res) => {
    const userId = req.params.id;
    db.query('SELECT * FROM users WHERE id = ' + userId, (err, user) => {
        res.send(`<h2>${user.name}</h2><p>${user.bio}</p>`);
    });
});
"""

# Python: hard-coded secrets and MD5 password hashing.
_EXAMPLE_PY_HARDCODED_SECRETS = """import requests
import hashlib

API_KEY = "sk-proj-abc123def456ghi789"
DB_PASSWORD = "admin123"
SECRET_KEY = "super_secret_key_2024"

def connect_to_api():
    headers = {"Authorization": f"Bearer {API_KEY}"}
    response = requests.get("https://api.example.com/data", headers=headers)
    return response.json()

def hash_password(password):
    return hashlib.md5(password.encode()).hexdigest()

def verify_admin(token):
    if token == SECRET_KEY:
        return True
    return False
"""

# Python: parameterized query, salted hash, constant-time comparison.
_EXAMPLE_PY_SAFE = """import sqlite3
from hashlib import sha256
import hmac
import secrets

def get_user(username):
    conn = sqlite3.connect('users.db')
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM users WHERE username = ?", (username,))
    return cursor.fetchone()

def hash_password(password, salt=None):
    if salt is None:
        salt = secrets.token_hex(16)
    hashed = sha256((salt + password).encode()).hexdigest()
    return f"{salt}:{hashed}"

def verify_password(password, stored_hash):
    salt, expected_hash = stored_hash.split(':')
    actual_hash = sha256((salt + password).encode()).hexdigest()
    return hmac.compare_digest(actual_hash, expected_hash)
"""

EXAMPLES = [
    [_EXAMPLE_PY_SQL_INJECTION],
    [_EXAMPLE_C_BUFFER_OVERFLOW],
    [_EXAMPLE_JS_XSS_AND_SQLI],
    [_EXAMPLE_PY_HARDCODED_SECRETS],
    [_EXAMPLE_PY_SAFE],
]
|
| 464 |
+
|
| 465 |
+
# ============================================================
|
| 466 |
+
# Gradio UI
|
| 467 |
+
# ============================================================
|
| 468 |
+
# Static text for the page, kept flush-left so the Markdown and CSS do not
# depend on the indentation of the layout code below.
_CUSTOM_CSS = """
.report-output { font-size: 14px; }
.gradio-container { max-width: 1200px; margin: auto; }
"""

_INTRO_MARKDOWN = """
# AI-Powered Code Security Risk Analyzer
### Detect OWASP Top 10, CWE vulnerabilities, and get secure fixes

Paste any code snippet (Python, JavaScript, Java, C, C++, PHP, Go) and get a comprehensive security audit.

**Powered by:** GraphCodeBERT (vulnerability detection) + CodeT5+ (fix generation)
"""

_COVERAGE_MARKDOWN = """
---
### Vulnerability Categories Covered

| OWASP Category | Vulnerabilities |
|---|---|
| **A01: Broken Access Control** | Path Traversal, IDOR, Missing Auth, Privilege Escalation, CSRF, Open Redirect |
| **A02: Cryptographic Failures** | Weak Crypto (MD5/SHA1), Insufficient Randomness, Broken Algorithms |
| **A03: Injection** | SQL Injection, XSS, Command Injection, Code Injection, Buffer Overflow |
| **A04: Insecure Design** | Race Conditions, Unrestricted Upload, Resource Management |
| **A07: Auth Failures** | Improper Authentication, Hardcoded Credentials |
| **A08: Integrity Failures** | Insecure Deserialization |
| **A10: SSRF** | Server-Side Request Forgery |

**Languages:** Python, JavaScript, Java, C, C++, PHP, Go

**Models:** [GraphCodeBERT](https://huggingface.co/microsoft/graphcodebert-base) (detection) | [CodeT5+](https://huggingface.co/Salesforce/codet5p-220m) (fix generation)

**Dataset:** [code-security-vulnerability-dataset](https://huggingface.co/datasets/ayshajavd/code-security-vulnerability-dataset) - 175K samples from BigVul, PrimeVul, CyberNative DPO
"""

# Page layout: intro text, a two-column row (code editor + button on the
# left, rendered report on the right), clickable examples, then the coverage
# table.  Components are created in display order.
with gr.Blocks(
    title="Code Security Risk Analyzer",
    theme=gr.themes.Soft(),
    css=_CUSTOM_CSS,
) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column(scale=1):
            code_input = gr.Code(
                label="Paste Your Code Here",
                language="python",
                lines=20,
            )
            analyze_btn = gr.Button("Analyze Security", variant="primary", size="lg")

        with gr.Column(scale=1):
            report_output = gr.Markdown(
                label="Security Report",
                elem_classes=["report-output"],
            )

    gr.Examples(
        examples=EXAMPLES,
        inputs=[code_input],
        label="Example Code Snippets (click to load)",
    )

    # Clicking the button runs the analysis and renders the Markdown report.
    analyze_btn.click(
        fn=analyze_code,
        inputs=[code_input],
        outputs=[report_output],
    )

    gr.Markdown(_COVERAGE_MARKDOWN)


if __name__ == "__main__":
    demo.launch()
|