"""
Code Security Risk Analyzer v2 - Gradio UI + REST API
=====================================================
IMPROVEMENTS OVER v1:
- Per-class threshold optimization (not global 0.3)
- Temperature scaling calibration (meaningful probabilities)
- Uses label_config.json for thresholds + calibration
- Better vulnerability detection across rare CWEs
Run AFTER notebooks 1-4 to use the improved models.
Upload this to: https://huggingface.co/spaces/ayshajavd/code-security-analyzer
"""
import json
import re
import time
import torch
import gradio as gr
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
T5ForConditionalGeneration,
)
from huggingface_hub import hf_hub_download
import numpy as np
# ============================================================
# Label Mappings
# ============================================================
# Ordered label list for the classifier output. classify_code() zips this list
# against the per-class probabilities, so position i here names output i.
# Index 0 ("safe") is never reported as a finding; its probability is returned
# separately as the safe probability.
TARGET_CWES = [
    "safe", "CWE-20", "CWE-22", "CWE-78", "CWE-79", "CWE-89", "CWE-94",
    "CWE-119", "CWE-125", "CWE-190", "CWE-200", "CWE-264", "CWE-269",
    "CWE-276", "CWE-284", "CWE-287", "CWE-310", "CWE-327", "CWE-330",
    "CWE-352", "CWE-362", "CWE-399", "CWE-401", "CWE-416", "CWE-434",
    "CWE-476", "CWE-502", "CWE-601", "CWE-787", "CWE-798", "CWE-918",
]
# Human-readable name for every label in TARGET_CWES. Used as the finding
# title in reports and inside the prompt prefix sent to the fix generator.
CWE_NAMES = {
    "safe": "Safe Code",
    "CWE-20": "Improper Input Validation",
    "CWE-22": "Path Traversal",
    "CWE-78": "OS Command Injection",
    "CWE-79": "Cross-Site Scripting (XSS)",
    "CWE-89": "SQL Injection",
    "CWE-94": "Code Injection",
    "CWE-119": "Buffer Overflow",
    "CWE-125": "Out-of-bounds Read",
    "CWE-190": "Integer Overflow",
    "CWE-200": "Information Exposure",
    "CWE-264": "Permissions/Privileges/Access Controls",
    "CWE-269": "Improper Privilege Management",
    "CWE-276": "Incorrect Default Permissions",
    "CWE-284": "Improper Access Control",
    "CWE-287": "Improper Authentication",
    "CWE-310": "Cryptographic Issues",
    "CWE-327": "Broken Crypto Algorithm",
    "CWE-330": "Insufficient Randomness",
    "CWE-352": "Cross-Site Request Forgery (CSRF)",
    "CWE-362": "Race Condition",
    "CWE-399": "Resource Management Errors",
    "CWE-401": "Memory Leak",
    "CWE-416": "Use After Free",
    "CWE-434": "Unrestricted File Upload",
    "CWE-476": "NULL Pointer Dereference",
    "CWE-502": "Insecure Deserialization",
    "CWE-601": "Open Redirect",
    "CWE-787": "Out-of-bounds Write",
    "CWE-798": "Hardcoded Credentials",
    "CWE-918": "Server-Side Request Forgery (SSRF)",
}
# CWE label -> OWASP Top 10 (2021) category string shown in reports.
# Labels missing from this table are reported as "N/A" by build_json_report().
# NOTE(review): the memory-safety CWEs (119, 125, 190, 401, 416, 476, 787) are
# filed under "A03:2021 - Injection" here; confirm that grouping is intended.
CWE_TO_OWASP = {
    "CWE-22": "A01:2021 - Broken Access Control",
    "CWE-200": "A01:2021 - Broken Access Control",
    "CWE-264": "A01:2021 - Broken Access Control",
    "CWE-276": "A01:2021 - Broken Access Control",
    "CWE-284": "A01:2021 - Broken Access Control",
    "CWE-352": "A01:2021 - Broken Access Control",
    "CWE-601": "A01:2021 - Broken Access Control",
    "CWE-269": "A01:2021 - Broken Access Control",
    "CWE-310": "A02:2021 - Cryptographic Failures",
    "CWE-327": "A02:2021 - Cryptographic Failures",
    "CWE-330": "A02:2021 - Cryptographic Failures",
    "CWE-20": "A03:2021 - Injection",
    "CWE-78": "A03:2021 - Injection",
    "CWE-79": "A03:2021 - Injection",
    "CWE-89": "A03:2021 - Injection",
    "CWE-94": "A03:2021 - Injection",
    "CWE-119": "A03:2021 - Injection",
    "CWE-125": "A03:2021 - Injection",
    "CWE-190": "A03:2021 - Injection",
    "CWE-416": "A03:2021 - Injection",
    "CWE-476": "A03:2021 - Injection",
    "CWE-401": "A03:2021 - Injection",
    "CWE-787": "A03:2021 - Injection",
    "CWE-434": "A04:2021 - Insecure Design",
    "CWE-362": "A04:2021 - Insecure Design",
    "CWE-399": "A04:2021 - Insecure Design",
    "CWE-287": "A07:2021 - Identification & Auth Failures",
    "CWE-798": "A07:2021 - Identification & Auth Failures",
    "CWE-502": "A08:2021 - Software & Data Integrity Failures",
    "CWE-918": "A10:2021 - Server-Side Request Forgery",
}
# CWE label -> (severity label, severity score). The score is displayed as
# "<score>/100" and feeds both the overall risk score and the per-finding
# exploit likelihood computed in build_json_report().
SEVERITY_MAP = {
    "CWE-89": ("Critical", 95), "CWE-78": ("Critical", 93),
    "CWE-94": ("Critical", 92), "CWE-502": ("Critical", 90),
    "CWE-918": ("Critical", 88), "CWE-798": ("Critical", 87),
    "CWE-119": ("High", 85), "CWE-787": ("High", 84),
    "CWE-416": ("High", 83), "CWE-79": ("High", 80),
    "CWE-22": ("High", 78), "CWE-287": ("High", 77),
    "CWE-284": ("High", 76), "CWE-434": ("High", 75),
    "CWE-125": ("Medium", 70), "CWE-190": ("Medium", 68),
    "CWE-352": ("Medium", 67), "CWE-476": ("Medium", 65),
    "CWE-362": ("Medium", 63), "CWE-20": ("Medium", 60),
    "CWE-264": ("Medium", 58), "CWE-269": ("Medium", 57),
    "CWE-310": ("Medium", 65), "CWE-327": ("Medium", 62),
    "CWE-330": ("Medium", 55), "CWE-399": ("Low", 45),
    "CWE-401": ("Low", 42), "CWE-200": ("Low", 40),
    "CWE-276": ("Low", 38), "CWE-601": ("Medium", 55),
}
# Plain-language explanation for each CWE label, written with Markdown bold
# markers. build_json_report() strips the "**" markers before placing the text
# in the report and falls back to "Security risk detected." for unknown labels.
EXPLANATIONS = {
    "CWE-89": "**SQL Injection** means an attacker can manipulate your database queries by injecting malicious SQL code through user inputs. This could let them steal, modify, or delete ALL your data.",
    "CWE-79": "**Cross-Site Scripting (XSS)** lets attackers inject malicious JavaScript into your web pages. When other users visit the page, the script runs in their browser - stealing cookies, session tokens, or redirecting them to fake sites.",
    "CWE-78": "**OS Command Injection** means user input is being passed directly to system commands. An attacker could run ANY command on your server.",
    "CWE-94": "**Code Injection** allows attackers to inject and execute arbitrary code. Functions like `eval()`, `exec()`, or dynamic code compilation with untrusted input are the usual culprits.",
    "CWE-119": "**Buffer Overflow** happens when your code writes data beyond the allocated memory buffer. Attackers can exploit this to crash your program or execute malicious code.",
    "CWE-125": "**Out-of-bounds Read** means your code reads memory outside the intended buffer. This can leak sensitive data like passwords or encryption keys.",
    "CWE-190": "**Integer Overflow** occurs when an arithmetic operation produces a value too large for the data type, which can be chained with buffer overflows for code execution.",
    "CWE-200": "**Information Exposure** means sensitive data (API keys, passwords, stack traces) is being leaked to unauthorized parties.",
    "CWE-264": "**Improper Access Control** means users can access resources or perform actions they shouldn't be authorized for.",
    "CWE-287": "**Authentication Bypass** means the login/identity verification can be circumvented.",
    "CWE-310": "**Cryptographic Issues** - you're using weak, broken, or improperly configured encryption.",
    "CWE-352": "**CSRF** tricks authenticated users into performing unwanted actions on your site.",
    "CWE-362": "**Race Condition** means two operations compete for the same resource without proper synchronization.",
    "CWE-416": "**Use After Free** - memory is being used after it's been freed. Attackers can exploit this for arbitrary code execution.",
    "CWE-434": "**Unrestricted File Upload** lets attackers upload malicious files (like web shells) to your server.",
    "CWE-476": "**NULL Pointer Dereference** - your code tries to use a pointer that's NULL, causing crashes.",
    "CWE-502": "**Insecure Deserialization** means untrusted data is deserialized without validation, enabling code execution.",
    "CWE-601": "**Open Redirect** lets attackers redirect users from your trusted site to a malicious one for phishing.",
    "CWE-787": "**Out-of-bounds Write** - data is written outside the intended memory buffer, often leading to remote code execution.",
    "CWE-798": "**Hardcoded Credentials** - passwords, API keys, or tokens are embedded directly in the source code.",
    "CWE-918": "**SSRF** lets attackers make your server send requests to internal systems, accessing internal APIs or cloud metadata.",
    "CWE-22": "**Path Traversal** means user input is used in file paths without sanitization. Attackers can use `../` to access any file on the server.",
    "CWE-269": "**Privilege Escalation** - a user can gain higher privileges than intended.",
    "CWE-276": "**Incorrect Permissions** - files or resources have permissions that are too permissive.",
    "CWE-327": "**Broken Cryptography** - you're using algorithms like MD5 or SHA1 that are cryptographically broken.",
    "CWE-330": "**Insufficient Randomness** - security-critical random values (tokens, keys) are predictable.",
    "CWE-399": "**Resource Management Issues** - improper handling of system resources can lead to denial of service.",
    "CWE-401": "**Memory Leak** - memory is allocated but never freed, eventually causing crashes.",
    "CWE-20": "**Improper Input Validation** - user input isn't properly checked before use, enabling many other vulnerabilities.",
    "CWE-284": "**Broken Access Control** - authorization checks are missing or incorrectly implemented.",
}
# ============================================================
# Model Loading
# ============================================================
CLASSIFIER_ID = "ayshajavd/graphcodebert-vuln-classifier"
FIXER_ID = "ayshajavd/codet5p-vuln-fixer"

# Defaults that stay in effect unless label_config.json overrides them below:
# a global 0.3 decision threshold per label and no temperature scaling.
THRESHOLDS = {cwe: 0.3 for cwe in TARGET_CWES}
TEMPERATURE = 1.0

# --- Classifier -----------------------------------------------------------
# Try the fine-tuned checkpoint first. On any failure fall back to an
# untrained base model so the app still starts; CLASSIFIER_LOADED records
# which one is active and is surfaced in every report as "base_model".
print("Loading classifier...")
try:
    cls_tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_ID)
    cls_model = AutoModelForSequenceClassification.from_pretrained(CLASSIFIER_ID)
    cls_model.eval()
    CLASSIFIER_LOADED = True
    print("Classifier loaded successfully")
    # The calibration file is optional: a failure here keeps the defaults
    # above and must not discard the classifier that was just loaded.
    try:
        config_path = hf_hub_download(CLASSIFIER_ID, "label_config.json")
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(config_path, encoding="utf-8") as f:
            label_config = json.load(f)
        if "optimized_thresholds" in label_config:
            THRESHOLDS = label_config["optimized_thresholds"]
            print(f"Per-class thresholds loaded ({len(THRESHOLDS)} classes)")
        if "temperature" in label_config:
            temperature = float(label_config["temperature"])
            # classify_code() divides logits by TEMPERATURE, so a zero,
            # negative, or NaN value would corrupt every probability.
            if temperature > 0:
                TEMPERATURE = temperature
                print(f"Temperature calibration loaded (T={TEMPERATURE:.4f})")
            else:
                print(f"Ignoring invalid temperature {temperature}; keeping T={TEMPERATURE:.4f}")
    except Exception as e:
        print(f"Could not load label_config: {e}. Using defaults.")
except Exception as e:
    print(f"Classifier not available: {e}")
    cls_tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
    # Size the fallback head from the label list so it cannot drift out of
    # step with TARGET_CWES.
    cls_model = AutoModelForSequenceClassification.from_pretrained(
        "huggingface/CodeBERTa-small-v1",
        num_labels=len(TARGET_CWES),
        problem_type="multi_label_classification",
    )
    cls_model.eval()
    CLASSIFIER_LOADED = False

# --- Fix generator --------------------------------------------------------
# Same pattern: fine-tuned checkpoint first, base model as the fallback.
print("Loading fix generator...")
try:
    fix_tokenizer = AutoTokenizer.from_pretrained(FIXER_ID)
    fix_model = T5ForConditionalGeneration.from_pretrained(FIXER_ID)
    fix_model.eval()
    FIXER_LOADED = True
    print("Fix generator loaded successfully")
except Exception as e:
    print(f"Fix generator not available: {e}")
    fix_tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5p-220m")
    fix_model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5p-220m")
    fix_model.eval()
    FIXER_LOADED = False
def detect_language(code: str) -> str:
    """Guess the programming language of *code* from simple textual markers.

    Only the first 500 characters are inspected, case-insensitively. Checks
    run from the most distinctive marker to the least, and the first match
    wins.

    Returns:
        One of "PHP", "Go", "C++", "C", "Java", "JavaScript", "Python",
        or "Unknown" when no marker is found.
    """
    code_lower = code[:500].lower()
    if "<?php" in code_lower:
        return "PHP"
    if "package main" in code_lower and "func " in code_lower:
        return "Go"
    if "#include" in code_lower:
        if "class " in code_lower or "std::" in code_lower or "cout" in code_lower:
            return "C++"
        return "C"
    if "import java" in code_lower or "public class" in code_lower:
        return "Java"
    # The arrow token is matched without a leading \b: "=" is not a word
    # character, so r"\b=>" only matched when the arrow directly followed an
    # identifier ("x=>") and missed the usual "(a, b) => ..." form.
    if re.search(r'\b(?:const |let |var |function |require\(|module\.exports)|=>', code_lower):
        return "JavaScript"
    if re.search(r'\b(def |import |from |class |self\.|print\()', code_lower):
        return "Python"
    return "Unknown"
def classify_code(code):
    """Score *code* against every label and report the ones over threshold.

    The snippet is tokenized (truncated to 512 tokens), run through the
    classifier without gradient tracking, and the logits are divided by
    TEMPERATURE before the sigmoid.

    Returns:
        A 3-tuple ``(detected, safe_probability, all_probabilities)``:
        ``detected`` is a list of ``(cwe, probability)`` pairs whose
        probability exceeds that label's threshold, highest first; the
        "safe" label is never included. ``safe_probability`` is the score at
        index 0. ``all_probabilities`` maps every label in TARGET_CWES to
        its score.
    """
    encoded = cls_tokenizer(code, return_tensors="pt", max_length=512, truncation=True, padding=True)
    with torch.no_grad():
        # assumes a single snippet, so squeeze() leaves one score per label
        raw_logits = cls_model(**encoded).logits.squeeze()
    probabilities = torch.sigmoid(raw_logits / TEMPERATURE).numpy()

    scores_by_label = {label: float(prob) for label, prob in zip(TARGET_CWES, probabilities)}
    flagged = [
        (label, float(prob))
        for label, prob in zip(TARGET_CWES, probabilities)
        if label != "safe" and prob > THRESHOLDS.get(label, 0.3)
    ]
    flagged.sort(key=lambda pair: pair[1], reverse=True)
    return flagged, float(probabilities[0]), scores_by_label
def generate_fix(code, language, cwe_id=None):
    """Ask the fix-generation model for a repaired version of *code*.

    The model input is a task prefix followed by the code. When *cwe_id* is
    given, the prefix names the vulnerability (via CWE_NAMES, falling back to
    the raw id); otherwise it names only the lower-cased language. The input
    is truncated to 512 tokens and decoded with 5-beam search.

    Returns:
        The generated text with special tokens removed.
    """
    lang = language.lower()
    if cwe_id:
        prompt = f"fix {CWE_NAMES.get(cwe_id, cwe_id)} vulnerability in {lang}: "
    else:
        prompt = f"fix {lang}: "

    encoded = fix_tokenizer(prompt + code, return_tensors="pt", max_length=512, truncation=True)
    with torch.no_grad():
        generated = fix_model.generate(
            encoded.input_ids,
            max_length=512,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=3,
        )
    return fix_tokenizer.decode(generated[0], skip_special_tokens=True)
def build_json_report(code):
    """Analyze *code* and return the full report as a JSON-serializable dict.

    Steps: detect the language, classify the snippet, derive an overall risk
    score and level, describe each detected CWE, sketch an attack chain when
    more than one CWE is detected, and request a suggested fix.

    Returns:
        A dict with the keys ``language``, ``model_status``,
        ``overall_risk_score``, ``risk_level``, ``safe_probability``,
        ``num_vulnerabilities``, ``vulnerabilities``, ``attack_chain``
        (``None`` when not applicable), ``suggested_fix`` (``None`` when
        generation fails or yields only whitespace),
        ``all_class_probabilities`` and ``timestamp`` (UTC).
    """
    language = detect_language(code)
    detected, safe_prob, all_probs = classify_code(code)

    if not detected:
        # Nothing crossed a threshold: risk is the complement of "safe".
        overall_risk = max(0, int(100 - 100 * safe_prob))
        risk_level = "Low"
    else:
        # Worst severity among the findings, scaled by mean confidence.
        max_sev = max(SEVERITY_MAP.get(c, ("Low", 30))[1] for c, _ in detected)
        avg_conf = sum(p for _, p in detected) / len(detected)
        overall_risk = min(100, int(max_sev * avg_conf * 1.2))
        risk_level = "Critical" if overall_risk >= 80 else "High" if overall_risk >= 60 else "Medium" if overall_risk >= 40 else "Low"

    vulns = []
    for cwe, conf in detected:
        sev, score = SEVERITY_MAP.get(cwe, ("Medium", 50))
        threshold_used = THRESHOLDS.get(cwe, 0.3)
        vulns.append({
            "cwe_id": cwe, "name": CWE_NAMES.get(cwe, cwe),
            "owasp_category": CWE_TO_OWASP.get(cwe, "N/A"),
            "severity": sev, "severity_score": score,
            "detection_confidence": round(conf, 4),
            "threshold_used": round(threshold_used, 3),
            "exploit_likelihood": min(100, int(conf * score)),
            "explanation": EXPLANATIONS.get(cwe, "Security risk detected.").replace("**", ""),
        })

    # An attack chain is only sketched when several weaknesses could combine.
    # Phases are listed in the order they are numbered in the report.
    chain = None
    if len(detected) > 1:
        attack_phases = (
            ({"CWE-20", "CWE-89", "CWE-79", "CWE-78", "CWE-94"},
             "Initial Access", "Exploit input validation weakness"),
            ({"CWE-264", "CWE-269", "CWE-284", "CWE-287"},
             "Privilege Escalation", "Bypass access controls"),
            ({"CWE-200", "CWE-22", "CWE-125"},
             "Data Exfiltration", "Read sensitive files or memory"),
            ({"CWE-119", "CWE-416", "CWE-787", "CWE-502"},
             "Code Execution", "Exploit memory corruption"),
        )
        cats = {c for c, _ in detected}
        steps = []
        for members, phase, description in attack_phases:
            if cats & members:
                steps.append({"step": len(steps) + 1, "phase": phase, "description": description})
        if steps:
            chain = steps

    # Fix generation is best-effort: a failure must not sink the report, but
    # only ordinary errors are absorbed (a bare ``except`` would also swallow
    # KeyboardInterrupt/SystemExit), and the reason is logged.
    fix = None
    try:
        top_cwe = detected[0][0] if detected else None
        candidate = generate_fix(code, language, top_cwe)
        if candidate and candidate.strip():
            fix = candidate
    except Exception as e:
        print(f"Fix generation failed: {e}")

    return {
        "language": language,
        "model_status": {
            "classifier": "trained_v2" if CLASSIFIER_LOADED else "base_model",
            "fix_generator": "trained_v2" if FIXER_LOADED else "base_model",
            "calibration": f"T={TEMPERATURE:.4f}" if TEMPERATURE != 1.0 else "none",
            "thresholds": "per_class_optimized" if any(v != 0.3 for v in THRESHOLDS.values()) else "global_0.3",
        },
        "overall_risk_score": overall_risk, "risk_level": risk_level,
        "safe_probability": round(safe_prob, 4), "num_vulnerabilities": len(vulns),
        "vulnerabilities": vulns, "attack_chain": chain, "suggested_fix": fix,
        "all_class_probabilities": all_probs,
        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
def analyze_code(code):
    """Render the security analysis of *code* as a Markdown report.

    Returns a short prompt string when *code* is empty or whitespace-only.
    Otherwise the report is built from build_json_report(): a header with
    language and model status, then either a "no vulnerabilities" summary or
    one table per finding, an optional attack chain, and the suggested fix.

    The severity icons and the attack-chain separator were mis-encoded in the
    source text; they are restored here to the colored-circle emoji and a dash.
    """
    if not code or not code.strip():
        return "Please paste some code to analyze."

    data = build_json_report(code)
    model_status = data['model_status']
    # One icon table serves both the overall risk level and each finding.
    severity_icons = {"Critical": "🔴", "High": "🟠", "Medium": "🟡", "Low": "🟢"}
    unknown_icon = "⚪"

    r = ["# Code Security Analysis Report\n"]
    r.append(f"**Language:** {data['language']}")
    cls_status = "Trained v2 (GraphCodeBERT + ASL)" if model_status['classifier'] == 'trained_v2' else "Base Model"
    fix_status = "Trained v2 (CodeT5+ CWE-aware)" if model_status['fix_generator'] == 'trained_v2' else "Base Model"
    r.append(f"**Classifier:** {cls_status}")
    r.append(f"**Fix Generator:** {fix_status}")
    if model_status['calibration'] != 'none':
        r.append(f"**Calibration:** {model_status['calibration']} | **Thresholds:** {model_status['thresholds']}")
    r.append("")

    # Clean result: short summary, no per-finding sections and no fix.
    if data['num_vulnerabilities'] == 0:
        r.append("## No Vulnerabilities Detected")
        r.append(f"**Risk Score:** {data['overall_risk_score']}/100 | **Safe Confidence:** {data['safe_probability']:.1%}\n")
        r.append("Code appears safe. Always supplement with manual review and SAST tools.")
        return "\n".join(r)

    emoji = severity_icons.get(data['risk_level'], unknown_icon)
    r.append(f"## {emoji} {data['num_vulnerabilities']} Vulnerability(ies) Detected\n")
    r.append(f"**Risk Score:** {data['overall_risk_score']}/100 ({data['risk_level']}) | **Safe Probability:** {data['safe_probability']:.1%}\n---\n")

    for i, v in enumerate(data['vulnerabilities'], 1):
        se = severity_icons.get(v['severity'], unknown_icon)
        r.append(f"### {i}. {se} {v['name']}")
        r.append("| Property | Value |\n|----------|-------|")
        r.append(f"| **CWE ID** | {v['cwe_id']} |")
        r.append(f"| **OWASP** | {v['owasp_category']} |")
        r.append(f"| **Severity** | {v['severity']} ({v['severity_score']}/100) |")
        r.append(f"| **Confidence** | {v['detection_confidence']:.1%} (calibrated) |")
        r.append(f"| **Threshold** | {v['threshold_used']:.3f} (per-class optimized) |")
        r.append(f"| **Exploit Likelihood** | {v['exploit_likelihood']}% |")
        r.append(f"\n**Why Dangerous:** {v['explanation']}\n")

    if data['attack_chain']:
        r.append("---\n## Attack Chain\n")
        for s in data['attack_chain']:
            r.append(f"{s['step']}. **{s['phase']}** — {s['description']}")

    r.append("\n---\n## Suggested Fix\n")
    if data['suggested_fix']:
        # Fence the fix with the detected language for syntax highlighting.
        r.append(f"```{data['language'].lower()}\n{data['suggested_fix']}\n```")
    else:
        r.append("*Fix generation unavailable. Please review manually.*")
    r.append("\n---\n*AI-generated report (v2: calibrated probabilities + per-class thresholds). Verify with manual review and SAST tools.*")
    return "\n".join(r)
def get_json_report(code):
    """Return the JSON report for *code*, or an error dict for blank input.

    Blank means ``None``, an empty string, or whitespace only; in that case
    the result is ``{"error": "No code provided"}`` and no model is run.
    """
    has_code = bool(code) and bool(code.strip())
    if has_code:
        return build_json_report(code)
    return {"error": "No code provided"}
# Sample snippets offered in the UI through gr.Examples. Each entry is a
# one-element list because the examples feed a single input component
# (code_input).
# NOTE(review): the last snippet calls conn.fetchone(), which
# sqlite3.Connection does not provide; if it is meant as the "safe" sample,
# confirm whether it should read conn.execute(...).fetchone().
EXAMPLES = [
    ["""import sqlite3\n\ndef get_user(username):\n conn = sqlite3.connect('users.db')\n query = f"SELECT * FROM users WHERE username = '{username}'"\n return conn.execute(query).fetchone()\n"""],
    ["""#include <stdio.h>\n#include <string.h>\n\nvoid process_input(char *user_input) {\n char buffer[64];\n strcpy(buffer, user_input);\n printf("Processed: %s\\n", buffer);\n}\n"""],
    ["""const express = require('express');\nconst app = express();\n\napp.get('/search', (req, res) => {\n const query = req.query.q;\n res.send(`<h1>Results for: ${query}</h1>`);\n});\n"""],
    ["""import requests, hashlib\n\nAPI_KEY = "sk-proj-abc123def456"\nDB_PASSWORD = "admin123"\n\ndef hash_password(password):\n return hashlib.md5(password.encode()).hexdigest()\n"""],
    ["""import sqlite3\nfrom hashlib import sha256\nimport hmac, secrets\n\ndef get_user(username):\n conn = sqlite3.connect('users.db')\n conn.execute("SELECT * FROM users WHERE username = ?", (username,))\n return conn.fetchone()\n"""],
]
# Gradio UI definition. Components are attached to the layout container whose
# `with` block is active when they are created, so statement order and nesting
# here define the page layout.
# NOTE(review): the source text carried no indentation; the nesting below is
# reconstructed from the syntax (in particular, gr.Examples is placed at the
# Blocks level) - confirm against the deployed layout.
# NOTE(review): the leading "π" / "β" characters in several labels and
# headings look like mis-decoded emoji or arrows; they are left untouched
# because the intended characters cannot be recovered from this text.
with gr.Blocks(
    title="Code Security Risk Analyzer v2",
    theme=gr.themes.Soft(),
    css=".gradio-container { max-width: 1200px; margin: auto; }",
) as demo:
    # Page header: title, supported languages, model and dataset links.
    gr.Markdown("""
# π AI-Powered Code Security Risk Analyzer v2
### Detect OWASP Top 10 & CWE vulnerabilities with calibrated confidence + per-class thresholds
Paste code in Python, JavaScript, Java, C, C++, PHP, or Go.
**Models:** [GraphCodeBERT](https://huggingface.co/ayshajavd/graphcodebert-vuln-classifier) (detection, Macro F1=0.476) + [CodeT5+](https://huggingface.co/ayshajavd/codet5p-vuln-fixer) (fixes, BLEU=81.0) | **Dataset:** [175K samples](https://huggingface.co/datasets/ayshajavd/code-security-vulnerability-dataset)
**v2 Improvements:** Per-class threshold optimization | Temperature-calibrated probabilities | Asymmetric Loss training | GraphCodeBERT-base (125M params) | CodeT5+ 220M CWE-aware fixer
""")
    # Two equal-width columns: code editor + buttons, and the report outputs.
    with gr.Row():
        with gr.Column(scale=1):
            code_input = gr.Code(label="Paste Your Code Here", language="python", lines=20)
            with gr.Row():
                analyze_btn = gr.Button("π Analyze Security", variant="primary", size="lg")
                json_btn = gr.Button("π JSON Report", variant="secondary", size="lg")
        with gr.Column(scale=1):
            report_output = gr.Markdown(label="Security Report")
            # Created hidden; show_json() below makes it visible on demand.
            json_output = gr.JSON(label="JSON Report", visible=False)
    gr.Examples(examples=EXAMPLES, inputs=[code_input], label="Example Code Snippets")

    def show_json(code):
        # Reveal the JSON panel and fill it with the report in one update.
        return gr.update(visible=True, value=get_json_report(code))

    # api_name="analyze" is the endpoint name the API documentation below
    # refers to as "/analyze".
    analyze_btn.click(fn=analyze_code, inputs=[code_input], outputs=[report_output], api_name="analyze")
    json_btn.click(fn=show_json, inputs=[code_input], outputs=[json_output])
    # Hidden button whose click handler carries api_name="get_json_report",
    # the second endpoint named in the API documentation below.
    with gr.Row(visible=False):
        api_json_btn = gr.Button("get_json", visible=False)
    api_json_btn.click(fn=get_json_report, inputs=[code_input], outputs=[json_output], api_name="get_json_report")
    with gr.Accordion("π REST API Documentation", open=False):
        gr.Markdown("""
### Python Client
```python
from gradio_client import Client
client = Client("ayshajavd/code-security-analyzer")
report = client.predict(code="your code here", api_name="/analyze")
json_report = client.predict(code="your code here", api_name="/get_json_report")
```
### cURL
```bash
curl -X POST https://ayshajavd-code-security-analyzer.hf.space/call/analyze \\
-H "Content-Type: application/json" -d '{"data": ["your code here"]}'
```
""")
    # Static reference table; it restates the grouping in CWE_TO_OWASP.
    gr.Markdown("""
---
### 30 CWE Vulnerability Classes β OWASP Top 10
| OWASP Category | CWEs |
|---|---|
| **A01: Broken Access Control** | CWE-22, 200, 264, 269, 276, 284, 352, 601 |
| **A02: Cryptographic Failures** | CWE-310, 327, 330 |
| **A03: Injection** | CWE-20, 78, 79, 89, 94, 119, 125, 190, 401, 416, 476, 787 |
| **A04: Insecure Design** | CWE-362, 399, 434 |
| **A07: Auth Failures** | CWE-287, 798 |
| **A08: Integrity Failures** | CWE-502 |
| **A10: SSRF** | CWE-918 |
""")
# Start the Gradio server only when this file is run as a script, so that
# importing the module does not launch the server.
if __name__ == "__main__":
    demo.launch()