ayshajavd committed on
Commit
11d02f9
·
verified ·
1 Parent(s): 344326b

Add Code Security Analyzer Gradio app

Browse files
Files changed (1) hide show
  1. app.py +535 -0
app.py ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code Security Risk Analyzer - Gradio UI
3
+ Analyzes code for OWASP Top 10, CWE vulnerabilities.
4
+ Outputs structured security report with vulnerability details, severity, and fixes.
5
+ """
6
+ import json
7
+ import re
8
+ import torch
9
+ import gradio as gr
10
+ from transformers import (
11
+ AutoTokenizer,
12
+ AutoModelForSequenceClassification,
13
+ T5ForConditionalGeneration,
14
+ )
15
+
16
+ # ============================================================
17
+ # Label Mappings
18
+ # ============================================================
19
# Ordered classifier labels. Index 0 is the non-vulnerable "safe" label;
# every other entry is a CWE identifier. The order matters: analyze_code
# pairs each position with the same position in the model's output vector.
_CWE_NUMBERS = (
    20, 22, 78, 79, 89, 94,
    119, 125, 190, 200, 264, 269,
    276, 284, 287, 310, 327, 330,
    352, 362, 399, 401, 416, 434,
    476, 502, 601, 787, 798, 918,
)
TARGET_CWES = ["safe"] + [f"CWE-{number}" for number in _CWE_NUMBERS]
26
+
27
# Display title for each classifier label, keyed by the label string
# (the "safe" label plus every CWE identifier). Used as the heading of
# each finding in the generated report.
CWE_NAMES = {
    "safe": "Safe Code",
    # Input handling / injection
    "CWE-20": "Improper Input Validation",
    "CWE-22": "Path Traversal",
    "CWE-78": "OS Command Injection",
    "CWE-79": "Cross-Site Scripting (XSS)",
    "CWE-89": "SQL Injection",
    "CWE-94": "Code Injection",
    # Memory and arithmetic safety
    "CWE-119": "Buffer Overflow",
    "CWE-125": "Out-of-bounds Read",
    "CWE-190": "Integer Overflow",
    # Exposure, permissions and authentication
    "CWE-200": "Information Exposure",
    "CWE-264": "Permissions/Privileges/Access Controls",
    "CWE-269": "Improper Privilege Management",
    "CWE-276": "Incorrect Default Permissions",
    "CWE-284": "Improper Access Control",
    "CWE-287": "Improper Authentication",
    # Cryptography
    "CWE-310": "Cryptographic Issues",
    "CWE-327": "Broken Crypto Algorithm",
    "CWE-330": "Insufficient Randomness",
    # Remaining classes
    "CWE-352": "Cross-Site Request Forgery (CSRF)",
    "CWE-362": "Race Condition",
    "CWE-399": "Resource Management Errors",
    "CWE-401": "Memory Leak",
    "CWE-416": "Use After Free",
    "CWE-434": "Unrestricted File Upload",
    "CWE-476": "NULL Pointer Dereference",
    "CWE-502": "Insecure Deserialization",
    "CWE-601": "Open Redirect",
    "CWE-787": "Out-of-bounds Write",
    "CWE-798": "Hardcoded Credentials",
    "CWE-918": "Server-Side Request Forgery (SSRF)",
}
60
+
61
# OWASP Top 10 (2021) category shown for each CWE in the report. Labels
# without an entry (e.g. "safe") are reported as "N/A" by the caller.
#
# NOTE(review): the memory-safety CWEs (119, 125, 190, 401, 416, 476, 787)
# are bucketed under "A03 - Injection" here, and 362/399 under "A04";
# this looks like a project-level grouping rather than the official OWASP
# CWE mapping - confirm it is intentional before relying on it.
_OWASP_GROUPS = (
    (
        "A01:2021 - Broken Access Control",
        ("CWE-22", "CWE-200", "CWE-264", "CWE-276",
         "CWE-284", "CWE-352", "CWE-601", "CWE-269"),
    ),
    (
        "A02:2021 - Cryptographic Failures",
        ("CWE-310", "CWE-327", "CWE-330"),
    ),
    (
        "A03:2021 - Injection",
        ("CWE-20", "CWE-78", "CWE-79", "CWE-89", "CWE-94", "CWE-119",
         "CWE-125", "CWE-190", "CWE-416", "CWE-476", "CWE-401", "CWE-787"),
    ),
    (
        "A04:2021 - Insecure Design",
        ("CWE-434", "CWE-362", "CWE-399"),
    ),
    (
        "A07:2021 - Identification & Auth Failures",
        ("CWE-287", "CWE-798"),
    ),
    (
        "A08:2021 - Software & Data Integrity Failures",
        ("CWE-502",),
    ),
    (
        "A10:2021 - Server-Side Request Forgery",
        ("CWE-918",),
    ),
)

# Flatten the groups into the CWE -> category lookup used by the report.
CWE_TO_OWASP = {
    cwe: category
    for category, members in _OWASP_GROUPS
    for cwe in members
}
93
+
94
# Static severity rating per CWE as a (band, score) pair, where score is
# on a 0-100 scale. The score feeds both the overall risk score and the
# per-finding "Exploit Likelihood" figure in the report.
SEVERITY_MAP = {
    "CWE-89": ("Critical", 95),
    "CWE-78": ("Critical", 93),
    "CWE-94": ("Critical", 92),
    "CWE-502": ("Critical", 90),
    "CWE-918": ("Critical", 88),
    "CWE-798": ("Critical", 87),
    "CWE-119": ("High", 85),
    "CWE-787": ("High", 84),
    "CWE-416": ("High", 83),
    "CWE-79": ("High", 80),
    "CWE-22": ("High", 78),
    "CWE-287": ("High", 77),
    "CWE-284": ("High", 76),
    "CWE-434": ("High", 75),
    "CWE-125": ("Medium", 70),
    "CWE-190": ("Medium", 68),
    "CWE-352": ("Medium", 67),
    "CWE-476": ("Medium", 65),
    "CWE-362": ("Medium", 63),
    "CWE-20": ("Medium", 60),
    "CWE-264": ("Medium", 58),
    "CWE-269": ("Medium", 57),
    "CWE-310": ("Medium", 65),
    "CWE-327": ("Medium", 62),
    "CWE-330": ("Medium", 55),
    "CWE-399": ("Low", 45),
    "CWE-401": ("Low", 42),
    "CWE-200": ("Low", 40),
    "CWE-276": ("Low", 38),
    "CWE-601": ("Medium", 55),
}
111
+
112
# Plain-language, Markdown-formatted description of each CWE. The text is
# inserted verbatim under "Why This Is Dangerous" in the report, so the
# bold markers and inline code spans are part of the rendered output.
EXPLANATIONS = {
    "CWE-89": "**SQL Injection** means an attacker can manipulate your database queries by injecting malicious SQL code through user inputs. This could let them steal, modify, or delete ALL your data. Imagine someone typing `'; DROP TABLE users; --` into a login form.",
    "CWE-79": "**Cross-Site Scripting (XSS)** lets attackers inject malicious JavaScript into your web pages. When other users visit the page, the script runs in their browser - stealing cookies, session tokens, or redirecting them to fake sites.",
    "CWE-78": "**OS Command Injection** means user input is being passed directly to system commands. An attacker could run ANY command on your server - install malware, steal files, or take complete control.",
    "CWE-94": "**Code Injection** allows attackers to inject and execute arbitrary code in your application. Functions like `eval()`, `exec()`, or dynamic code compilation with untrusted input are the usual culprits.",
    "CWE-119": "**Buffer Overflow** happens when your code writes data beyond the allocated memory buffer. Attackers can exploit this to crash your program, corrupt data, or even execute malicious code.",
    "CWE-125": "**Out-of-bounds Read** means your code reads memory outside the intended buffer. This can leak sensitive data like passwords, encryption keys, or other users' data from memory.",
    "CWE-190": "**Integer Overflow** occurs when an arithmetic operation produces a value too large for the data type. This can cause crashes, infinite loops, or be chained with buffer overflows for code execution.",
    "CWE-200": "**Information Exposure** means sensitive data (API keys, passwords, internal paths, stack traces) is being leaked to unauthorized parties through error messages, logs, or responses.",
    "CWE-264": "**Improper Access Control** means users can access resources or perform actions they shouldn't be authorized for. Missing permission checks let attackers escalate privileges.",
    "CWE-287": "**Authentication Bypass** means the login/identity verification can be circumvented. Attackers could access any account without knowing the password.",
    "CWE-310": "**Cryptographic Issues** - you're using weak, broken, or improperly configured encryption. Data you think is protected may be easily decryptable by attackers.",
    "CWE-352": "**CSRF** tricks authenticated users into performing unwanted actions on your site. An attacker's page could make a user unknowingly transfer money, change their email, or delete their account.",
    "CWE-362": "**Race Condition** means two operations compete for the same resource without proper synchronization. Attackers can exploit the timing window to bypass security checks.",
    "CWE-416": "**Use After Free** - memory is being used after it's been freed. Attackers can manipulate the freed memory to execute arbitrary code or crash the application.",
    "CWE-434": "**Unrestricted File Upload** lets attackers upload malicious files (like web shells) to your server. They could then execute code remotely and take full control.",
    "CWE-476": "**NULL Pointer Dereference** - your code tries to use a pointer/reference that's NULL. This crashes the program and can be exploited for denial-of-service attacks.",
    "CWE-502": "**Insecure Deserialization** means untrusted data is being deserialized without validation. Attackers can craft malicious serialized objects that execute code when deserialized.",
    "CWE-601": "**Open Redirect** lets attackers redirect users from your trusted site to a malicious one. This is commonly used in phishing attacks to steal credentials.",
    "CWE-787": "**Out-of-bounds Write** - data is written outside the intended memory buffer. This is a severe vulnerability that often leads to remote code execution.",
    "CWE-798": "**Hardcoded Credentials** - passwords, API keys, or tokens are embedded directly in the source code. Anyone with access to the code (or the compiled binary) can extract them.",
    "CWE-918": "**SSRF** lets attackers make your server send requests to internal systems. They could scan your network, access internal APIs, or read cloud metadata to steal credentials.",
    "CWE-22": "**Path Traversal** means user input is used in file paths without sanitization. Attackers can use `../` sequences to access any file on the server - config files, passwords, source code.",
    "CWE-269": "**Privilege Escalation** - a user can gain higher privileges than intended. A regular user might become an admin, accessing sensitive operations and data.",
    "CWE-276": "**Incorrect Permissions** - files or resources have permissions that are too permissive. Sensitive files might be world-readable, exposing secrets.",
    "CWE-327": "**Broken Cryptography** - you're using algorithms like MD5 or SHA1 that are cryptographically broken. Attackers can forge hashes or decrypt data.",
    "CWE-330": "**Insufficient Randomness** - security-critical random values (tokens, keys, IDs) are predictable. Attackers can guess session tokens or API keys.",
    "CWE-399": "**Resource Management Issues** - improper handling of system resources can lead to denial of service through resource exhaustion.",
    "CWE-401": "**Memory Leak** - memory is allocated but never freed. Over time, the application uses more and more memory until it crashes.",
    "CWE-20": "**Improper Input Validation** - user input isn't properly checked before use. This is the root cause of many other vulnerabilities like injection and overflow attacks.",
    "CWE-284": "**Broken Access Control** - authorization checks are missing or incorrectly implemented. Users can access other users' data or admin functionality.",
}
144
+
145
+ # ============================================================
146
+ # Model Loading
147
+ # ============================================================
148
# Hub repositories holding the fine-tuned checkpoints.
CLASSIFIER_ID = "ayshajavd/graphcodebert-vuln-classifier"
FIXER_ID = "ayshajavd/codet5p-vuln-fixer"

# Both models are loaded once, at import time. If a fine-tuned checkpoint
# cannot be loaded for any reason, the corresponding base checkpoint is
# loaded instead and the *_LOADED flag is set to False so the report can
# tell the user that results come from an untrained model.

print("Loading classifier...")
try:
    cls_tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_ID)
    cls_model = AutoModelForSequenceClassification.from_pretrained(CLASSIFIER_ID)
    cls_model.eval()
    CLASSIFIER_LOADED = True
    print("Classifier loaded successfully")
except Exception as e:  # deliberate catch-all: any load failure triggers the fallback
    print(f"Classifier not available yet: {e}")
    cls_tokenizer = AutoTokenizer.from_pretrained("microsoft/graphcodebert-base")
    cls_model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/graphcodebert-base",
        # Size the classification head from the label list so the two
        # cannot drift apart if labels are added or removed.
        num_labels=len(TARGET_CWES),
        problem_type="multi_label_classification",
    )
    cls_model.eval()
    CLASSIFIER_LOADED = False
    print("Loaded base GraphCodeBERT as fallback")

print("Loading fix generator...")
try:
    fix_tokenizer = AutoTokenizer.from_pretrained(FIXER_ID)
    fix_model = T5ForConditionalGeneration.from_pretrained(FIXER_ID)
    fix_model.eval()
    FIXER_LOADED = True
    print("Fix generator loaded successfully")
except Exception as e:  # deliberate catch-all: any load failure triggers the fallback
    print(f"Fix generator not available yet: {e}")
    fix_tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5p-220m")
    fix_model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5p-220m")
    fix_model.eval()
    FIXER_LOADED = False
    print("Loaded base CodeT5+ as fallback")
184
+
185
+
186
def detect_language(code: str) -> str:
    """Guess the programming language of a code snippet.

    Only the first 500 characters are inspected, lower-cased, and matched
    against a fixed sequence of keyword heuristics. The first heuristic
    that matches wins, so the order of the checks is significant.

    Args:
        code: Source text to inspect.

    Returns:
        One of "PHP", "Go", "C++", "C", "Java", "Rust", "JavaScript",
        "Python", or "Unknown" when no heuristic matches.
    """
    code_lower = code[:500].lower()

    if "<?php" in code_lower:
        return "PHP"
    if "package main" in code_lower and "func " in code_lower:
        return "Go"
    if "#include" in code_lower:
        # Both C and C++ use #include; C++ is chosen only on a C++-only marker.
        if any(marker in code_lower for marker in ("class ", "std::", "cout")):
            return "C++"
        return "C"
    if any(
        marker in code_lower
        for marker in ("import java", "public class", "public static void main")
    ):
        return "Java"
    # Rust has to be tested before JavaScript: "let mut" also satisfies the
    # JavaScript "let " pattern, which previously made this branch unreachable.
    if re.search(r'\bfn\s+\w+', code_lower) and re.search(r'\blet\s+mut\s', code_lower):
        return "Rust"
    # "=>" is matched outside the \b group: a word boundary never exists
    # between a space or ")" and "=", so "(a, b) => ..." used to be missed.
    if re.search(r'\b(?:const |let |var |function |require\(|module\.exports)|=>', code_lower):
        return "JavaScript"
    if re.search(r'\b(def |import |from |class |self\.|print\()', code_lower):
        return "Python"
    return "Unknown"
206
+
207
+
208
def generate_fix(code: str, language: str) -> str:
    """Ask the fix-generation model for a rewritten version of *code*.

    The model input is the code prefixed with "fix <language>: " (language
    lower-cased), truncated to 512 tokens. Decoding uses beam search with
    5 beams and blocks repeated 3-grams; only the first returned sequence
    is decoded.

    Args:
        code: Source text to repair.
        language: Language name used to build the task prefix.

    Returns:
        The decoded model output with special tokens removed.
    """
    prompt = f"fix {language.lower()}: " + code
    encoded = fix_tokenizer(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated = fix_model.generate(
            encoded.input_ids,
            max_length=512,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=3,
        )

    best_sequence = generated[0]
    return fix_tokenizer.decode(best_sequence, skip_special_tokens=True)
227
+
228
+
229
def _risk_level(score: int) -> str:
    """Translate a 0-100 risk score into the band name used in the report."""
    if score >= 80:
        return "Critical"
    if score >= 60:
        return "High"
    if score >= 40:
        return "Medium"
    return "Low"


def _attack_chain_steps(detected_cwes: set) -> list:
    """Return the numbered attack-chain lines for the stages present in *detected_cwes*."""
    stages = (
        ({"CWE-20", "CWE-89", "CWE-79", "CWE-78", "CWE-94"},
         "**Initial Access** - Exploit input validation weakness to inject malicious payload"),
        ({"CWE-264", "CWE-269", "CWE-284", "CWE-287"},
         "**Privilege Escalation** - Bypass access controls to gain elevated permissions"),
        ({"CWE-200", "CWE-22", "CWE-125"},
         "**Data Exfiltration** - Read sensitive files or memory to extract secrets"),
        ({"CWE-119", "CWE-416", "CWE-787", "CWE-502"},
         "**Remote Code Execution** - Exploit memory corruption or deserialization for code execution"),
    )
    present = [text for members, text in stages if members & detected_cwes]
    # Numbering is consecutive over the stages that actually apply.
    return [f"{number}. {text}" for number, text in enumerate(present, 1)]


def analyze_code(code: str) -> str:
    """Classify *code* and return a Markdown security report.

    The snippet is tokenized (truncated to 512 tokens) and scored by the
    classifier; each output is passed through a sigmoid and treated as an
    independent per-label probability. Every non-"safe" label whose
    probability exceeds 0.3 is reported, most confident first, followed by
    an attack-chain section (when more than one label fired) and a
    model-generated fix.

    Args:
        code: Source text to analyze.

    Returns:
        A Markdown document, or a short prompt when *code* is empty or
        whitespace only.
    """
    if not code or not code.strip():
        return "Please paste some code to analyze."

    language = detect_language(code)

    # Classify
    inputs = cls_tokenizer(
        code, return_tensors="pt",
        max_length=512, truncation=True, padding=True
    )
    with torch.no_grad():
        logits = cls_model(**inputs).logits
    # A single snippet is scored, so row 0 holds its per-label probabilities.
    probs = torch.sigmoid(logits)[0].tolist()

    # Threshold 0.3 (rather than 0.5) favours sensitivity.
    threshold = 0.3
    detected = sorted(
        (
            (cwe, prob)
            for cwe, prob in zip(TARGET_CWES, probs)
            if cwe != "safe" and prob > threshold
        ),
        key=lambda item: item[1],
        reverse=True,
    )
    safe_prob = probs[0]  # index 0 of TARGET_CWES is the "safe" label

    # Report header
    model_status = "Trained Model" if CLASSIFIER_LOADED else "Base Model (untrained - results are for demo only)"
    fix_status = "Trained" if FIXER_LOADED else "Base Model"
    report = [
        "# Code Security Analysis Report\n",
        f"**Language Detected:** {language}",
        f"**Model Status:** {model_status}",
        f"**Fix Generator:** {fix_status}\n",
    ]

    if not detected:
        risk_score = 100 - max(0, int(100 * safe_prob))
        # The band is derived from the score; it used to be hard-coded to
        # "Low", which contradicted the number when safe_prob was small.
        risk_band = _risk_level(risk_score)
        report.append("## No Vulnerabilities Detected")
        report.append(f"**Overall Risk Score:** {risk_score}/100 ({risk_band} Risk)")
        report.append(f"**Safe Code Confidence:** {safe_prob:.1%}\n")
        if risk_band == "Low":
            report.append("The analyzed code appears to be safe based on our detection model. "
                          "However, always review code manually and use additional static analysis tools.")
        else:
            report.append("No specific vulnerability class crossed the detection threshold, "
                          "but the model's confidence that this code is safe is low. "
                          "Review the code manually and use additional static analysis tools.")
        return "\n".join(report)

    # Single fallback for CWEs missing from SEVERITY_MAP, used for both the
    # overall score and the per-finding rows.
    default_severity = ("Medium", 50)

    # Overall risk: worst severity scaled by the mean confidence, capped at 100.
    max_severity = max(SEVERITY_MAP.get(cwe, default_severity)[1] for cwe, _ in detected)
    avg_confidence = sum(prob for _, prob in detected) / len(detected)
    overall_risk = min(100, int(max_severity * avg_confidence * 1.2))
    risk_level = _risk_level(overall_risk)

    report.append(f"## {len(detected)} Vulnerability(ies) Detected\n")
    report.append(f"**Overall Risk Score:** {overall_risk}/100 ({risk_level})")
    report.append(f"**Safe Code Probability:** {safe_prob:.1%}\n")
    report.append("---\n")

    # One table per finding
    for idx, (cwe, confidence) in enumerate(detected, 1):
        name = CWE_NAMES.get(cwe, cwe)
        owasp = CWE_TO_OWASP.get(cwe, "N/A")
        severity, score = SEVERITY_MAP.get(cwe, default_severity)
        explanation = EXPLANATIONS.get(cwe, "This vulnerability could pose a security risk to your application.")
        exploit_likelihood = min(100, int(confidence * score))

        report.append(f"### {idx}. {name}")
        report.append("| Property | Value |")
        report.append("|----------|-------|")
        report.append(f"| **CWE ID** | {cwe} |")
        report.append(f"| **OWASP Category** | {owasp} |")
        report.append(f"| **Severity** | {severity} ({score}/100) |")
        report.append(f"| **Detection Confidence** | {confidence:.1%} |")
        report.append(f"| **Exploit Likelihood** | {exploit_likelihood}% |")
        report.append(f"\n**Why This Is Dangerous:**\n{explanation}\n")

    # Attack chain analysis (only meaningful with two or more findings)
    if len(detected) > 1:
        report.append("---\n")
        report.append("## Attack Chain Analysis\n")
        report.append("Multiple vulnerabilities can be chained together for a more severe attack:\n")

        chain_steps = _attack_chain_steps({cwe for cwe, _ in detected})
        if chain_steps:
            report.append("\n".join(chain_steps))
        else:
            vuln_names = [CWE_NAMES.get(cwe, cwe) for cwe, _ in detected[:3]]
            report.append(f"The combination of **{' + '.join(vuln_names)}** increases the attack surface. "
                          "An attacker could exploit one vulnerability to amplify the impact of another.")

    # Suggested fix. Generation is best-effort: a failure is reported in
    # the document instead of aborting the whole analysis.
    report.append("\n---\n")
    report.append("## Suggested Secure Fix\n")
    try:
        fix = generate_fix(code, language)
        if fix and fix.strip():
            report.append(f"```{language.lower()}\n{fix}\n```\n")
        else:
            report.append("*Fix generation returned empty result. Please review manually.*\n")
    except Exception as e:
        report.append(f"*Fix generation failed: {str(e)}. Please review manually.*\n")

    report.append("---\n")
    report.append("*This report was generated by an AI model. Always verify findings with manual code review and additional security tools.*")

    return "\n".join(report)
366
+
367
+
368
+ # ============================================================
369
+ # Example Code Snippets
370
+ # ============================================================
371
# Sample snippets offered in the UI. Each entry is a one-element list
# because the examples widget expects one value per input component.
# Order: SQL injection (Python), stack buffer overflow (C), reflected XSS
# plus string-built SQL (JavaScript), hardcoded secrets and MD5 (Python),
# and a hardened Python sample using parameterized SQL and salted hashes.
# Indentation inside the snippets uses 4 spaces so the Python samples
# are syntactically valid when loaded into the editor.
EXAMPLES = [
    ["""import sqlite3

def get_user(username):
    conn = sqlite3.connect('users.db')
    cursor = conn.cursor()
    query = f"SELECT * FROM users WHERE username = '{username}'"
    cursor.execute(query)
    return cursor.fetchone()

def login(request):
    username = request.form['username']
    password = request.form['password']
    user = get_user(username)
    if user and user[2] == password:
        return "Login successful"
    return "Login failed"
"""],
    ["""#include <stdio.h>
#include <string.h>

void process_input(char *user_input) {
    char buffer[64];
    strcpy(buffer, user_input);
    printf("Processed: %s\\n", buffer);
}

int main(int argc, char *argv[]) {
    if (argc > 1) {
        process_input(argv[1]);
    }
    return 0;
}
"""],
    ["""const express = require('express');
const app = express();

app.get('/search', (req, res) => {
    const query = req.query.q;
    res.send(`<h1>Search Results for: ${query}</h1>
              <p>No results found for "${query}"</p>`);
});

app.get('/profile/:id', (req, res) => {
    const userId = req.params.id;
    db.query('SELECT * FROM users WHERE id = ' + userId, (err, user) => {
        res.send(`<h2>${user.name}</h2><p>${user.bio}</p>`);
    });
});
"""],
    ["""import requests
import hashlib

API_KEY = "sk-proj-abc123def456ghi789"
DB_PASSWORD = "admin123"
SECRET_KEY = "super_secret_key_2024"

def connect_to_api():
    headers = {"Authorization": f"Bearer {API_KEY}"}
    response = requests.get("https://api.example.com/data", headers=headers)
    return response.json()

def hash_password(password):
    return hashlib.md5(password.encode()).hexdigest()

def verify_admin(token):
    if token == SECRET_KEY:
        return True
    return False
"""],
    ["""import sqlite3
from hashlib import sha256
import hmac
import secrets

def get_user(username):
    conn = sqlite3.connect('users.db')
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM users WHERE username = ?", (username,))
    return cursor.fetchone()

def hash_password(password, salt=None):
    if salt is None:
        salt = secrets.token_hex(16)
    hashed = sha256((salt + password).encode()).hexdigest()
    return f"{salt}:{hashed}"

def verify_password(password, stored_hash):
    salt, expected_hash = stored_hash.split(':')
    actual_hash = sha256((salt + password).encode()).hexdigest()
    return hmac.compare_digest(actual_hash, expected_hash)
"""],
]
464
+
465
+ # ============================================================
466
+ # Gradio UI
467
+ # ============================================================
468
# Static text for the page, kept apart from the layout code below.
_PAGE_CSS = """
.report-output { font-size: 14px; }
.gradio-container { max-width: 1200px; margin: auto; }
"""

_INTRO_MD = """
# AI-Powered Code Security Risk Analyzer
### Detect OWASP Top 10, CWE vulnerabilities, and get secure fixes

Paste any code snippet (Python, JavaScript, Java, C, C++, PHP, Go) and get a comprehensive security audit.

**Powered by:** GraphCodeBERT (vulnerability detection) + CodeT5+ (fix generation)
"""

_FOOTER_MD = """
---
### Vulnerability Categories Covered

| OWASP Category | Vulnerabilities |
|---|---|
| **A01: Broken Access Control** | Path Traversal, IDOR, Missing Auth, Privilege Escalation, CSRF, Open Redirect |
| **A02: Cryptographic Failures** | Weak Crypto (MD5/SHA1), Insufficient Randomness, Broken Algorithms |
| **A03: Injection** | SQL Injection, XSS, Command Injection, Code Injection, Buffer Overflow |
| **A04: Insecure Design** | Race Conditions, Unrestricted Upload, Resource Management |
| **A07: Auth Failures** | Improper Authentication, Hardcoded Credentials |
| **A08: Integrity Failures** | Insecure Deserialization |
| **A10: SSRF** | Server-Side Request Forgery |

**Languages:** Python, JavaScript, Java, C, C++, PHP, Go

**Models:** [GraphCodeBERT](https://huggingface.co/microsoft/graphcodebert-base) (detection) | [CodeT5+](https://huggingface.co/Salesforce/codet5p-220m) (fix generation)

**Dataset:** [code-security-vulnerability-dataset](https://huggingface.co/datasets/ayshajavd/code-security-vulnerability-dataset) - 175K samples from BigVul, PrimeVul, CyberNative DPO
"""

# Page layout: intro text, a two-column row (code editor and button on
# the left, rendered report on the right), the example picker, and the
# reference footer. Components are created in display order.
with gr.Blocks(
    title="Code Security Risk Analyzer",
    theme=gr.themes.Soft(),
    css=_PAGE_CSS,
) as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        with gr.Column(scale=1):
            code_input = gr.Code(
                label="Paste Your Code Here",
                language="python",
                lines=20,
            )
            analyze_btn = gr.Button("Analyze Security", variant="primary", size="lg")

        with gr.Column(scale=1):
            report_output = gr.Markdown(
                label="Security Report",
                elem_classes=["report-output"],
            )

    # Clicking an example copies its snippet into the code editor.
    gr.Examples(
        examples=EXAMPLES,
        inputs=[code_input],
        label="Example Code Snippets (click to load)",
    )

    # The button runs the analysis and renders the returned Markdown.
    analyze_btn.click(
        fn=analyze_code,
        inputs=[code_input],
        outputs=[report_output],
    )

    gr.Markdown(_FOOTER_MD)


# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()