adding rag to client
Browse files- app.py +66 -48
- requirements.txt +5 -0
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import gradio as gr
|
|
| 2 |
from smolagents import InferenceClientModel, CodeAgent, MCPClient
|
| 3 |
|
| 4 |
# MCP Server URL for GitHub tools
|
| 5 |
-
MCP_SERVER_URL = "https://
|
| 6 |
|
| 7 |
def parse_github_url(url):
|
| 8 |
"""Parse GitHub URL to extract owner, repo, and file path"""
|
|
@@ -31,7 +31,6 @@ def analyze_vulnerabilities(message, history, hf_token):
|
|
| 31 |
return "❌ Please provide a Hugging Face API key. Get one from [Hugging Face](https://huggingface.co/settings/tokens)"
|
| 32 |
|
| 33 |
try:
|
| 34 |
-
# Connect to MCP server
|
| 35 |
mcp_client = MCPClient({
|
| 36 |
"url": MCP_SERVER_URL,
|
| 37 |
"timeout": 120
|
|
@@ -41,12 +40,12 @@ def analyze_vulnerabilities(message, history, hf_token):
|
|
| 41 |
# Initialize AI model with user's token
|
| 42 |
model = InferenceClientModel(token=hf_token.strip())
|
| 43 |
|
| 44 |
-
# Create AI agent with GitHub MCP tools
|
| 45 |
agent = CodeAgent(
|
| 46 |
-
tools=
|
| 47 |
model=model,
|
| 48 |
additional_authorized_imports=["json", "ast", "urllib", "base64", "re"],
|
| 49 |
-
max_steps=
|
| 50 |
)
|
| 51 |
|
| 52 |
# Parse the GitHub URL
|
|
@@ -58,54 +57,72 @@ def analyze_vulnerabilities(message, history, hf_token):
|
|
| 58 |
# Generate different prompts based on whether it's a file or repository
|
| 59 |
if file_path:
|
| 60 |
enhanced_prompt = f"""
|
| 61 |
-
You are a cybersecurity expert. Analyze the specific GitHub file for security vulnerabilities.
|
| 62 |
-
|
| 63 |
GitHub URL: {message}
|
| 64 |
Repository: {owner}/{repo}
|
| 65 |
File Path: {file_path}
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
- Command injection: os.system, exec, eval calls
|
| 73 |
-
- Input validation: unvalidated user inputs
|
| 74 |
- Error handling: unhandled exceptions that could leak info
|
| 75 |
-
- Hardcoded secrets: API keys, passwords, tokens
|
| 76 |
- Unsafe operations: file operations without validation
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
-
|
| 80 |
-
|
| 81 |
-
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
"""
|
| 85 |
else:
|
| 86 |
enhanced_prompt = f"""
|
| 87 |
-
You are a cybersecurity expert. Analyze the GitHub repository for security vulnerabilities.
|
| 88 |
-
|
| 89 |
Repository: {message}
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
-
|
| 97 |
-
-
|
| 98 |
-
-
|
| 99 |
-
|
| 100 |
-
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
-
|
| 104 |
-
-
|
| 105 |
-
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
"""
|
| 110 |
|
| 111 |
# Run the AI agent analysis
|
|
@@ -129,11 +146,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
|
|
| 129 |
This intelligent vulnerability scanner leverages cutting-edge AI agents and Model Context Protocol (MCP) tools to perform comprehensive security analysis of GitHub repositories and individual files.
|
| 130 |
|
| 131 |
**Key Features:**
|
| 132 |
-
-
|
| 133 |
-
-
|
| 134 |
-
-
|
| 135 |
-
-
|
| 136 |
-
-
|
|
|
|
| 137 |
|
| 138 |
**Project Links:**
|
| 139 |
- π **Source Code**: [GitHub Repository](https://github.com/banno-0720/vulnerability-scanner)
|
|
|
|
| 2 |
from smolagents import InferenceClientModel, CodeAgent, MCPClient
|
| 3 |
|
| 4 |
# MCP Server URL for GitHub tools
|
| 5 |
+
MCP_SERVER_URL = "https://baction-vulnerability-scanner-server.hf.space/gradio_api/mcp/"
|
| 6 |
|
| 7 |
def parse_github_url(url):
|
| 8 |
"""Parse GitHub URL to extract owner, repo, and file path"""
|
|
|
|
| 31 |
return "❌ Please provide a Hugging Face API key. Get one from [Hugging Face](https://huggingface.co/settings/tokens)"
|
| 32 |
|
| 33 |
try:
|
|
|
|
| 34 |
mcp_client = MCPClient({
|
| 35 |
"url": MCP_SERVER_URL,
|
| 36 |
"timeout": 120
|
|
|
|
| 40 |
# Initialize AI model with user's token
|
| 41 |
model = InferenceClientModel(token=hf_token.strip())
|
| 42 |
|
| 43 |
+
# Create AI agent with GitHub MCP tools and CVE database
|
| 44 |
agent = CodeAgent(
|
| 45 |
+
tools=tools,
|
| 46 |
model=model,
|
| 47 |
additional_authorized_imports=["json", "ast", "urllib", "base64", "re"],
|
| 48 |
+
max_steps=12
|
| 49 |
)
|
| 50 |
|
| 51 |
# Parse the GitHub URL
|
|
|
|
| 57 |
# Generate different prompts based on whether it's a file or repository
|
| 58 |
if file_path:
|
| 59 |
enhanced_prompt = f"""
|
| 60 |
+
You are a cybersecurity expert with access to a comprehensive CVE knowledge base. Analyze the specific GitHub file for security vulnerabilities.
|
|
|
|
| 61 |
GitHub URL: {message}
|
| 62 |
Repository: {owner}/{repo}
|
| 63 |
File Path: {file_path}
|
| 64 |
+
Please follow this enhanced analysis workflow:
|
| 65 |
+
1. **Repository & File Analysis**:
|
| 66 |
+
- Get repository information to verify it exists
|
| 67 |
+
- Get the content of the specific file: {file_path}
|
| 68 |
+
- Identify the programming language and framework used
|
| 69 |
+
2. **CVE Knowledge Base Research**:
|
| 70 |
+
- Use the search_cve_database tool to search for relevant vulnerability patterns based on the code you find
|
| 71 |
+
- Search for common weaknesses related to the programming language/framework
|
| 72 |
+
- Look up specific vulnerability types you identify in the code
|
| 73 |
+
3. **Comprehensive Security Analysis**:
|
| 74 |
- Command injection: os.system, exec, eval calls
|
| 75 |
+
- Input validation: unvalidated user inputs, missing sanitization
|
| 76 |
- Error handling: unhandled exceptions that could leak info
|
| 77 |
+
- Hardcoded secrets: API keys, passwords, tokens, database credentials
|
| 78 |
- Unsafe operations: file operations without validation
|
| 79 |
+
- Authentication/authorization flaws
|
| 80 |
+
- Cross-site scripting (XSS) vulnerabilities
|
| 81 |
+
- SQL injection vulnerabilities
|
| 82 |
+
4. **Enhanced Security Report**:
|
| 83 |
+
- π **File Overview** (path, language, size, framework)
|
| 84 |
+
- π **Vulnerability Summary** (counts by severity with CWE mappings)
|
| 85 |
+
- 🚨 **Detailed Findings** with:
|
| 86 |
+
- Line numbers and code snippets
|
| 87 |
+
- **CWE Classification** from CVE knowledge base
|
| 88 |
+
- **CVSS Severity** based on similar CVEs
|
| 89 |
+
- Security impact and exploitation scenarios
|
| 90 |
+
- **Remediation advice** with best practices
|
| 91 |
+
- **Related CVE examples** from knowledge base
|
| 92 |
+
Use the search_cve_database tool extensively to provide context-aware analysis based on real-world vulnerability data.
|
| 93 |
"""
|
| 94 |
else:
|
| 95 |
enhanced_prompt = f"""
|
| 96 |
+
You are a cybersecurity expert with access to a comprehensive CVE knowledge base. Analyze the GitHub repository for security vulnerabilities.
|
|
|
|
| 97 |
Repository: {message}
|
| 98 |
+
Please follow this enhanced analysis workflow:
|
| 99 |
+
1. **Repository Discovery**:
|
| 100 |
+
- Get repository information to verify it exists and understand the tech stack
|
| 101 |
+
- Scan for code files (.py, .js, .ts, .php, .java, .cpp, .c, .cs, .go, .rb, .rs, .swift, .kt, .scala, .sh, .bash, .ps1, .ipynb, .sql, .xml, .yaml, .yml, .json, .config, .ini, .env)
|
| 102 |
+
- Prioritize the most critical files (main application files, configuration files, database schemas)
|
| 103 |
+
2. **CVE Knowledge Base Research**:
|
| 104 |
+
- Use the search_cve_database tool to research common vulnerabilities for the identified tech stack
|
| 105 |
+
- Search for framework-specific vulnerabilities (e.g., "Django SQL injection", "React XSS", "Node.js command injection")
|
| 106 |
+
- Look up configuration-related vulnerabilities for the technologies used
|
| 107 |
+
3. **Comprehensive Security Analysis** (analyze 5-8 most important files):
|
| 108 |
+
- **Injection Vulnerabilities**: SQL injection, command injection, code injection
|
| 109 |
+
- **Input Validation**: Unvalidated inputs, missing sanitization, parameter tampering
|
| 110 |
+
- **Authentication & Authorization**: Broken access controls, session management
|
| 111 |
+
- **Data Exposure**: Hardcoded secrets, information disclosure, insecure storage
|
| 112 |
+
- **Configuration Issues**: Debug mode, insecure defaults, missing security headers
|
| 113 |
+
- **Framework-Specific**: Technology-specific vulnerability patterns from CVE database
|
| 114 |
+
4. **Enhanced Security Report**:
|
| 115 |
+
- π **Repository Overview** (tech stack, architecture, security posture)
|
| 116 |
+
- π **Files Analyzed** (prioritized list with rationale)
|
| 117 |
+
- π **Vulnerability Summary** with CWE classifications and CVSS scores
|
| 118 |
+
- 🚨 **Detailed Findings** including:
|
| 119 |
+
- File paths and line numbers
|
| 120 |
+
- **CWE Classification** from CVE knowledge base
|
| 121 |
+
- **Severity Assessment** based on CVSS scores from similar CVEs
|
| 122 |
+
- Code snippets and exploitation scenarios
|
| 123 |
+
- **Remediation Strategies** with best practices
|
| 124 |
+
- **Related CVE References** for context
|
| 125 |
+
Use the search_cve_database tool extensively to provide evidence-based analysis grounded in real-world vulnerability data.
|
| 126 |
"""
|
| 127 |
|
| 128 |
# Run the AI agent analysis
|
|
|
|
| 146 |
This intelligent vulnerability scanner leverages cutting-edge AI agents and Model Context Protocol (MCP) tools to perform comprehensive security analysis of GitHub repositories and individual files.
|
| 147 |
|
| 148 |
**Key Features:**
|
| 149 |
+
- **🤖 AI-Powered Analysis**: Uses advanced language models with agentic RAG for intelligent vulnerability detection
|
| 150 |
+
- **π CVE Knowledge Base**: Leverages real CVE data to provide CWE classifications and CVSS severity scores
|
| 151 |
+
- **π Deep Code Analysis**: Scans for SQL injection, XSS, command injection, and framework-specific vulnerabilities
|
| 152 |
+
- **π Repository & File Support**: Analyze entire repositories or focus on specific files
|
| 153 |
+
- **π Enhanced Reports**: Comprehensive security reports with CVE references, CWE mappings, and remediation strategies
|
| 154 |
+
- **π Secure Processing**: Your API keys are used securely and never stored
|
| 155 |
|
| 156 |
**Project Links:**
|
| 157 |
- π **Source Code**: [GitHub Repository](https://github.com/banno-0720/vulnerability-scanner)
|
requirements.txt
CHANGED
|
@@ -6,4 +6,9 @@ smolagents>=0.1.0
|
|
| 6 |
requests>=2.28.0
|
| 7 |
python-dotenv>=1.0.0
|
| 8 |
pydantic>=2.11,<2.12
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
smolagents[mcp]>=0.1.0
|
|
|
|
| 6 |
requests>=2.28.0
|
| 7 |
python-dotenv>=1.0.0
|
| 8 |
pydantic>=2.11,<2.12
|
| 9 |
+
pandas>=1.5.0
|
| 10 |
+
langchain>=0.1.0
|
| 11 |
+
langchain-community>=0.0.20
|
| 12 |
+
sentence-transformers>=2.2.0
|
| 13 |
+
rank-bm25>=0.2.2
|
| 14 |
smolagents[mcp]>=0.1.0
|