"""Gradio chat app that scans a single GitHub file for security vulnerabilities.

A smolagents ``CodeAgent`` is given the tools served by a remote MCP server plus
a local webpage-fetching tool, and is prompted to produce a Markdown report.
"""
import gradio as gr
import re
import requests
from markdownify import markdownify
from requests.exceptions import RequestException
from smolagents import (
CodeAgent,
ToolCallingAgent,
InferenceClientModel,
WebSearchTool,
MCPClient,
tool
)
# MCP Server URL for GitHub tools.
# analyze_vulnerabilities_multiagent passes this URL to MCPClient and hands the
# tools the server returns to the agent (the prompt refers to them by name, e.g.
# get_file_content and simple_cve_search -- presumably served from here).
MCP_SERVER_URL = "https://baction-vulnerability-scanner-server.hf.space/gradio_api/mcp/"
@tool
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its content as a markdown string.

    Args:
        url: The URL of the webpage to visit.

    Returns:
        The content of the webpage converted to Markdown, or an error message if the request fails.
    """
    # A browser-like User-Agent is sent so the request is less likely to be blocked.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # Cap on returned characters so the page does not overwhelm the AI.
    max_chars = 5000

    try:
        page = requests.get(url, headers=browser_headers, timeout=30)
        # 4xx/5xx responses become exceptions and are reported below.
        page.raise_for_status()

        # HTML -> Markdown, then squeeze runs of 3+ newlines down to one blank line.
        text = re.sub(r"\n{3,}", "\n\n", markdownify(page.text).strip())

        if len(text) <= max_chars:
            return text
        return text[:max_chars] + "\n\n[Content truncated due to length...]"
    except RequestException as err:
        # Network / HTTP level failures.
        return f"Error fetching the webpage: {err!s}"
    except Exception as err:
        # Anything else (e.g. a conversion failure) is reported, never raised.
        return f"An unexpected error occurred: {err!s}"
def parse_github_url(url):
    """Parse a GitHub URL into its owner, repository and file path.

    Two URL shapes are recognised:

    * repository URLs  ``https://github.com/<owner>/<repo>[/]``
    * file URLs        ``https://github.com/<owner>/<repo>/blob/<ref>/<path>``

    Surrounding whitespace, a ``#fragment`` (e.g. ``#L10-L20`` line anchors)
    and a ``?query`` (e.g. ``?plain=1``) are ignored, ``http://`` and a
    ``www.`` prefix are accepted, and a trailing ``.git`` on a repository
    URL is dropped.

    Args:
        url: The URL to parse. Non-string values are treated as invalid.

    Returns:
        A ``(owner, repo, file_path)`` tuple. ``file_path`` is ``None`` for a
        repository URL. All three are ``None`` when the URL matches neither
        shape.

    >>> parse_github_url("https://github.com/o/r/blob/main/src/a.py#L3")
    ('o', 'r', 'src/a.py')
    """
    if not isinstance(url, str):
        return None, None, None

    # Links copied from the browser often carry a line anchor or a query
    # string; neither is part of the repository or file path.
    cleaned = url.strip().split("#", 1)[0].split("?", 1)[0]

    host = r'https?://(?:www\.)?github\.com'

    # Handle repository URLs (the lazy repo group lets an optional ".git" peel off)
    repo_match = re.match(host + r'/([^/]+)/([^/]+?)(?:\.git)?/?$', cleaned)
    if repo_match:
        return repo_match.group(1), repo_match.group(2), None

    # Handle file URLs; the ref is assumed to be a single path segment, so
    # branch names containing "/" cannot be told apart from the file path.
    file_match = re.match(host + r'/([^/]+)/([^/]+)/blob/[^/]+/(.+)$', cleaned)
    if file_match:
        return file_match.group(1), file_match.group(2), file_match.group(3)

    return None, None, None
def analyze_vulnerabilities_multiagent(message, history, hf_token):
    """Analyze one GitHub file for vulnerabilities with a tool-using agent.

    The GitHub file URL in ``message`` is parsed, a ``CodeAgent`` is built with
    the tools returned by the MCP server at ``MCP_SERVER_URL`` plus the local
    ``visit_webpage`` tool, and the agent is run on a prompt that asks for a
    Markdown security report.

    Args:
        message: The chat message; expected to be a GitHub *file* URL.
        history: Chat history supplied by the chat UI. Unused.
        hf_token: Hugging Face API token passed to ``InferenceClientModel``.
            ``None`` or a blank string is rejected with a user-facing message.

    Returns:
        The agent's result converted to ``str``, or a user-facing error
        message. Exceptions raised during the analysis are caught and
        reported in the returned string rather than propagated.
    """
    # Validate HF token input (tolerate None as well as blank strings)
    token = (hf_token or "").strip()
    if not token:
        return "β Please provide a Hugging Face API key. Get one from [Hugging Face](https://huggingface.co/settings/tokens)"

    try:
        # Parse the GitHub URL
        owner, repo, file_path = parse_github_url(message)
        if not owner or not repo:
            return "β Invalid GitHub URL. Please provide a valid GitHub repository or file URL."
        if not file_path:
            return "β Please provide a specific file URL for analysis. Repository-wide analysis is not supported in multi-agent mode."

        # Connect to MCP server for GitHub tools
        mcp_client = MCPClient({
            "url": MCP_SERVER_URL,
            "timeout": 120
        })
        try:
            github_tools = mcp_client.get_tools()

            # Initialize AI model
            model = InferenceClientModel(token=token)

            # Single agent with all tools (instead of multi-agent, to avoid
            # tool_choice issues)
            agent = CodeAgent(
                tools=github_tools + [visit_webpage],
                model=model,
                additional_authorized_imports=["re", "requests"],
                max_steps=12
            )

            # Prompt for the single-agent analysis.
            # The regex example is written with doubled braces and doubled
            # backslashes: inside an f-string a bare "{4}" is a replacement
            # field (it would render as "4"), and "\d" is an invalid escape.
            # NOTE(review): step 3 points the agent at NVD while the
            # "CRITICAL INSTRUCTIONS" point it at CVE Details -- confirm which
            # one is intended.
            enhanced_prompt = f"""
You are a cybersecurity expert. Analyze this GitHub file for security vulnerabilities.
GitHub URL: {message}
Repository: {owner}/{repo}
File Path: {file_path}
**ANALYSIS STEPS:**
1. **Get File Data**:
- Use get_repository_info with owner="{owner}", repo="{repo}"
- Use get_file_content with owner="{owner}", repo="{repo}", path="{file_path}"
2. **Find Vulnerabilities**:
Analyze code for:
- SQL injection patterns
- Command injection (eval, exec, os.system)
- XSS vulnerabilities
- Path traversal
- Hardcoded secrets
- Input validation issues
3. **CVE Research**:
- Search for CVEs: simple_cve_search("SQL injection", 3)
- Extract CVE IDs from the string result using regex
- Visit NVD for the first CVE: visit_webpage("https://nvd.nist.gov/vuln/detail/CVE-XXXX-XXXX")
- Include the full NVD webpage content in your report
4. **Generate Report**:
# π‘οΈ Security Analysis Report
## π File Overview
- **Path**: {file_path}
- **Repository**: {owner}/{repo}
## π¨ Vulnerabilities Found
[List vulnerabilities with line numbers]
## π CVE Research
**Top Related CVE**: [First CVE ID from regex extraction]
**CVE Details Webpage Content**: [Complete content from visit_webpage call]
**Key Details from CVE Details**: [CVSS score, attack vector, impact extracted from webpage]
## β οΈ Other Possible CVEs
[Show other 2 CVE IDs from search]
## π οΈ Remediation
[Specific fixes]
## β οΈ Disclaimer
AI analysis may not be 100% accurate. Manual security review recommended.
**REMEMBER**: Always call visit_webpage for the first CVE ID to get detailed CVE information!
**CRITICAL INSTRUCTIONS**:
- simple_cve_search returns a STRING with CVE IDs and descriptions
- Extract CVE IDs using: re.findall(r'CVE-\\d{{4}}-\\d+', cve_search_string)
- TRY to visit CVE Details webpage for the first CVE ID found (more reliable than NVD)
- Use this exact pattern:
1. Call simple_cve_search("SQL injection", 3)
2. Extract CVE IDs with regex from the returned string
3. Take the first CVE ID from the list
4. Call visit_webpage("https://www.cvedetails.com/cve/CVE-YYYY-NNNNN/") with the EXACT CVE ID (keep hyphens)
5. If webpage fails (403 error), continue with analysis using CVE search results only
- Keep variable names simple and avoid complex operations
- ALWAYS use keyword arguments for MCP tools (e.g., owner="user", repo="repo", path="file.py")
- NOTE: CVE format is standard CVE-YYYY-NNNNN (like CVE-2024-54762)
- Example: If you get "CVE-2024-54762", visit "https://www.cvedetails.com/cve/CVE-2024-54762/"
- DO NOT remove hyphens from CVE IDs when visiting CVE Details URLs
- If CVE Details access fails, use the CVE descriptions from simple_cve_search results
"""

            # Run the agent analysis
            result = agent.run(enhanced_prompt)
        finally:
            # Disconnect even when model setup or the agent run fails, so
            # the MCP connection is not leaked on the error path.
            mcp_client.disconnect()

        return str(result)
    except Exception as e:
        # Top-level boundary for the chat UI: report instead of raising.
        return f"β Error in multi-agent analysis: {e}\n\nPlease ensure:\nβ’ Valid GitHub file URL (not repository URL)\nβ’ Hugging Face token is correct\nβ’ File is accessible"
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    # Title followed by the feature overview / usage notice.
    gr.Markdown("## π‘οΈ Enhanced GitHub Vulnerability Scanner")
    gr.Markdown("""
**Advanced Security Analysis with Web Scraping**
This intelligent vulnerability scanner uses AI agents with web scraping capabilities to perform comprehensive security analysis of GitHub files.
**Key Features:**
- **π€ AI Agent System**: Single agent with multiple tools for efficient analysis
- **π Web Scraping**: Automatically visits NVD webpages to get detailed CVE information
- **π CVE Database Integration**: Searches CVE knowledge base and gets top 3 matches
- **π Smart Analysis**: AI-generated vulnerability descriptions (not hardcoded)
- **π Detailed Reports**: Comprehensive reports with NVD data and remediation advice
- **β οΈ Accuracy Disclaimer**: Shows alternative CVEs and warns about AI limitations
**Project Links:**
- π **Source Code**: [GitHub Repository](https://github.com/banno-0720/vulnerability-scanner)
- π§ **MCP Server**: [Hugging Face Space](https://huggingface.co/spaces/HimanshuGoyal2004/github-mcp-server)
β οΈ **Important Notice**: This tool is designed for legitimate security research and vulnerability assessment purposes only. Do not use this scanner for malicious activities, unauthorized access, or any illegal purposes. Always ensure you have proper authorization before scanning repositories that don't belong to you.
""")
    gr.Markdown("---")

    # API configuration: one row holding one column with the token field.
    with gr.Row(), gr.Column(scale=1):
        gr.Markdown("### π API Configuration")
        hf_token_box = gr.Textbox(
            type="password",
            label="π€ Hugging Face API Key",
            placeholder="Enter your Hugging Face API key for AI model access",
            info="π Get your free key: https://huggingface.co/settings/tokens"
        )

    gr.Markdown("---")
    gr.Markdown("### π¬ Enhanced Security Analysis")
    gr.Markdown("Paste a GitHub **FILE URL** (not repository URL) below to start the enhanced security analysis.")

    # Chat interface. The handler receives (message, history, hf_token); the
    # token comes from the textbox above via additional_inputs. Each example
    # row is [message, hf_token], with the token left blank.
    chatbot = gr.ChatInterface(
        fn=analyze_vulnerabilities_multiagent,
        type="messages",
        additional_inputs=[hf_token_box],
        examples=[
            ["https://github.com/ayushmittal62/vunreability_scanner_testing/blob/master/database/schema.sql", ""],
            ["https://github.com/ayushmittal62/vunreability_scanner_testing/blob/master/python/database.py", ""],
            ["https://github.com/banno-0720/documentation-agent/blob/main/code.py", ""]
        ],
    )

if __name__ == "__main__":
    # Launch only when run as a script; the port is pinned explicitly
    # (original note: different port to avoid conflict with server).
    demo.launch(server_port=7860)