Baction committed on
Commit
c324061
·
1 Parent(s): 82f837e

adding rag to client

Browse files
Files changed (2) hide show
  1. app.py +66 -48
  2. requirements.txt +5 -0
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  from smolagents import InferenceClientModel, CodeAgent, MCPClient
3
 
4
  # MCP Server URL for GitHub tools
5
- MCP_SERVER_URL = "https://himanshugoyal2004-github-mcp-server.hf.space/gradio_api/mcp/"
6
 
7
  def parse_github_url(url):
8
  """Parse GitHub URL to extract owner, repo, and file path"""
@@ -31,7 +31,6 @@ def analyze_vulnerabilities(message, history, hf_token):
31
  return "❌ Please provide a Hugging Face API key. Get one from [Hugging Face](https://huggingface.co/settings/tokens)"
32
 
33
  try:
34
- # Connect to MCP server
35
  mcp_client = MCPClient({
36
  "url": MCP_SERVER_URL,
37
  "timeout": 120
@@ -41,12 +40,12 @@ def analyze_vulnerabilities(message, history, hf_token):
41
  # Initialize AI model with user's token
42
  model = InferenceClientModel(token=hf_token.strip())
43
 
44
- # Create AI agent with GitHub MCP tools
45
  agent = CodeAgent(
46
- tools=[*tools],
47
  model=model,
48
  additional_authorized_imports=["json", "ast", "urllib", "base64", "re"],
49
- max_steps=10
50
  )
51
 
52
  # Parse the GitHub URL
@@ -58,54 +57,72 @@ def analyze_vulnerabilities(message, history, hf_token):
58
  # Generate different prompts based on whether it's a file or repository
59
  if file_path:
60
  enhanced_prompt = f"""
61
- You are a cybersecurity expert. Analyze the specific GitHub file for security vulnerabilities.
62
-
63
  GitHub URL: {message}
64
  Repository: {owner}/{repo}
65
  File Path: {file_path}
66
-
67
- Please:
68
- 1. First, get repository information to verify it exists
69
- 2. Get the content of the specific file: {file_path}
70
- 3. Analyze the file content line by line for security vulnerabilities
71
- 4. Look for these security issues:
 
 
 
 
72
  - Command injection: os.system, exec, eval calls
73
- - Input validation: unvalidated user inputs
74
  - Error handling: unhandled exceptions that could leak info
75
- - Hardcoded secrets: API keys, passwords, tokens
76
  - Unsafe operations: file operations without validation
77
-
78
- 5. Create a professional security report with:
79
- - 🔍 File Overview (path, language, size)
80
- - 📊 Vulnerability Summary (counts by severity)
81
- - 🚨 Detailed Findings (line numbers, code snippets, impacts, fixes)
82
-
83
- Use simple string operations and avoid complex regex patterns. Focus on clear, actionable security findings.
 
 
 
 
 
 
 
84
  """
85
  else:
86
  enhanced_prompt = f"""
87
- You are a cybersecurity expert. Analyze the GitHub repository for security vulnerabilities.
88
-
89
  Repository: {message}
90
-
91
- Please:
92
- 1. First, get repository information to verify it exists
93
- 2. Scan the repository for code files (.py, .js, .ts, .php, .java, .cpp, .c, .cs, .go, .rb, .rs, .swift, .kt, .scala, .sh, .bash, .ps1, .ipynb, .sql, .xml, .yaml, .yml, .json, .config, .ini, .env)
94
- 3. For the first 5-10 most important code files, get their content and analyze for security issues
95
- 4. Look for these security vulnerabilities:
96
- - Command injection: os.system, exec, eval calls
97
- - Input validation: unvalidated user inputs, missing parameter checks
98
- - Error handling: unhandled exceptions, information disclosure
99
- - Hardcoded secrets: API keys, passwords, database credentials
100
- - Unsafe operations: file operations, deserialization without validation
101
-
102
- 5. Generate a comprehensive security report with:
103
- - 🔍 Repository Overview
104
- - 📁 Files Analyzed
105
- - 📊 Vulnerability Summary (counts by severity)
106
- - 🚨 Detailed Findings (file paths, line numbers, code snippets, impacts, remediation)
107
-
108
- Use simple string operations and focus on the most critical security issues. Limit analysis to prevent timeouts.
 
 
 
 
 
 
 
 
 
109
  """
110
 
111
  # Run the AI agent analysis
@@ -129,11 +146,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
129
  This intelligent vulnerability scanner leverages cutting-edge AI agents and Model Context Protocol (MCP) tools to perform comprehensive security analysis of GitHub repositories and individual files.
130
 
131
  **Key Features:**
132
- - **Deep Code Analysis**: Scans for common security vulnerabilities including SQL injection, XSS, command injection, and more
133
- - **AI-Powered Detection**: Uses advanced language models to understand code context and identify complex security issues
134
- - **Repository & File Support**: Analyze entire repositories or focus on specific files
135
- - **Detailed Reports**: Get comprehensive security reports with severity levels, line numbers, and remediation suggestions
136
- - **Secure Processing**: Your API keys are used securely and never stored
 
137
 
138
  **Project Links:**
139
  - 📂 **Source Code**: [GitHub Repository](https://github.com/banno-0720/vulnerability-scanner)
 
2
  from smolagents import InferenceClientModel, CodeAgent, MCPClient
3
 
4
  # MCP Server URL for GitHub tools
5
+ MCP_SERVER_URL = "https://baction-vulnerability-scanner-server.hf.space/gradio_api/mcp/"
6
 
7
  def parse_github_url(url):
8
  """Parse GitHub URL to extract owner, repo, and file path"""
 
31
  return "❌ Please provide a Hugging Face API key. Get one from [Hugging Face](https://huggingface.co/settings/tokens)"
32
 
33
  try:
 
34
  mcp_client = MCPClient({
35
  "url": MCP_SERVER_URL,
36
  "timeout": 120
 
40
  # Initialize AI model with user's token
41
  model = InferenceClientModel(token=hf_token.strip())
42
 
43
+ # Create AI agent with GitHub MCP tools and CVE database
44
  agent = CodeAgent(
45
+ tools=tools,
46
  model=model,
47
  additional_authorized_imports=["json", "ast", "urllib", "base64", "re"],
48
+ max_steps=12
49
  )
50
 
51
  # Parse the GitHub URL
 
57
  # Generate different prompts based on whether it's a file or repository
58
  if file_path:
59
  enhanced_prompt = f"""
60
+ You are a cybersecurity expert with access to a comprehensive CVE knowledge base. Analyze the specific GitHub file for security vulnerabilities.
 
61
  GitHub URL: {message}
62
  Repository: {owner}/{repo}
63
  File Path: {file_path}
64
+ Please follow this enhanced analysis workflow:
65
+ 1. **Repository & File Analysis**:
66
+ - Get repository information to verify it exists
67
+ - Get the content of the specific file: {file_path}
68
+ - Identify the programming language and framework used
69
+ 2. **CVE Knowledge Base Research**:
70
+ - Use the search_cve_database tool to search for relevant vulnerability patterns based on the code you find
71
+ - Search for common weaknesses related to the programming language/framework
72
+ - Look up specific vulnerability types you identify in the code
73
+ 3. **Comprehensive Security Analysis**:
74
  - Command injection: os.system, exec, eval calls
75
+ - Input validation: unvalidated user inputs, missing sanitization
76
  - Error handling: unhandled exceptions that could leak info
77
+ - Hardcoded secrets: API keys, passwords, tokens, database credentials
78
  - Unsafe operations: file operations without validation
79
+ - Authentication/authorization flaws
80
+ - Cross-site scripting (XSS) vulnerabilities
81
+ - SQL injection vulnerabilities
82
+ 4. **Enhanced Security Report**:
83
+ - 🔍 **File Overview** (path, language, size, framework)
84
+ - 📊 **Vulnerability Summary** (counts by severity with CWE mappings)
85
+ - 🚨 **Detailed Findings** with:
86
+ - Line numbers and code snippets
87
+ - **CWE Classification** from CVE knowledge base
88
+ - **CVSS Severity** based on similar CVEs
89
+ - Security impact and exploitation scenarios
90
+ - **Remediation advice** with best practices
91
+ - **Related CVE examples** from knowledge base
92
+ Use the search_cve_database tool extensively to provide context-aware analysis based on real-world vulnerability data.
93
  """
94
  else:
95
  enhanced_prompt = f"""
96
+ You are a cybersecurity expert with access to a comprehensive CVE knowledge base. Analyze the GitHub repository for security vulnerabilities.
 
97
  Repository: {message}
98
+ Please follow this enhanced analysis workflow:
99
+ 1. **Repository Discovery**:
100
+ - Get repository information to verify it exists and understand the tech stack
101
+ - Scan for code files (.py, .js, .ts, .php, .java, .cpp, .c, .cs, .go, .rb, .rs, .swift, .kt, .scala, .sh, .bash, .ps1, .ipynb, .sql, .xml, .yaml, .yml, .json, .config, .ini, .env)
102
+ - Prioritize the most critical files (main application files, configuration files, database schemas)
103
+ 2. **CVE Knowledge Base Research**:
104
+ - Use the search_cve_database tool to research common vulnerabilities for the identified tech stack
105
+ - Search for framework-specific vulnerabilities (e.g., "Django SQL injection", "React XSS", "Node.js command injection")
106
+ - Look up configuration-related vulnerabilities for the technologies used
107
+ 3. **Comprehensive Security Analysis** (analyze 5-8 most important files):
108
+ - **Injection Vulnerabilities**: SQL injection, command injection, code injection
109
+ - **Input Validation**: Unvalidated inputs, missing sanitization, parameter tampering
110
+ - **Authentication & Authorization**: Broken access controls, session management
111
+ - **Data Exposure**: Hardcoded secrets, information disclosure, insecure storage
112
+ - **Configuration Issues**: Debug mode, insecure defaults, missing security headers
113
+ - **Framework-Specific**: Technology-specific vulnerability patterns from CVE database
114
+ 4. **Enhanced Security Report**:
115
+ - 🔍 **Repository Overview** (tech stack, architecture, security posture)
116
+ - 📁 **Files Analyzed** (prioritized list with rationale)
117
+ - 📊 **Vulnerability Summary** with CWE classifications and CVSS scores
118
+ - 🚨 **Detailed Findings** including:
119
+ - File paths and line numbers
120
+ - **CWE Classification** from CVE knowledge base
121
+ - **Severity Assessment** based on CVSS scores from similar CVEs
122
+ - Code snippets and exploitation scenarios
123
+ - **Remediation Strategies** with best practices
124
+ - **Related CVE References** for context
125
+ Use the search_cve_database tool extensively to provide evidence-based analysis grounded in real-world vulnerability data.
126
  """
127
 
128
  # Run the AI agent analysis
 
146
  This intelligent vulnerability scanner leverages cutting-edge AI agents and Model Context Protocol (MCP) tools to perform comprehensive security analysis of GitHub repositories and individual files.
147
 
148
  **Key Features:**
149
+ - **🤖 AI-Powered Analysis**: Uses advanced language models with agentic RAG for intelligent vulnerability detection
150
+ - **📊 CVE Knowledge Base**: Leverages real CVE data to provide CWE classifications and CVSS severity scores
151
+ - **🔍 Deep Code Analysis**: Scans for SQL injection, XSS, command injection, and framework-specific vulnerabilities
152
+ - **📁 Repository & File Support**: Analyze entire repositories or focus on specific files
153
+ - **📋 Enhanced Reports**: Comprehensive security reports with CVE references, CWE mappings, and remediation strategies
154
+ - **🔒 Secure Processing**: Your API keys are used securely and never stored
155
 
156
  **Project Links:**
157
  - 📂 **Source Code**: [GitHub Repository](https://github.com/banno-0720/vulnerability-scanner)
requirements.txt CHANGED
@@ -6,4 +6,9 @@ smolagents>=0.1.0
6
  requests>=2.28.0
7
  python-dotenv>=1.0.0
8
  pydantic>=2.11,<2.12
 
 
 
 
 
9
  smolagents[mcp]>=0.1.0
 
6
  requests>=2.28.0
7
  python-dotenv>=1.0.0
8
  pydantic>=2.11,<2.12
9
+ pandas>=1.5.0
10
+ langchain>=0.1.0
11
+ langchain-community>=0.0.20
12
+ sentence-transformers>=2.2.0
13
+ rank-bm25>=0.2.2
14
  smolagents[mcp]>=0.1.0