HimanshuGoyal2004 committed on
Commit
ffcbb95
·
1 Parent(s): 55cc839

agentic rag test

Browse files
Files changed (2) hide show
  1. app.py +75 -43
  2. requirements.txt +5 -0
app.py CHANGED
@@ -31,7 +31,6 @@ def analyze_vulnerabilities(message, history, hf_token):
31
  return "❌ Please provide a Hugging Face API key. Get one from [Hugging Face](https://huggingface.co/settings/tokens)"
32
 
33
  try:
34
- # Connect to MCP server
35
  mcp_client = MCPClient({
36
  "url": MCP_SERVER_URL,
37
  "timeout": 120
@@ -41,12 +40,12 @@ def analyze_vulnerabilities(message, history, hf_token):
41
  # Initialize AI model with user's token
42
  model = InferenceClientModel(token=hf_token.strip())
43
 
44
- # Create AI agent with GitHub MCP tools
45
  agent = CodeAgent(
46
- tools=[*tools],
47
  model=model,
48
  additional_authorized_imports=["json", "ast", "urllib", "base64", "re"],
49
- max_steps=10
50
  )
51
 
52
  # Parse the GitHub URL
@@ -58,54 +57,86 @@ def analyze_vulnerabilities(message, history, hf_token):
58
  # Generate different prompts based on whether it's a file or repository
59
  if file_path:
60
  enhanced_prompt = f"""
61
- You are a cybersecurity expert. Analyze the specific GitHub file for security vulnerabilities.
62
 
63
  GitHub URL: {message}
64
  Repository: {owner}/{repo}
65
  File Path: {file_path}
66
 
67
- Please:
68
- 1. First, get repository information to verify it exists
69
- 2. Get the content of the specific file: {file_path}
70
- 3. Analyze the file content line by line for security vulnerabilities
71
- 4. Look for these security issues:
 
 
 
 
 
 
 
 
72
  - Command injection: os.system, exec, eval calls
73
- - Input validation: unvalidated user inputs
74
  - Error handling: unhandled exceptions that could leak info
75
- - Hardcoded secrets: API keys, passwords, tokens
76
  - Unsafe operations: file operations without validation
77
-
78
- 5. Create a professional security report with:
79
- 🔍 File Overview (path, language, size)
80
- 📊 Vulnerability Summary (counts by severity)
81
- 🚨 Detailed Findings (line numbers, code snippets, impacts, fixes)
82
-
83
- Use simple string operations and avoid complex regex patterns. Focus on clear, actionable security findings.
 
 
 
 
 
 
 
 
 
84
  """
85
  else:
86
  enhanced_prompt = f"""
87
- You are a cybersecurity expert. Analyze the GitHub repository for security vulnerabilities.
88
 
89
  Repository: {message}
90
 
91
- Please:
92
- 1. First, get repository information to verify it exists
93
- 2. Scan the repository for code files (.py, .js, .ts, .php, .java, .cpp, .c, .cs, .go, .rb, .rs, .swift, .kt, .scala, .sh, .bash, .ps1, .ipynb, .sql, .xml, .yaml, .yml, .json, .config, .ini, .env)
94
- 3. For the first 5-10 most important code files, get their content and analyze for security issues
95
- 4. Look for these security vulnerabilities:
96
- - Command injection: os.system, exec, eval calls
97
- - Input validation: unvalidated user inputs, missing parameter checks
98
- - Error handling: unhandled exceptions, information disclosure
99
- - Hardcoded secrets: API keys, passwords, database credentials
100
- - Unsafe operations: file operations, deserialization without validation
101
-
102
- 5. Generate a comprehensive security report with:
103
- 🔍 Repository Overview
104
- 📁 Files Analyzed
105
- 📊 Vulnerability Summary (counts by severity)
106
- 🚨 Detailed Findings (file paths, line numbers, code snippets, impacts, remediation)
107
-
108
- Use simple string operations and focus on the most critical security issues. Limit analysis to prevent timeouts.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  """
110
 
111
  # Run the AI agent analysis
@@ -129,11 +160,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
129
  This intelligent vulnerability scanner leverages cutting-edge AI agents and Model Context Protocol (MCP) tools to perform comprehensive security analysis of GitHub repositories and individual files.
130
 
131
  **Key Features:**
132
- - **Deep Code Analysis**: Scans for common security vulnerabilities including SQL injection, XSS, command injection, and more
133
- - **AI-Powered Detection**: Uses advanced language models to understand code context and identify complex security issues
134
- - **Repository & File Support**: Analyze entire repositories or focus on specific files
135
- - **Detailed Reports**: Get comprehensive security reports with severity levels, line numbers, and remediation suggestions
136
- - **Secure Processing**: Your API keys are used securely and never stored
 
137
 
138
  **Project Links:**
139
  - 📂 **Source Code**: [GitHub Repository](https://github.com/banno-0720/vulnerability-scanner)
 
31
  return "❌ Please provide a Hugging Face API key. Get one from [Hugging Face](https://huggingface.co/settings/tokens)"
32
 
33
  try:
 
34
  mcp_client = MCPClient({
35
  "url": MCP_SERVER_URL,
36
  "timeout": 120
 
40
  # Initialize AI model with user's token
41
  model = InferenceClientModel(token=hf_token.strip())
42
 
43
+ # Create AI agent with GitHub MCP tools and CVE database
44
  agent = CodeAgent(
45
+ tools=tools,
46
  model=model,
47
  additional_authorized_imports=["json", "ast", "urllib", "base64", "re"],
48
+ max_steps=12
49
  )
50
 
51
  # Parse the GitHub URL
 
57
  # Generate different prompts based on whether it's a file or repository
58
  if file_path:
59
  enhanced_prompt = f"""
60
+ You are a cybersecurity expert with access to a comprehensive CVE knowledge base. Analyze the specific GitHub file for security vulnerabilities.
61
 
62
  GitHub URL: {message}
63
  Repository: {owner}/{repo}
64
  File Path: {file_path}
65
 
66
+ Please follow this enhanced analysis workflow:
67
+
68
+ 1. **Repository & File Analysis**:
69
+ - Get repository information to verify it exists
70
+ - Get the content of the specific file: {file_path}
71
+ - Identify the programming language and framework used
72
+
73
+ 2. **CVE Knowledge Base Research**:
74
+ - Use the search_cve_database tool to search for relevant vulnerability patterns based on the code you find
75
+ - Search for common weaknesses related to the programming language/framework
76
+ - Look up specific vulnerability types you identify in the code
77
+
78
+ 3. **Comprehensive Security Analysis**:
79
  - Command injection: os.system, exec, eval calls
80
+ - Input validation: unvalidated user inputs, missing sanitization
81
  - Error handling: unhandled exceptions that could leak info
82
+ - Hardcoded secrets: API keys, passwords, tokens, database credentials
83
  - Unsafe operations: file operations without validation
84
+ - Authentication/authorization flaws
85
+ - Cross-site scripting (XSS) vulnerabilities
86
+ - SQL injection vulnerabilities
87
+
88
+ 4. **Enhanced Security Report**:
89
+ - 🔍 **File Overview** (path, language, size, framework)
90
+ - 📊 **Vulnerability Summary** (counts by severity with CWE mappings)
91
+ - 🚨 **Detailed Findings** with:
92
+ - Line numbers and code snippets
93
+ - **CWE Classification** from CVE knowledge base
94
+ - **CVSS Severity** based on similar CVEs
95
+ - Security impact and exploitation scenarios
96
+ - **Remediation advice** with best practices
97
+ - **Related CVE examples** from knowledge base
98
+
99
+ Use the search_cve_database tool extensively to provide context-aware analysis based on real-world vulnerability data.
100
  """
101
  else:
102
  enhanced_prompt = f"""
103
+ You are a cybersecurity expert with access to a comprehensive CVE knowledge base. Analyze the GitHub repository for security vulnerabilities.
104
 
105
  Repository: {message}
106
 
107
+ Please follow this enhanced analysis workflow:
108
+
109
+ 1. **Repository Discovery**:
110
+ - Get repository information to verify it exists and understand the tech stack
111
+ - Scan for code files (.py, .js, .ts, .php, .java, .cpp, .c, .cs, .go, .rb, .rs, .swift, .kt, .scala, .sh, .bash, .ps1, .ipynb, .sql, .xml, .yaml, .yml, .json, .config, .ini, .env)
112
+ - Prioritize the most critical files (main application files, configuration files, database schemas)
113
+
114
+ 2. **CVE Knowledge Base Research**:
115
+ - Use the search_cve_database tool to research common vulnerabilities for the identified tech stack
116
+ - Search for framework-specific vulnerabilities (e.g., "Django SQL injection", "React XSS", "Node.js command injection")
117
+ - Look up configuration-related vulnerabilities for the technologies used
118
+
119
+ 3. **Comprehensive Security Analysis** (analyze 5-8 most important files):
120
+ - **Injection Vulnerabilities**: SQL injection, command injection, code injection
121
+ - **Input Validation**: Unvalidated inputs, missing sanitization, parameter tampering
122
+ - **Authentication & Authorization**: Broken access controls, session management
123
+ - **Data Exposure**: Hardcoded secrets, information disclosure, insecure storage
124
+ - **Configuration Issues**: Debug mode, insecure defaults, missing security headers
125
+ - **Framework-Specific**: Technology-specific vulnerability patterns from CVE database
126
+
127
+ 4. **Enhanced Security Report**:
128
+ - 🔍 **Repository Overview** (tech stack, architecture, security posture)
129
+ - 📁 **Files Analyzed** (prioritized list with rationale)
130
+ - 📊 **Vulnerability Summary** with CWE classifications and CVSS scores
131
+ - 🚨 **Detailed Findings** including:
132
+ - File paths and line numbers
133
+ - **CWE Classification** from CVE knowledge base
134
+ - **Severity Assessment** based on CVSS scores from similar CVEs
135
+ - Code snippets and exploitation scenarios
136
+ - **Remediation Strategies** with best practices
137
+ - **Related CVE References** for context
138
+
139
+ Use the search_cve_database tool extensively to provide evidence-based analysis grounded in real-world vulnerability data.
140
  """
141
 
142
  # Run the AI agent analysis
 
160
  This intelligent vulnerability scanner leverages cutting-edge AI agents and Model Context Protocol (MCP) tools to perform comprehensive security analysis of GitHub repositories and individual files.
161
 
162
  **Key Features:**
163
+ - **🤖 AI-Powered Analysis**: Uses advanced language models with agentic RAG for intelligent vulnerability detection
164
+ - **📊 CVE Knowledge Base**: Leverages real CVE data to provide CWE classifications and CVSS severity scores
165
+ - **🔍 Deep Code Analysis**: Scans for SQL injection, XSS, command injection, and framework-specific vulnerabilities
166
+ - **📁 Repository & File Support**: Analyze entire repositories or focus on specific files
167
+ - **📋 Enhanced Reports**: Comprehensive security reports with CVE references, CWE mappings, and remediation strategies
168
+ - **🔒 Secure Processing**: Your API keys are used securely and never stored
169
 
170
  **Project Links:**
171
  - 📂 **Source Code**: [GitHub Repository](https://github.com/banno-0720/vulnerability-scanner)
requirements.txt CHANGED
@@ -6,4 +6,9 @@ smolagents>=0.1.0
6
  requests>=2.28.0
7
  python-dotenv>=1.0.0
8
  pydantic>=2.11,<2.12
 
 
 
 
 
9
  smolagents[mcp]>=0.1.0
 
6
  requests>=2.28.0
7
  python-dotenv>=1.0.0
8
  pydantic>=2.11,<2.12
9
+ pandas>=1.5.0
10
+ langchain>=0.1.0
11
+ langchain-community>=0.0.20
12
+ sentence-transformers>=2.2.0
13
+ rank-bm25>=0.2.2
14
  smolagents[mcp]>=0.1.0