Spaces:

MCP-1st-Birthday
/

simple-security-scanner

Running

App Files Files Community

simple-security-scanner / src /formatter.py

garibong

Translate formatter.py to English

3a62f17 about 2 months ago

raw

history blame contribute delete

13.4 kB

	"""
	Result formatter for security scan results.

	Transforms raw vulnerability data into beginner-friendly explanations.
	"""

	from datetime import datetime
	from typing import Dict, List, Any
	import sys
	from pathlib import Path

	# Import utilities
	sys.path.insert(0, str(Path(__file__).parent))
	from utils import get_severity_order

	# Explanation templates for common vulnerabilities.
	#
	# Each entry maps a template name to a dictionary with the keys
	# "what", "why", "how_to_fix", "example" (all strings) and "references"
	# (a list of URL strings). Adjacent string literals are joined by Python's
	# implicit concatenation, so each value is a single string at runtime.
	EXPLANATION_TEMPLATES = {
	    "hardcoded_api_key": {
	        "what": "An API key is hardcoded directly in the source code",
	        "why": "Source code is stored in version control systems (Git) and can be accessed by multiple developers. "
	               "Anyone with access to the code can copy and misuse this API key. "
	               "If accidentally pushed to a public repository, it becomes exposed to the entire world. "
	               "Attackers can use this key to make unauthorized API calls, incur costs, or steal data.",
	        "how_to_fix": "Store API keys in environment variables or separate configuration files (e.g., .env), "
	                      "and add these files to .gitignore to exclude them from version control.",
	        "example": "# Bad example\napi_key = 'sk-1234567890abcdef'\n\n"
	                   "# Good example\nimport os\napi_key = os.getenv('API_KEY')\n\n"
	                   "# In .env file\n# API_KEY=sk-1234567890abcdef",
	        "references": [
	            "https://owasp.org/www-community/vulnerabilities/Use_of_hard-coded_password",
	            "https://12factor.net/config"
	        ]
	    },
	    "sql_injection": {
	        "what": "User input is directly inserted into SQL queries, causing SQL injection vulnerability",
	        "why": "If an attacker inputs malicious SQL code, they can query or delete all data in the database. "
	               "For example, input like 'admin' OR '1'='1' can bypass authentication, "
	               "or input like '; DROP TABLE users--' can delete entire tables.",
	        "how_to_fix": "Use parameterized queries (Prepared Statements) to separate user input from SQL code. "
	                      "Using an ORM (SQLAlchemy, Django ORM, etc.) automatically handles this safely.",
	        "example": "# Bad example\nquery = f\"SELECT * FROM users WHERE id={user_id}\"\n\n"
	                   "# Good example\nquery = \"SELECT * FROM users WHERE id=%s\"\ncursor.execute(query, (user_id,))\n\n"
	                   "# Using ORM\nuser = User.objects.filter(id=user_id).first()",
	        "references": [
	            "https://owasp.org/www-community/attacks/SQL_Injection",
	            "https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html"
	        ]
	    },
	    "password": {
	        "what": "A password is stored in plaintext in the source code",
	        "why": "Anyone with access to the code can see this password. "
	               "It remains permanently in Git history and can be recovered even after deletion. "
	               "If the same password is reused on other services, the damage can be even greater.",
	        "how_to_fix": "Store passwords in environment variables, and if possible, use a secrets management service (AWS Secrets Manager, HashiCorp Vault, etc.).",
	        "example": "# Bad example\npassword = 'MyPassword123'\n\n"
	                   "# Good example\nimport os\npassword = os.getenv('DB_PASSWORD')",
	        "references": [
	            "https://owasp.org/www-community/vulnerabilities/Use_of_hard-coded_password"
	        ]
	    },
	    "pickle_usage": {
	        "what": "Untrusted data is being deserialized using pickle.loads()",
	        "why": "Pickle can execute arbitrary code when restoring Python objects. "
	               "If an attacker provides maliciously crafted pickle data, "
	               "they can execute arbitrary commands on the server or completely take over the system.",
	        "how_to_fix": "For untrusted data, use safe serialization formats like JSON, YAML (safe_load), "
	                      "or Protocol Buffers instead of pickle.",
	        "example": "# Bad example\nimport pickle\ndata = pickle.loads(untrusted_input)\n\n"
	                   "# Good example\nimport json\ndata = json.loads(untrusted_input)",
	        "references": [
	            "https://docs.python.org/3/library/pickle.html#module-pickle",
	            "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data"
	        ]
	    },
	    "exec_usage": {
	        "what": "Code is being dynamically executed using exec() or eval()",
	        "why": "If user input or external data is executed with exec()/eval(), "
	               "attackers can run arbitrary Python code. "
	               "This can lead to catastrophic results such as accessing all server files, "
	               "attacking other systems, or installing malware.",
	        "how_to_fix": "Avoid using exec() and eval() whenever possible. "
	                      "If necessary, use ast.literal_eval() for safe evaluation, "
	                      "or use whitelist-based command mapping.",
	        "example": "# Bad example\nexec(user_input)\n\n"
	                   "# Good example (literals only)\nimport ast\nvalue = ast.literal_eval(user_input)\n\n"
	                   "# Or use whitelist\nallowed_commands = {'start': start_func, 'stop': stop_func}\ncommand = allowed_commands.get(user_input)",
	        "references": [
	            "https://docs.python.org/3/library/functions.html#eval",
	            "https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html"
	        ]
	    },
	    "shell_injection": {
	        "what": "Commands are being executed with shell=True in subprocess",
	        # The pipe is written as a plain "|": a backslash before it would be
	        # an invalid escape sequence and would leak into the displayed text.
	        "why": "shell=True executes commands through the shell, so if user input contains shell metacharacters "
	               "like semicolons (;) or pipes (|), additional commands can be executed. "
	               "For example, input like '; rm -rf /' could delete the entire system.",
	        "how_to_fix": "Remove shell=True and pass commands as a list. "
	                      "Alternatively, escape input using shlex.quote().",
	        "example": "# Bad example\nimport subprocess\nsubprocess.call(f'ls {user_dir}', shell=True)\n\n"
	                   "# Good example\nsubprocess.call(['ls', user_dir])\n\n"
	                   "# Or use shlex\nimport shlex\nsafe_dir = shlex.quote(user_dir)\nsubprocess.call(f'ls {safe_dir}', shell=True)",
	        "references": [
	            "https://docs.python.org/3/library/subprocess.html#security-considerations",
	            "https://owasp.org/www-community/attacks/Command_Injection"
	        ]
	    }
	}


	def get_explanation(vulnerability: Dict[str, Any]) -> Dict[str, Any]:
	    """
	    Generate beginner-friendly explanation for a vulnerability.

	    A template from EXPLANATION_TEMPLATES is selected by exact match on the
	    vulnerability's ``pattern_type`` first, then by substring heuristics on
	    its ``id`` (and, for API keys and passwords, on ``pattern_type``). When
	    nothing matches, a generic explanation is built from the vulnerability's
	    own ``description`` and ``recommendation`` fields.

	    Args:
	        vulnerability: Vulnerability dictionary from scanner

	    Returns:
	        A new explanation dictionary with what, why, how_to_fix, example,
	        references. The ``references`` list is a fresh copy, so callers may
	        modify the result without altering the shared templates.
	    """
	    # `or ""` covers keys that are present but set to None, which would
	    # otherwise fail on `.lower()` / the `in` checks below.
	    vuln_id = str(vulnerability.get("id") or "")
	    vuln_type = str(vulnerability.get("pattern_type") or "")
	    vuln_id_lower = vuln_id.lower()

	    # Try to find a matching template
	    template = None

	    # Check by pattern type first
	    if vuln_type in EXPLANATION_TEMPLATES:
	        template = EXPLANATION_TEMPLATES[vuln_type]
	    # Check by ID pattern (keyword matches are case-insensitive; the
	    # "B301"/"B102"/"B602" rule codes are matched exactly as written)
	    elif "sql-injection" in vuln_id_lower:
	        template = EXPLANATION_TEMPLATES["sql_injection"]
	    elif "api" in vuln_id_lower or "api_key" in vuln_type:
	        template = EXPLANATION_TEMPLATES["hardcoded_api_key"]
	    elif "password" in vuln_id_lower or "password" in vuln_type:
	        template = EXPLANATION_TEMPLATES["password"]
	    elif "pickle" in vuln_id_lower or "B301" in vuln_id:
	        template = EXPLANATION_TEMPLATES["pickle_usage"]
	    elif "exec" in vuln_id_lower or "eval" in vuln_id_lower or "B102" in vuln_id:
	        template = EXPLANATION_TEMPLATES["exec_usage"]
	    elif "shell" in vuln_id_lower or "B602" in vuln_id:
	        template = EXPLANATION_TEMPLATES["shell_injection"]

	    # Use template or create generic explanation
	    if template:
	        explanation = dict(template)
	        # dict() is a shallow copy: duplicate the list as well so the
	        # module-level template is never shared with the caller.
	        explanation["references"] = list(template.get("references", []))
	    else:
	        # Generic explanation
	        description = vulnerability.get("description", "A security vulnerability was found")
	        explanation = {
	            "what": description,
	            "why": "This pattern can cause security vulnerabilities, and if exploited by attackers, it can damage the system.",
	            "how_to_fix": vulnerability.get("recommendation", "Follow security best practices and validate untrusted input."),
	            "example": "# Refer to security guides for writing secure code",
	            "references": [
	                "https://owasp.org/www-project-top-ten/",
	                "https://cheatsheetseries.owasp.org/"
	            ]
	        }

	    return explanation


	def remove_duplicates(vulnerabilities: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
	    """
	    Drop repeated findings, keeping the first occurrence of each.

	    Two findings count as the same when they share the line number, the
	    part of the ``id`` before its first "-" and the file path.

	    Args:
	        vulnerabilities: List of vulnerability dictionaries

	    Returns:
	        New list holding the first finding seen for every distinct key,
	        in the original order
	    """
	    # A dict keeps insertion order, so it doubles as "seen" set and result.
	    first_by_key = {}

	    for finding in vulnerabilities:
	        # Text before the first "-" is the base ID without suffix
	        base_id, _, _ = finding.get("id", "").partition("-")
	        fingerprint = (
	            finding.get("line_number"),
	            base_id,
	            finding.get("file_path", ""),
	        )
	        # setdefault leaves an already-recorded finding untouched
	        first_by_key.setdefault(fingerprint, finding)

	    return list(first_by_key.values())


	def sort_vulnerabilities(vulnerabilities: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
	    """
	    Sort vulnerabilities by severity and line number.

	    The severity rank comes from ``get_severity_order()`` (imported from
	    ``utils``; presumably a mapping from upper-case severity names to a
	    rank where lower sorts first -- confirm against utils). Findings with
	    a missing or None severity are ranked as "LOW"; severities not present
	    in the mapping sort last. A missing or None line number counts as 0.

	    Args:
	        vulnerabilities: List of vulnerability dictionaries

	    Returns:
	        New sorted list; the input list is not modified
	    """
	    severity_order = get_severity_order()

	    def sort_key(vuln):
	        raw_severity = vuln.get("severity", "LOW")
	        # A key that is present but None must not crash on .upper()
	        severity = "LOW" if raw_severity is None else str(raw_severity).upper()
	        # 99 pushes unrecognised severities behind every known level
	        severity_value = severity_order.get(severity, 99)

	        # None cannot be compared with int inside the key tuple, so an
	        # explicit None is treated exactly like a missing line number.
	        line_number = vuln.get("line_number")
	        if line_number is None:
	            line_number = 0

	        return (severity_value, line_number)

	    return sorted(vulnerabilities, key=sort_key)


	def calculate_summary(vulnerabilities: List[Dict[str, Any]]) -> Dict[str, Any]:
	    """
	    Calculate summary statistics for vulnerabilities.

	    Args:
	        vulnerabilities: List of vulnerability dictionaries

	    Returns:
	        Summary dictionary with ``total_issues``, one counter per severity
	        level (``critical``, ``high``, ``medium``, ``low``) and
	        ``scan_timestamp`` (current UTC time in ISO format with a trailing
	        "Z"). Findings whose severity is not one of the four levels are
	        included in ``total_issues`` but in no per-level counter.
	    """
	    # Local import: the module header only imports `datetime` itself.
	    from datetime import timezone

	    # Count in a dedicated mapping so that a severity string which happens
	    # to equal another summary key ("total_issues", "scan_timestamp") can
	    # never be mistaken for a severity level.
	    counts = dict.fromkeys(("critical", "high", "medium", "low"), 0)

	    for vuln in vulnerabilities:
	        raw_severity = vuln.get("severity", "LOW")
	        # A key that is present but None is treated like a missing one
	        severity = "low" if raw_severity is None else str(raw_severity).lower()
	        if severity in counts:
	            counts[severity] += 1

	    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
	    # time and drop the tzinfo so isoformat() keeps the original
	    # "YYYY-MM-DDTHH:MM:SS[.ffffff]" shape before "Z" is appended.
	    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

	    return {
	        "total_issues": len(vulnerabilities),
	        **counts,
	        "scan_timestamp": timestamp,
	    }


	def format_results(
	    vulnerabilities: List[Dict[str, Any]],
	    severity_threshold: str = "MEDIUM"
	) -> Dict[str, Any]:
	    """
	    Turn raw scanner findings into the beginner-friendly result structure.

	    Findings are deduplicated, sorted, and each one lacking an
	    "explanation" entry receives one from get_explanation(). The input
	    dictionaries are shallow-copied rather than modified.

	    Args:
	        vulnerabilities: List of vulnerability dictionaries from scanners
	        severity_threshold: Minimum severity threshold used; it is only
	            recorded in the summary here, no filtering happens in this
	            function

	    Returns:
	        Dictionary with "summary" (counts plus the threshold) and
	        "vulnerabilities" (the processed findings)
	    """

	    def with_explanation(finding):
	        # Work on a copy so the caller's dictionary stays as it was
	        entry = finding.copy()
	        # An explanation supplied by the scanner takes precedence
	        if "explanation" not in entry:
	            entry["explanation"] = get_explanation(finding)
	        return entry

	    # Deduplicate first, then order by severity and line number
	    ordered = sort_vulnerabilities(remove_duplicates(vulnerabilities))
	    enriched = [with_explanation(finding) for finding in ordered]

	    stats = calculate_summary(enriched)
	    stats["severity_threshold"] = severity_threshold

	    return {
	        "summary": stats,
	        "vulnerabilities": enriched
	    }


	def format_for_display(results: Dict[str, Any]) -> str:
	    """
	    Format results for console display.

	    Long code snippets (over 60 characters) and long risk/fix texts (over
	    100 characters) are cut and marked with "..."; shorter text is shown
	    as-is without an ellipsis.

	    Args:
	        results: Formatted results dictionary with optional "summary" and
	            "vulnerabilities" entries; missing entries fall back to
	            defaults

	    Returns:
	        Human-readable string
	    """

	    def clip(text, limit):
	        # None (key present but unset) is shown as empty text
	        text = "" if text is None else str(text)
	        # Only add the ellipsis when something was actually cut off
	        return text if len(text) <= limit else text[:limit] + "..."

	    summary = results.get("summary", {})
	    vulns = results.get("vulnerabilities", [])

	    output = []
	    output.append("\n" + "=" * 70)
	    output.append("Security Scan Results")
	    output.append("=" * 70)
	    output.append(f"\nTotal issues found: {summary.get('total_issues', 0)}")
	    output.append(f" - CRITICAL: {summary.get('critical', 0)}")
	    output.append(f" - HIGH: {summary.get('high', 0)}")
	    output.append(f" - MEDIUM: {summary.get('medium', 0)}")
	    output.append(f" - LOW: {summary.get('low', 0)}")
	    output.append(f"\nScan timestamp: {summary.get('scan_timestamp', 'N/A')}")
	    output.append("\n" + "-" * 70)

	    for i, vuln in enumerate(vulns, 1):
	        output.append(f"\n[{i}] {vuln.get('title', 'Unknown Issue')}")
	        output.append(f"Severity: {vuln.get('severity', 'UNKNOWN')}")
	        output.append(f"Location: Line {vuln.get('line_number', 'N/A')}")
	        output.append(f"Code: {clip(vuln.get('code_snippet', ''), 60)}")

	        explanation = vuln.get("explanation", {})
	        if explanation:
	            output.append(f"\nIssue: {explanation.get('what', '')}")
	            output.append(f"Risk: {clip(explanation.get('why', ''), 100)}")
	            output.append(f"Fix: {clip(explanation.get('how_to_fix', ''), 100)}")

	        # Separator after every finding
	        output.append("\n" + "-" * 70)

	    return "\n".join(output)