import re
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple


@dataclass
class VulnerabilityPattern:
    """A named group of regexes that together signal one class of issue.

    Each entry in ``patterns`` is matched line by line (case-insensitively)
    by ``analyze_with_patterns``; a single hit on any pattern marks the
    vulnerability as detected.
    """

    name: str
    description: str
    patterns: List[str]
    severity: str  # HIGH | MEDIUM | LOW
    category: str
    recommendation: str = ""
    # (major, minor) of the first compiler release that mitigates this issue
    # on its own. When every lower bound declared by the file's
    # ``pragma solidity`` directives is at or above this version the rule is
    # skipped. ``None`` means the rule applies to every compiler version.
    mitigated_since: Optional[Tuple[int, int]] = None


VULNERABILITY_PATTERNS: List[VulnerabilityPattern] = [
    VulnerabilityPattern(
        name="reentrancy",
        description="Reentrancy vulnerability – external call before state update",
        patterns=[r"\.call\s*\(", r"\.send\s*\(", r"msg\.sender\.call"],
        severity="HIGH",
        category="reentrancy",
        recommendation=(
            "Use the checks-effects-interactions pattern: update state before "
            "external calls. Consider OpenZeppelin's ReentrancyGuard."
        ),
    ),
    VulnerabilityPattern(
        name="integer_overflow",
        description="Integer overflow / underflow (Solidity < 0.8)",
        patterns=[r"pragma solidity\s+\^?0\.[0-7]", r"\+\+", r"--", r"\+=", r"-=", r"\*="],
        severity="HIGH",
        category="arithmetic",
        recommendation="Use Solidity ^0.8.0 (built-in checks) or SafeMath for older versions.",
        # Without this, every contract containing `i++` or `x += y` was
        # reported as HIGH even when pinned to a 0.8+ compiler, contradicting
        # the rule's own description and recommendation.
        # NOTE(review): arithmetic inside `unchecked { ... }` blocks is not
        # detected by this rule once it is skipped — confirm that is acceptable.
        mitigated_since=(0, 8),
    ),
    VulnerabilityPattern(
        name="unchecked_call",
        description="External call return value not checked",
        patterns=[r"\.call\s*\([^)]*\)\s*;", r"\.send\s*\([^)]*\)\s*;"],
        severity="MEDIUM",
        category="unchecked_call",
        recommendation="Always capture and verify the boolean return of low-level calls.",
    ),
    VulnerabilityPattern(
        name="tx_origin",
        description="Authorization via tx.origin (phishing risk)",
        patterns=[r"tx\.origin"],
        severity="MEDIUM",
        category="authorization",
        recommendation="Replace tx.origin with msg.sender for access control checks.",
    ),
    VulnerabilityPattern(
        name="access_control",
        description="Public function missing access control",
        patterns=[r"function\s+\w+\s*\([^)]*\)\s*(public|external)(?!\s+view|\s+pure)"],
        severity="MEDIUM",
        category="access_control",
        recommendation="Add onlyOwner or role-based modifiers to sensitive functions.",
    ),
    VulnerabilityPattern(
        name="timestamp_dependency",
        description="Logic depends on block.timestamp (miner manipulation)",
        patterns=[r"block\.timestamp", r"\bnow\b", r"block\.number"],
        severity="LOW",
        category="timestamp",
        recommendation="Avoid using block.timestamp for critical decisions; use Chainlink VRF where randomness is needed.",
    ),
]

# Weight each detected vulnerability contributes to the aggregate score.
SEVERITY_SCORE = {"HIGH": 3, "MEDIUM": 2, "LOW": 1}

# Captures the version expression of a `pragma solidity ...;` directive.
_PRAGMA_RE = re.compile(r"pragma\s+solidity\s+([^;]+);")
# One version term with its optional comparison operator, e.g. `>=0.8.0`.
_VERSION_TERM_RE = re.compile(r"(<=?|>=?|\^|~|=)?\s*(\d+)\.(\d+)(?:\.\d+)?")


def _version_floors(version_spec: str) -> List[Tuple[int, int]]:
    """Return the (major, minor) of every lower-bound term in *version_spec*.

    Terms introduced by ``<`` or ``<=`` are upper bounds; they say nothing
    about the oldest compiler allowed, so they are ignored.
    """
    floors: List[Tuple[int, int]] = []
    for operator, major, minor in _VERSION_TERM_RE.findall(version_spec):
        if operator.startswith("<"):
            continue
        floors.append((int(major), int(minor)))
    return floors


def _compiler_mitigates(vp: VulnerabilityPattern, floors: List[Tuple[int, int]]) -> bool:
    """Tell whether every declared compiler floor already mitigates *vp*.

    Returns False when the rule has no ``mitigated_since`` or when no floor
    could be determined, so an unknown compiler version is still scanned.
    """
    if vp.mitigated_since is None or not floors:
        return False
    return min(floors) >= vp.mitigated_since


def analyze_with_patterns(solidity_code: str) -> Dict[str, Any]:
    """Run regex-based vulnerability detection and return structured report.

    The source is scanned line by line against every pattern group in
    ``VULNERABILITY_PATTERNS``. Matching is purely textual: comments and
    string literals are not excluded. A pattern group that declares
    ``mitigated_since`` is skipped when all ``pragma solidity`` lower bounds
    found in the source are at or above that version.

    Args:
        solidity_code: Full text of the Solidity source to inspect.

    Returns:
        A dict with the keys ``analysis_type``, ``solidity_version`` (the
        first pragma's version expression, or ``"unknown"``),
        ``total_lines``, ``risk_level`` (``CRITICAL`` / ``MEDIUM`` / ``LOW`` /
        ``SAFE``), ``severity_score``, ``vulnerabilities_count``,
        ``vulnerabilities`` (one entry per detected pattern group, with
        de-duplicated sorted ``line_numbers`` and the raw ``matches``),
        ``high_count``, ``medium_count`` and ``low_count``.
    """
    lines = solidity_code.split("\n")

    version_match = _PRAGMA_RE.search(solidity_code)
    solidity_version = version_match.group(1).strip() if version_match else "unknown"

    # Consider every pragma in the file (flattened sources may carry several)
    # so one old directive is enough to keep version-gated rules active.
    floors: List[Tuple[int, int]] = []
    for spec in _PRAGMA_RE.findall(solidity_code):
        floors.extend(_version_floors(spec))

    vulnerabilities: List[Dict[str, Any]] = []
    severity_score = 0

    for vp in VULNERABILITY_PATTERNS:
        if _compiler_mitigates(vp, floors):
            continue

        # Compile once per pattern group instead of once per line.
        compiled = [re.compile(pat, re.IGNORECASE) for pat in vp.patterns]

        finding: Dict[str, Any] = {
            "name": vp.name,
            "description": vp.description,
            "severity": vp.severity,
            "category": vp.category,
            "recommendation": vp.recommendation,
            "detected": False,
            "line_numbers": [],
            "matches": [],
        }

        for i, line in enumerate(lines, start=1):
            for regex in compiled:
                for m in regex.finditer(line):
                    finding["detected"] = True
                    finding["line_numbers"].append(i)
                    finding["matches"].append(
                        {"line": i, "text": line.strip(), "match": m.group()}
                    )

        if finding["detected"]:
            # Several patterns may hit the same line; report each line once.
            finding["line_numbers"] = sorted(set(finding["line_numbers"]))
            vulnerabilities.append(finding)
            severity_score += SEVERITY_SCORE[vp.severity]

    high = [v for v in vulnerabilities if v["severity"] == "HIGH"]
    medium = [v for v in vulnerabilities if v["severity"] == "MEDIUM"]
    low = [v for v in vulnerabilities if v["severity"] == "LOW"]

    # The overall risk is driven by the most severe finding present.
    if high:
        risk_level = "CRITICAL"
    elif medium:
        risk_level = "MEDIUM"
    elif low:
        risk_level = "LOW"
    else:
        risk_level = "SAFE"

    return {
        "analysis_type": "pattern",
        "solidity_version": solidity_version,
        "total_lines": len(lines),
        "risk_level": risk_level,
        "severity_score": severity_score,
        "vulnerabilities_count": len(vulnerabilities),
        "vulnerabilities": vulnerabilities,
        "high_count": len(high),
        "medium_count": len(medium),
        "low_count": len(low),
    }