"""Shared data model: Finding factory + severity/confidence ordering.""" SEVERITY_RANK = { "CRITICAL": 0, "ERROR": 1, "HIGH": 1, "WARNING": 2, "MEDIUM": 2, "INFO": 3, "LOW": 3, } CONFIDENCE_RANK = {"confirmed": 0, "likely": 1, "possible": 2} TOOL_DEFAULT_CONFIDENCE = { "pip-audit": "confirmed", "gitleaks": "confirmed", "forbidden-file": "confirmed", "detect-secrets": "possible", "Bandit": "possible", "hadolint": "likely", "ruff": "likely", } FORBIDDEN_FILES = [ ".env", ".env.local", ".env.production", "id_rsa", "id_dsa", "id_ecdsa", "id_ed25519", ".git-credentials", ".npmrc", ".pypirc", "credentials", "credentials.json", "service-account.json", "serviceAccountKey.json", "wp-config.php", ] def make_finding( tool: str, rule: str, severity: str, file: str, line: int, message: str, owasp: str | list[str], category: str = "security", confidence: str = None, remediation: str = None, ) -> dict: """Build a normalized finding dict. All scanners produce these.""" # Lazy import to avoid circular dep with report.remediation from report.remediation import REMEDIATION if confidence is None: confidence = TOOL_DEFAULT_CONFIDENCE.get(tool, "possible") if remediation is None: remediation = REMEDIATION.get(rule, "") if isinstance(owasp, str): owasp = [owasp] return { "category": category, "tool": tool, "rule": rule, "severity": severity, "confidence": confidence, "file": file, "line": line, "message": message, "owasp": owasp, "remediation": remediation, } def sort_findings(findings: list[dict]) -> list[dict]: """Sort by severity → confidence → file → line.""" return sorted(findings, key=lambda x: ( SEVERITY_RANK.get(x["severity"], 9), CONFIDENCE_RANK.get(x["confidence"], 9), x["file"], x["line"], )) def dedup_findings(findings: list[dict]) -> list[dict]: """Remove duplicate findings (same tool/file/line/message).""" seen = set() out = [] for f in findings: key = (f["tool"], f["file"], f["line"], f["message"]) if key in seen: continue seen.add(key) out.append(f) return out