File size: 10,064 Bytes
"""hf-scanner CLI — command-line interface for hf-security-scanner v4.0.
Usage examples:
hf-scanner scan https://huggingface.co/spaces/user/space
hf-scanner scan ./my-project --format both --out ./results
hf-scanner scan . --no-llm --severity-threshold WARNING
hf-scanner scan . --create-baseline baseline.json
hf-scanner scan . --baseline baseline.json --ignore-file .hfscanignore
hf-scanner list-rules
hf-scanner self-test
hf-scanner version
"""
import json
import os
import sys
import tempfile
from pathlib import Path
from typing import Optional
# Ensure project root is on sys.path when run via `python cli.py`
sys.path.insert(0, str(Path(__file__).parent))
import typer
from typing_extensions import Annotated
from core import (
apply_ignore_rules,
bootstrap_binaries,
filter_by_baseline,
have_binary,
load_baseline,
parse_ignore_file,
save_baseline,
)
from core.scanner import scan_repo
from report import generate_html_report, generate_sarif
from rules import ALL_LLM, ALL_PERFORMANCE, ALL_SECURITY
# Version string printed by the `version` command and embedded in the report
# footer built by `scan`.
_VERSION = "4.0.0"

# Top-level Typer application; every @app.command() in this module registers
# itself on this object. With no arguments the CLI prints its help text.
app = typer.Typer(
    no_args_is_help=True,
    name="hf-scanner",
    help=(
        "HF Security & Performance Scanner β scan HF Spaces and Git repos "
        "for vulnerabilities."
    ),
)
# ── Exit codes ────────────────────────────────────────────────────────────────
# Process exit statuses used by the commands in this module:
#   EXIT_CLEAN    - scan finished and nothing reached the severity threshold
#   EXIT_FINDINGS - scan finished and at least one finding reached it
#   EXIT_ERROR    - the scan raised, or `self-test` found a missing tool
#   EXIT_USAGE    - an option value was rejected (e.g. unknown --format)
EXIT_CLEAN, EXIT_FINDINGS, EXIT_ERROR, EXIT_USAGE = 0, 1, 2, 3
_SEV_RANK = {"ERROR": 4, "HIGH": 3, "WARNING": 2, "INFO": 1}
def _findings_exceed_threshold(findings: list, threshold: str) -> bool:
thresh = _SEV_RANK.get(threshold.upper(), 2)
return any(_SEV_RANK.get(f.get("severity", "INFO").upper(), 0) >= thresh
for f in findings)
# ── Commands ──────────────────────────────────────────────────────────────────
@app.command()
def version() -> None:
    """Print the hf-scanner version."""
    # The docstring above doubles as the command's --help text.
    banner = "hf-scanner " + _VERSION
    typer.echo(banner)
@app.command("list-rules")
def list_rules() -> None:
    """List all bundled Semgrep rule packs."""
    # Fixed-width table: pack label (30 cols), category (14 cols), then path.
    typer.echo("{:<30} {:<14} Path".format("Pack", "Category"))
    typer.echo("-" * 70)

    # Each entry unpacks as (label, path, category); the three groups are
    # listed in the order security, performance, LLM.
    for pack_label, pack_path, pack_category in ALL_SECURITY + ALL_PERFORMANCE + ALL_LLM:
        row = "{:<30} {:<14} {}".format(pack_label, pack_category, pack_path)
        typer.echo(row)
@app.command("self-test")
def self_test() -> None:
    """Check that all external tools are installed and executable."""
    # (binary name, short description) for every external tool that is probed.
    catalogue = (
        ("semgrep", "Static analysis (Python, JS, β¦)"),
        ("bandit", "Python security linter"),
        ("detect-secrets", "Secret detection"),
        ("pip-audit", "Dependency CVE scanner"),
        ("ruff", "Fast Python linter (perf rules)"),
        ("gitleaks", "Git history secret scanner"),
        ("hadolint", "Dockerfile linter"),
        ("agent-audit", "OWASP Agentic Top 10 scanner"),
    )
    missing = []  # names of tools that have_binary() did not find

    typer.echo(f"{'Tool':<18} {'Status':<8} Description")
    typer.echo("-" * 60)
    for binary, purpose in catalogue:
        if have_binary(binary):
            marker = "β ok"
        else:
            marker = "β MISSING"
            missing.append(binary)
        typer.echo(f"{binary:<18} {marker:<8} {purpose}")
    typer.echo("")

    # NOTE(review): bootstrap runs after the probes above, so a tool it
    # provides would still be listed as missing in this run - confirm intent.
    outcome = bootstrap_binaries()
    typer.echo(
        f"[bootstrap] gitleaks={outcome.get('gitleaks')}, "
        f"hadolint={outcome.get('hadolint')}"
    )

    if missing:
        typer.echo("\nSome tools are missing. Run: pip install hf-security-scanner[full]",
                   err=True)
        raise typer.Exit(code=EXIT_ERROR)
def _utc_timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string ending in ``Z``."""
    # Function-scope import: the module does not import datetime at the top.
    from datetime import datetime, timezone

    # isoformat() on an aware UTC datetime already ends in "+00:00"; swap that
    # offset for "Z" rather than appending a second zone designator.
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")


def _resolve_output_stem(out: Optional[str]) -> Path:
    """Return the extension-less path that report file names are built from."""
    if not out:
        return Path(tempfile.gettempdir()) / "scan_report"
    out_path = Path(out)
    stem = out_path / "scan_report" if out_path.is_dir() else out_path
    # A stem such as "results/report" is unusable until "results" exists.
    stem.parent.mkdir(parents=True, exist_ok=True)
    return stem


# `scan` pipeline: validate options -> scan_repo() -> baseline suppression ->
# ignore-file suppression -> optional baseline creation -> write reports ->
# exit with EXIT_FINDINGS or EXIT_CLEAN depending on --severity-threshold.
# The docstring is kept to one line because Typer shows it as --help text.
@app.command()
def scan(
    target: Annotated[str, typer.Argument(help="URL or local path to scan.")],  # noqa: AGENT-034
    format: Annotated[str, typer.Option(
        "--format", "-f",
        help="Output format: html | sarif | json | both (html+sarif).",
    )] = "both",
    out: Annotated[Optional[str], typer.Option(
        "--out", "-o",
        help="Output directory or file stem (without extension). Default: temp dir.",
    )] = None,
    security: Annotated[bool, typer.Option("--security/--no-security")] = True,
    llm: Annotated[bool, typer.Option("--llm/--no-llm")] = True,
    performance: Annotated[bool, typer.Option("--performance/--no-performance")] = True,
    deep_history: Annotated[bool, typer.Option("--deep-history")] = False,
    baseline: Annotated[Optional[str], typer.Option(
        "--baseline",
        help="Path to a baseline JSON file. Known findings will be suppressed.",
    )] = None,
    create_baseline: Annotated[Optional[str], typer.Option(
        "--create-baseline",
        help="Save current findings as a new baseline to this path.",
    )] = None,
    ignore_file: Annotated[str, typer.Option(
        "--ignore-file",
        help="Path to an .hfscanignore file.",
    )] = ".hfscanignore",
    severity_threshold: Annotated[str, typer.Option(
        "--severity-threshold",
        help="Exit 1 only when findings at or above this level exist. "
             "Choices: ERROR | WARNING | INFO.",
    )] = "WARNING",
    hf_token: Annotated[Optional[str], typer.Option(
        "--hf-token", envvar="HF_TOKEN",
        help="HuggingFace API token for private repos.",
    )] = None,
    quiet: Annotated[bool, typer.Option("--quiet/--verbose")] = False,
) -> None:
    """Scan a Git repo, HF Space, or local directory for security issues."""
    # -- Option validation -----------------------------------------------------
    fmt = format.lower()
    if fmt not in ("html", "sarif", "json", "both"):
        typer.echo(f"Unknown format '{format}'. Choose html, sarif, json, or both.",
                   err=True)
        raise typer.Exit(code=EXIT_USAGE)

    # An unrecognised threshold used to behave as WARNING without any notice.
    # Keep that fallback, but say so, so a typo cannot silently change which
    # findings fail the run.
    threshold = severity_threshold.strip().upper()
    if threshold not in _SEV_RANK:
        typer.echo(
            f"Warning: unknown severity threshold '{severity_threshold}'; "
            "falling back to WARNING.",
            err=True,
        )
        threshold = "WARNING"

    # -- Run the scan ----------------------------------------------------------
    if not quiet:
        typer.echo(f"[hf-scanner] scanning {target} β¦")
    try:
        findings, log_lines = scan_repo(
            target,
            hf_token=hf_token,
            deep_history=deep_history,
            run_security=security,
            run_performance=performance,
            run_llm=llm,
        )
    except Exception as exc:  # noqa: BLE001
        # Top-level boundary: report any scanner failure and exit non-zero.
        typer.echo(f"Scan error: {exc}", err=True)
        raise typer.Exit(code=EXIT_ERROR) from exc
    if not quiet:
        for line in log_lines:
            typer.echo(f" {line}")

    # -- Baseline suppression --------------------------------------------------
    if baseline:
        if os.path.isfile(baseline):
            known = load_baseline(baseline)
            findings, suppressed = filter_by_baseline(findings, known)
            if not quiet:
                typer.echo(f"[baseline] suppressed {len(suppressed)} known findings")
        else:
            # A requested baseline that is missing used to be skipped silently.
            typer.echo(
                f"[baseline] warning: '{baseline}' not found; no findings suppressed",
                err=True,
            )

    # -- .hfscanignore suppression ---------------------------------------------
    ignore_rules = parse_ignore_file(ignore_file)
    if ignore_rules:
        findings, ignored_count = apply_ignore_rules(findings, ignore_rules)
        if not quiet:
            typer.echo(f"[ignore] {ignored_count} findings suppressed by {ignore_file}")

    # -- Create baseline -------------------------------------------------------
    # NOTE(review): this saves the findings left after both suppressions, so
    # entries filtered by --baseline are not carried into the new file.
    if create_baseline:
        save_baseline(findings, create_baseline)
        if not quiet:
            typer.echo(f"[baseline] saved {len(findings)} fingerprints β {create_baseline}")

    # -- Write reports ---------------------------------------------------------
    stem = _resolve_output_stem(out)
    scan_meta = {
        "target": target,
        "n_targets": 1,
        "timestamp": _utc_timestamp(),
        "footer": f"hf-scanner {_VERSION}",
    }
    written = []
    if fmt in ("html", "both"):
        html_path = str(stem) + ".html"
        with open(html_path, "w", encoding="utf-8") as fh:
            fh.write(generate_html_report(findings, scan_meta))
        written.append(html_path)
    if fmt in ("sarif", "both"):
        sarif_path = str(stem) + ".sarif"
        with open(sarif_path, "w", encoding="utf-8") as fh:
            json.dump(generate_sarif(findings, scan_meta), fh, indent=2)
        written.append(sarif_path)
    if fmt == "json":
        json_path = str(stem) + ".json"
        with open(json_path, "w", encoding="utf-8") as fh:
            json.dump(findings, fh, indent=2, default=str)
        written.append(json_path)
        # Also print to stdout for piping
        typer.echo(json.dumps(findings, indent=2, default=str))

    if not quiet:
        typer.echo(f"\n[hf-scanner] {len(findings)} findings")
        for p in written:
            typer.echo(f" β {p}")

    # -- Exit code -------------------------------------------------------------
    if _findings_exceed_threshold(findings, threshold):
        raise typer.Exit(code=EXIT_FINDINGS)
    raise typer.Exit(code=EXIT_CLEAN)
def main() -> None:
    # Hand control to the Typer application, which parses sys.argv and
    # dispatches to the matching command.
    app()


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
|