"""hf-scanner CLI — command-line interface for hf-security-scanner v4.0.

Usage examples:

    hf-scanner scan https://huggingface.co/spaces/user/space
    hf-scanner scan ./my-project --format both --out ./results
    hf-scanner scan . --no-llm --severity-threshold WARNING
    hf-scanner scan . --create-baseline baseline.json
    hf-scanner scan . --baseline baseline.json --ignore-file .hfscanignore
    hf-scanner list-rules
    hf-scanner self-test
    hf-scanner version
"""
|
| import json
|
| import os
|
| import sys
|
| import tempfile
|
| from pathlib import Path
|
| from typing import Optional
|
|
|
|
|
| sys.path.insert(0, str(Path(__file__).parent))
|
|
|
| import typer
|
| from typing_extensions import Annotated
|
|
|
| from core import (
|
| apply_ignore_rules,
|
| bootstrap_binaries,
|
| filter_by_baseline,
|
| have_binary,
|
| load_baseline,
|
| parse_ignore_file,
|
| save_baseline,
|
| )
|
| from core.scanner import scan_repo
|
| from report import generate_html_report, generate_sarif
|
| from rules import ALL_LLM, ALL_PERFORMANCE, ALL_SECURITY
|
|
|
# Version string printed by the ``version`` command and embedded in report footers.
_VERSION = "4.0.0"

# Root Typer application; commands attach to it through ``@app.command``.
# ``no_args_is_help=True`` makes a bare ``hf-scanner`` invocation print the help.
app = typer.Typer(
    name="hf-scanner",
    help="HF Security & Performance Scanner — scan HF Spaces and Git repos for vulnerabilities.",
    no_args_is_help=True,
)

# Process exit codes used by the commands in this module.
EXIT_CLEAN = 0     # scan finished with nothing at or above the severity threshold
EXIT_FINDINGS = 1  # scan finished with findings at or above the severity threshold
EXIT_ERROR = 2     # the scan raised, or self-test found a missing tool
EXIT_USAGE = 3     # an option value was rejected (e.g. unknown --format)
|
|
|
# Numeric rank per severity label; a larger number is more severe.
_SEV_RANK = {"ERROR": 4, "HIGH": 3, "WARNING": 2, "INFO": 1}


def _findings_exceed_threshold(findings: list, threshold: str) -> bool:
    """Return True if any finding is at least as severe as *threshold*.

    Args:
        findings: Finding mappings; each is read through ``.get("severity")``.
        threshold: Severity label, matched case-insensitively against
            ``_SEV_RANK``. An unrecognised label falls back to rank 2 (WARNING).

    Returns:
        True when at least one finding's rank is >= the threshold rank,
        False otherwise (including for an empty *findings* list).

    A finding whose severity is missing or ``None`` is ranked as INFO; a
    severity label that is not in ``_SEV_RANK`` ranks 0 and never matches.
    """
    threshold_rank = _SEV_RANK.get(threshold.upper(), 2)

    for finding in findings:
        severity = finding.get("severity")
        # ``.get(key, default)`` only covers an absent key; an explicit None
        # would otherwise crash on ``.upper()``.
        if severity is None:
            severity = "INFO"
        if _SEV_RANK.get(str(severity).upper(), 0) >= threshold_rank:
            return True
    return False
|
|
|
|
|
|
|
|
|
@app.command()
def version() -> None:
    """Print the hf-scanner version."""
    # The docstring above is what Typer shows as this command's help text,
    # so it is intentionally left as a single short sentence.
    banner = f"hf-scanner {_VERSION}"
    typer.echo(banner)
|
|
|
|
|
@app.command("list-rules")
def list_rules() -> None:
    """List all bundled Semgrep rule packs."""
    # Security packs come first, then performance, then LLM.
    all_packs = ALL_SECURITY + ALL_PERFORMANCE + ALL_LLM

    # Fixed-width table: header row, a 70-character rule, then one row per pack.
    header_row = f"{'Pack':<30} {'Category':<14} Path"
    typer.echo(header_row)
    typer.echo("-" * 70)

    for entry in all_packs:
        # Each entry unpacks as (label, path, category); the row prints
        # category before path.
        pack_label, pack_path, pack_category = entry
        typer.echo(f"{pack_label:<30} {pack_category:<14} {pack_path}")
|
|
|
|
|
@app.command("self-test")
def self_test() -> None:
    """Check that all external tools are installed and executable."""
    # (binary name, human-readable description) for every tool probed below.
    tools = [
        ("semgrep", "Static analysis (Python, JS, …)"),
        ("bandit", "Python security linter"),
        ("detect-secrets", "Secret detection"),
        ("pip-audit", "Dependency CVE scanner"),
        ("ruff", "Fast Python linter (perf rules)"),
        ("gitleaks", "Git history secret scanner"),
        ("hadolint", "Dockerfile linter"),
        ("agent-audit", "OWASP Agentic Top 10 scanner"),
    ]

    # The widest status text is "✗ MISSING" (9 characters); a column narrower
    # than that pushes the description out of alignment on failing rows.
    status_width = 10

    all_found = True
    typer.echo(f"{'Tool':<18} {'Status':<{status_width}} Description")
    typer.echo("-" * 60)
    for name, desc in tools:
        found = have_binary(name)
        if not found:
            all_found = False
        status = "✓ ok" if found else "✗ MISSING"
        typer.echo(f"{name:<18} {status:<{status_width}} {desc}")

    typer.echo("")
    # NOTE(review): bootstrap runs after the table is printed. If it installs
    # gitleaks/hadolint, rows above may already read MISSING for tools that are
    # available afterwards — confirm the intended order.
    bootstrap_result = bootstrap_binaries()
    typer.echo(
        f"[bootstrap] gitleaks={bootstrap_result.get('gitleaks')}, "
        f"hadolint={bootstrap_result.get('hadolint')}"
    )

    if not all_found:
        typer.echo(
            "\nSome tools are missing. Run: pip install hf-security-scanner[full]",
            err=True,
        )
        raise typer.Exit(code=EXIT_ERROR)
|
|
|
|
|
def _utc_timestamp() -> str:
    """Return the current UTC time as ISO-8601 text ending in a single ``Z``."""
    # Function-scope import: the module's top-level imports do not include datetime.
    from datetime import datetime, timezone

    # isoformat() on an aware UTC datetime already ends in "+00:00". Appending
    # "Z" to that would give the invalid "...+00:00Z", so swap the offset instead.
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")


def _resolve_output_stem(out: Optional[str]) -> Path:
    """Turn the ``--out`` value into a file stem (path without extension).

    An existing directory gets ``scan_report`` appended; any other value is
    used as the stem itself. With no value the stem lives in the system temp
    directory. The stem's parent directory is created when missing.
    """
    if not out:
        return Path(tempfile.gettempdir()) / "scan_report"

    out_path = Path(out)
    stem = out_path / "scan_report" if out_path.is_dir() else out_path
    # Without this, a stem such as "build/reports/scan" fails at open() time.
    stem.parent.mkdir(parents=True, exist_ok=True)
    return stem


def _write_reports(fmt: str, stem: Path, findings: list, scan_meta: dict) -> list:
    """Write the report files selected by *fmt* and return their paths."""
    written = []

    # Extensions are appended as text (not Path.with_suffix) so a stem that
    # already contains a dot, e.g. "report.v1", is not truncated.
    if fmt in ("html", "both"):
        html_path = str(stem) + ".html"
        with open(html_path, "w", encoding="utf-8") as fh:
            fh.write(generate_html_report(findings, scan_meta))
        written.append(html_path)

    if fmt in ("sarif", "both"):
        sarif_path = str(stem) + ".sarif"
        with open(sarif_path, "w", encoding="utf-8") as fh:
            json.dump(generate_sarif(findings, scan_meta), fh, indent=2)
        written.append(sarif_path)

    if fmt == "json":
        json_path = str(stem) + ".json"
        with open(json_path, "w", encoding="utf-8") as fh:
            json.dump(findings, fh, indent=2, default=str)
        written.append(json_path)

    return written


@app.command()
def scan(
    target: Annotated[str, typer.Argument(help="URL or local path to scan.")],
    format: Annotated[str, typer.Option(
        "--format", "-f",
        help="Output format: html | sarif | json | both (html+sarif).",
    )] = "both",
    out: Annotated[Optional[str], typer.Option(
        "--out", "-o",
        help="Output directory or file stem (without extension). Default: temp dir.",
    )] = None,
    security: Annotated[bool, typer.Option("--security/--no-security")] = True,
    llm: Annotated[bool, typer.Option("--llm/--no-llm")] = True,
    performance: Annotated[bool, typer.Option("--performance/--no-performance")] = True,
    deep_history: Annotated[bool, typer.Option("--deep-history")] = False,
    baseline: Annotated[Optional[str], typer.Option(
        "--baseline",
        help="Path to a baseline JSON file. Known findings will be suppressed.",
    )] = None,
    create_baseline: Annotated[Optional[str], typer.Option(
        "--create-baseline",
        help="Save current findings as a new baseline to this path.",
    )] = None,
    ignore_file: Annotated[str, typer.Option(
        "--ignore-file",
        help="Path to an .hfscanignore file.",
    )] = ".hfscanignore",
    severity_threshold: Annotated[str, typer.Option(
        "--severity-threshold",
        help="Exit 1 only when findings at or above this level exist. "
             "Choices: ERROR | HIGH | WARNING | INFO.",
    )] = "WARNING",
    hf_token: Annotated[Optional[str], typer.Option(
        "--hf-token", envvar="HF_TOKEN",
        help="HuggingFace API token for private repos.",
    )] = None,
    quiet: Annotated[bool, typer.Option("--quiet/--verbose")] = False,
) -> None:
    """Scan a Git repo, HF Space, or local directory for security issues."""
    # Pipeline: validate options -> scan -> baseline filter -> ignore rules ->
    # optional baseline save -> write reports -> exit code from the threshold.
    # The parameter name ``format`` shadows the builtin but is part of the
    # command's interface, so it is kept.

    fmt = format.lower()
    if fmt not in ("html", "sarif", "json", "both"):
        typer.echo(f"Unknown format '{format}'. Choose html, sarif, json, or both.",
                   err=True)
        raise typer.Exit(code=EXIT_USAGE)

    # An unrecognised threshold is ranked as WARNING by the threshold check;
    # say so instead of letting a typo change gating silently.
    if severity_threshold.upper() not in _SEV_RANK:
        typer.echo(
            f"Unknown severity threshold '{severity_threshold}'; treating it as WARNING.",
            err=True,
        )

    if not quiet:
        typer.echo(f"[hf-scanner] scanning {target} …")

    # Top-level boundary: any scanner failure becomes EXIT_ERROR with a message.
    try:
        findings, log_lines = scan_repo(
            target,
            hf_token=hf_token,
            deep_history=deep_history,
            run_security=security,
            run_performance=performance,
            run_llm=llm,
        )
    except Exception as exc:
        typer.echo(f"Scan error: {exc}", err=True)
        raise typer.Exit(code=EXIT_ERROR) from exc

    if not quiet:
        for line in log_lines:
            typer.echo(f" {line}")

    # Drop findings already recorded in the baseline file, if one was given.
    if baseline:
        if os.path.isfile(baseline):
            known = load_baseline(baseline)
            findings, suppressed = filter_by_baseline(findings, known)
            if not quiet:
                typer.echo(f"[baseline] suppressed {len(suppressed)} known findings")
        else:
            # Previously skipped without a word; the scan still proceeds unfiltered.
            typer.echo(
                f"[baseline] file not found: {baseline} (no findings suppressed)",
                err=True,
            )

    # Apply ignore rules; an empty rule set leaves the findings untouched.
    ignore_rules = parse_ignore_file(ignore_file)
    if ignore_rules:
        findings, ignored_count = apply_ignore_rules(findings, ignore_rules)
        if not quiet:
            typer.echo(f"[ignore] {ignored_count} findings suppressed by {ignore_file}")

    # The new baseline captures what is left after both filters above.
    if create_baseline:
        save_baseline(findings, create_baseline)
        if not quiet:
            typer.echo(f"[baseline] saved {len(findings)} fingerprints → {create_baseline}")

    scan_meta = {
        "target": target,
        "n_targets": 1,
        "timestamp": _utc_timestamp(),
        "footer": f"hf-scanner {_VERSION}",
    }

    try:
        stem = _resolve_output_stem(out)
        written = _write_reports(fmt, stem, findings, scan_meta)
    except OSError as exc:
        typer.echo(f"Could not write report: {exc}", err=True)
        raise typer.Exit(code=EXIT_ERROR) from exc

    # JSON mode also prints the findings to stdout, in addition to the file.
    if fmt == "json":
        typer.echo(json.dumps(findings, indent=2, default=str))

    if not quiet:
        typer.echo(f"\n[hf-scanner] {len(findings)} findings")
        for p in written:
            typer.echo(f" → {p}")

    if _findings_exceed_threshold(findings, severity_threshold):
        raise typer.Exit(code=EXIT_FINDINGS)
    raise typer.Exit(code=EXIT_CLEAN)
|
|
|
|
|
def main() -> None:
    """Run the Typer application defined in this module."""
    app()


# Allow running the file directly as a script.
if __name__ == "__main__":
    main()
|
|
|