"""hf-scanner CLI — command-line interface for hf-security-scanner v4.0.

Usage examples:
    hf-scanner scan https://huggingface.co/spaces/user/space
    hf-scanner scan ./my-project --format both --out ./results
    hf-scanner scan . --no-llm --severity-threshold WARNING
    hf-scanner scan . --create-baseline baseline.json
    hf-scanner scan . --baseline baseline.json --ignore-file .hfscanignore
    hf-scanner list-rules
    hf-scanner self-test
    hf-scanner version
"""
import json
import os
import sys
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

# Ensure project root is on sys.path when run via `python cli.py`
sys.path.insert(0, str(Path(__file__).parent))

import typer
from typing_extensions import Annotated

from core import (
    apply_ignore_rules,
    bootstrap_binaries,
    filter_by_baseline,
    have_binary,
    load_baseline,
    parse_ignore_file,
    save_baseline,
)
from core.scanner import scan_repo
from report import generate_html_report, generate_sarif
from rules import ALL_LLM, ALL_PERFORMANCE, ALL_SECURITY

_VERSION = "4.0.0"

app = typer.Typer(
    name="hf-scanner",
    help="HF Security & Performance Scanner — scan HF Spaces and Git repos for vulnerabilities.",
    no_args_is_help=True,
)

# ── Exit codes ────────────────────────────────────────────────────────────────
EXIT_CLEAN = 0     # scan finished, nothing at or above the severity threshold
EXIT_FINDINGS = 1  # scan finished, at least one finding at or above the threshold
EXIT_ERROR = 2     # the scan, a report write, or the self-test failed
EXIT_USAGE = 3     # invalid command-line option value

# Severity label -> rank; a higher rank is more severe.
_SEV_RANK = {"ERROR": 4, "HIGH": 3, "WARNING": 2, "INFO": 1}


def _findings_exceed_threshold(findings: list, threshold: str) -> bool:
    """Return True if any finding's severity rank is >= the threshold's rank.

    Args:
        findings: Finding dicts; each may carry a ``"severity"`` label.
        threshold: Severity label, matched case-insensitively against
            ``_SEV_RANK``. An unrecognised label falls back to rank 2
            (WARNING).

    Returns:
        True when at least one finding reaches the threshold, else False.
    """
    thresh = _SEV_RANK.get(threshold.upper(), 2)
    for finding in findings:
        # A missing *or* explicitly-None severity counts as INFO; any other
        # unrecognised label ranks 0 and therefore never reaches a threshold.
        label = str(finding.get("severity") or "INFO").upper()
        if _SEV_RANK.get(label, 0) >= thresh:
            return True
    return False


def _utc_timestamp() -> str:
    """Return the current UTC time as an ISO 8601 string ending in ``Z``."""
    # isoformat() on an aware UTC datetime already ends in "+00:00"; swap that
    # offset for "Z" instead of appending a second UTC designator.
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")


def _resolve_output_stem(out: Optional[str]) -> Path:
    """Return the extension-less path that report files are derived from.

    An existing directory gets ``scan_report`` appended, any other value is
    used as the stem itself, and no value means the system temp directory.
    """
    if not out:
        return Path(tempfile.gettempdir()) / "scan_report"
    out_path = Path(out)
    return out_path / "scan_report" if out_path.is_dir() else out_path


def _write_reports(fmt: str, stem: Path, findings: list, scan_meta: dict) -> list:
    """Write the report file(s) selected by *fmt* and return their paths.

    Raises:
        OSError: If the parent directory cannot be created or a file cannot
            be written.
    """
    # Create the stem's parent so a path such as ``--out build/reports/run1``
    # works even when the directory does not exist yet.
    stem.parent.mkdir(parents=True, exist_ok=True)

    written = []
    # The extension is appended to the string form (rather than using
    # Path.with_suffix) so a stem that itself contains dots is kept intact.
    if fmt in ("html", "both"):
        html_path = str(stem) + ".html"
        with open(html_path, "w", encoding="utf-8") as fh:
            fh.write(generate_html_report(findings, scan_meta))
        written.append(html_path)
    if fmt in ("sarif", "both"):
        sarif_path = str(stem) + ".sarif"
        with open(sarif_path, "w", encoding="utf-8") as fh:
            json.dump(generate_sarif(findings, scan_meta), fh, indent=2)
        written.append(sarif_path)
    if fmt == "json":
        json_path = str(stem) + ".json"
        with open(json_path, "w", encoding="utf-8") as fh:
            json.dump(findings, fh, indent=2, default=str)
        written.append(json_path)
    return written


# ── Commands ──────────────────────────────────────────────────────────────────
# NOTE: Typer shows each command's docstring as its --help text, so the command
# docstrings below are user-facing strings; extra notes live in comments.

@app.command()
def version() -> None:
    """Print the hf-scanner version."""
    typer.echo(f"hf-scanner {_VERSION}")


@app.command("list-rules")
def list_rules() -> None:
    """List all bundled Semgrep rule packs."""
    # Each pack entry is unpacked as a (label, path, category) triple.
    packs = ALL_SECURITY + ALL_PERFORMANCE + ALL_LLM
    typer.echo(f"{'Pack':<30} {'Category':<14} Path")
    typer.echo("-" * 70)
    for label, path, category in packs:
        typer.echo(f"{label:<30} {category:<14} {path}")


@app.command("self-test")
def self_test() -> None:
    """Check that all external tools are installed and executable."""
    tools = [
        ("semgrep", "Static analysis (Python, JS, …)"),
        ("bandit", "Python security linter"),
        ("detect-secrets", "Secret detection"),
        ("pip-audit", "Dependency CVE scanner"),
        ("ruff", "Fast Python linter (perf rules)"),
        ("gitleaks", "Git history secret scanner"),
        ("hadolint", "Dockerfile linter"),
        ("agent-audit", "OWASP Agentic Top 10 scanner"),
    ]
    ok = True
    typer.echo(f"{'Tool':<18} {'Status':<8} Description")
    typer.echo("-" * 60)
    for name, desc in tools:
        found = have_binary(name)
        status = "✓ ok" if found else "✗ MISSING"
        if not found:
            ok = False
        typer.echo(f"{name:<18} {status:<8} {desc}")

    typer.echo("")
    bootstrap_result = bootstrap_binaries()
    typer.echo(
        f"[bootstrap] gitleaks={bootstrap_result.get('gitleaks')}, "
        f"hadolint={bootstrap_result.get('hadolint')}"
    )

    # Exit non-zero when any tool in the table above was reported missing.
    if not ok:
        typer.echo("\nSome tools are missing. Run: pip install hf-security-scanner[full]", err=True)
        raise typer.Exit(code=EXIT_ERROR)


# Exit status of `scan`: EXIT_USAGE for an invalid --format or
# --severity-threshold, EXIT_ERROR when the scan or a report write fails,
# EXIT_FINDINGS when a finding reaches the threshold, otherwise EXIT_CLEAN.
@app.command()
def scan(
    target: Annotated[str, typer.Argument(help="URL or local path to scan.")],  # noqa: AGENT-034
    format: Annotated[str, typer.Option(
        "--format", "-f",
        help="Output format: html | sarif | json | both (html+sarif).",
    )] = "both",
    out: Annotated[Optional[str], typer.Option(
        "--out", "-o",
        help="Output directory or file stem (without extension). Default: temp dir.",
    )] = None,
    security: Annotated[bool, typer.Option("--security/--no-security")] = True,
    llm: Annotated[bool, typer.Option("--llm/--no-llm")] = True,
    performance: Annotated[bool, typer.Option("--performance/--no-performance")] = True,
    deep_history: Annotated[bool, typer.Option("--deep-history")] = False,
    baseline: Annotated[Optional[str], typer.Option(
        "--baseline",
        help="Path to a baseline JSON file. Known findings will be suppressed.",
    )] = None,
    create_baseline: Annotated[Optional[str], typer.Option(
        "--create-baseline",
        help="Save current findings as a new baseline to this path.",
    )] = None,
    ignore_file: Annotated[str, typer.Option(
        "--ignore-file",
        help="Path to an .hfscanignore file.",
    )] = ".hfscanignore",
    severity_threshold: Annotated[str, typer.Option(
        "--severity-threshold",
        help="Exit 1 only when findings at or above this level exist. "
             "Choices: ERROR | HIGH | WARNING | INFO.",
    )] = "WARNING",
    hf_token: Annotated[Optional[str], typer.Option(
        "--hf-token", envvar="HF_TOKEN",
        help="HuggingFace API token for private repos.",
    )] = None,
    quiet: Annotated[bool, typer.Option("--quiet/--verbose")] = False,
) -> None:
    """Scan a Git repo, HF Space, or local directory for security issues."""
    # Validate option values up front so a typo fails fast, before scanning.
    fmt = format.lower()
    if fmt not in ("html", "sarif", "json", "both"):
        typer.echo(f"Unknown format '{format}'. Choose html, sarif, json, or both.", err=True)
        raise typer.Exit(code=EXIT_USAGE)
    if severity_threshold.upper() not in _SEV_RANK:
        typer.echo(
            f"Unknown severity threshold '{severity_threshold}'. "
            "Choose ERROR, HIGH, WARNING, or INFO.",
            err=True,
        )
        raise typer.Exit(code=EXIT_USAGE)

    if not quiet:
        typer.echo(f"[hf-scanner] scanning {target} …")

    # Top-level boundary: any scanner failure becomes a clean EXIT_ERROR.
    try:
        findings, log_lines = scan_repo(
            target,
            hf_token=hf_token,
            deep_history=deep_history,
            run_security=security,
            run_performance=performance,
            run_llm=llm,
        )
    except Exception as exc:  # noqa: BLE001
        typer.echo(f"Scan error: {exc}", err=True)
        raise typer.Exit(code=EXIT_ERROR) from exc

    if not quiet:
        for line in log_lines:
            typer.echo(f" {line}")

    # ── Baseline suppression ──────────────────────────────────────────────────
    suppressed_count = 0
    if baseline:
        if os.path.isfile(baseline):
            known = load_baseline(baseline)
            findings, suppressed = filter_by_baseline(findings, known)
            suppressed_count = len(suppressed)
            if not quiet:
                typer.echo(f"[baseline] suppressed {suppressed_count} known findings")
        else:
            # A missing baseline is almost certainly a misconfiguration; say so
            # on stderr instead of silently reporting every finding as new.
            typer.echo(
                f"[baseline] warning: baseline file '{baseline}' not found; "
                "no findings suppressed",
                err=True,
            )

    # ── .hfscanignore suppression ─────────────────────────────────────────────
    ignore_rules = parse_ignore_file(ignore_file)
    if ignore_rules:
        findings, ignored_count = apply_ignore_rules(findings, ignore_rules)
        if not quiet:
            typer.echo(f"[ignore] {ignored_count} findings suppressed by {ignore_file}")

    # ── Create baseline ───────────────────────────────────────────────────────
    # Saved after suppression, so the new baseline holds only what is left.
    if create_baseline:
        save_baseline(findings, create_baseline)
        if not quiet:
            typer.echo(f"[baseline] saved {len(findings)} fingerprints → {create_baseline}")

    # ── Write reports ─────────────────────────────────────────────────────────
    stem = _resolve_output_stem(out)
    scan_meta = {
        "target": target,
        "n_targets": 1,
        "timestamp": _utc_timestamp(),
        "footer": f"hf-scanner {_VERSION}",
    }

    try:
        written = _write_reports(fmt, stem, findings, scan_meta)
    except OSError as exc:
        typer.echo(f"Could not write report: {exc}", err=True)
        raise typer.Exit(code=EXIT_ERROR) from exc

    if fmt == "json":
        # Also print to stdout for piping
        typer.echo(json.dumps(findings, indent=2, default=str))

    if not quiet:
        typer.echo(f"\n[hf-scanner] {len(findings)} findings")
        for p in written:
            typer.echo(f" → {p}")

    # ── Exit code ─────────────────────────────────────────────────────────────
    if _findings_exceed_threshold(findings, severity_threshold):
        raise typer.Exit(code=EXIT_FINDINGS)
    raise typer.Exit(code=EXIT_CLEAN)


def main() -> None:
    """Run the Typer application (console-script entry point)."""
    app()


if __name__ == "__main__":
    main()