autoscan / cli.py
Chris4K's picture
Initial commit v5.0.0.
5248e3b verified
"""hf-scanner CLI — command-line interface for hf-security-scanner v4.0.
Usage examples:
hf-scanner scan https://huggingface.co/spaces/user/space
hf-scanner scan ./my-project --format both --out ./results
hf-scanner scan . --no-llm --severity-threshold WARNING
hf-scanner scan . --create-baseline baseline.json
hf-scanner scan . --baseline baseline.json --ignore-file .hfscanignore
hf-scanner list-rules
hf-scanner self-test
hf-scanner version
"""
import json
import os
import sys
import tempfile
from pathlib import Path
from typing import Optional
# Ensure project root is on sys.path when run via `python cli.py`
sys.path.insert(0, str(Path(__file__).parent))
import typer
from typing_extensions import Annotated
from core import (
apply_ignore_rules,
bootstrap_binaries,
filter_by_baseline,
have_binary,
load_baseline,
parse_ignore_file,
save_baseline,
)
from core.scanner import scan_repo
from report import generate_html_report, generate_sarif
from rules import ALL_LLM, ALL_PERFORMANCE, ALL_SECURITY
# Version string printed by the `version` command and embedded in the
# report footer written by `scan`.
_VERSION = "4.0.0"

# Root Typer application; the command functions in this module register on it
# through the `@app.command(...)` decorator.
app = typer.Typer(
    name="hf-scanner",
    # The separator is a real em dash; the previous text was a mis-decoded
    # UTF-8 byte sequence that rendered as garbage in `--help`.
    help="HF Security & Performance Scanner — scan HF Spaces and Git repos for vulnerabilities.",
    no_args_is_help=True,
)
# ── Exit codes ────────────────────────────────────────────────────────────────
# Process exit statuses raised through typer.Exit by the commands below:
#   EXIT_CLEAN    - scan finished with nothing at or above the threshold
#   EXIT_FINDINGS - scan finished with findings at or above the threshold
#   EXIT_ERROR    - the scan raised, or self-test found a missing tool
#   EXIT_USAGE    - an option value was rejected (e.g. unknown --format)
EXIT_CLEAN, EXIT_FINDINGS, EXIT_ERROR, EXIT_USAGE = 0, 1, 2, 3
_SEV_RANK = {"ERROR": 4, "HIGH": 3, "WARNING": 2, "INFO": 1}
def _findings_exceed_threshold(findings: list, threshold: str) -> bool:
thresh = _SEV_RANK.get(threshold.upper(), 2)
return any(_SEV_RANK.get(f.get("severity", "INFO").upper(), 0) >= thresh
for f in findings)
# ── Commands ──────────────────────────────────────────────────────────────────
# NOTE: the docstring doubles as the command's --help text, so it stays short.
@app.command()
def version() -> None:
    """Print the hf-scanner version."""
    banner = "hf-scanner " + _VERSION
    typer.echo(banner)
# Prints a three-column table (pack label, category, path) covering the
# security, performance and LLM pack lists, in that order. Each pack entry is
# unpacked as (label, path, category).
@app.command("list-rules")
def list_rules() -> None:
    """List all bundled Semgrep rule packs."""
    # One template for the header and every row keeps the columns aligned.
    row = "{:<30} {:<14} {}"
    typer.echo(row.format("Pack", "Category", "Path"))
    typer.echo(70 * "-")
    for label, path, category in ALL_SECURITY + ALL_PERFORMANCE + ALL_LLM:
        typer.echo(row.format(label, category, path))
# Prints one status row per external tool (as reported by `have_binary`), then
# calls `bootstrap_binaries` and prints the gitleaks/hadolint entries of its
# result. Exits with EXIT_ERROR when at least one tool was reported missing.
# NOTE: the docstring doubles as the command's --help text, so it stays short.
@app.command("self-test")
def self_test() -> None:
    """Check that all external tools are installed and executable."""
    tools = [
        ("semgrep", "Static analysis (Python, JS, …)"),
        ("bandit", "Python security linter"),
        ("detect-secrets", "Secret detection"),
        ("pip-audit", "Dependency CVE scanner"),
        ("ruff", "Fast Python linter (perf rules)"),
        ("gitleaks", "Git history secret scanner"),
        ("hadolint", "Dockerfile linter"),
        ("agent-audit", "OWASP Agentic Top 10 scanner"),
    ]
    # Real check/cross marks; the previous literals were mis-decoded UTF-8.
    ok_label, missing_label = "✓ ok", "✗ MISSING"
    # Size the column from the labels so the longer "MISSING" label no longer
    # pushes the description column out of alignment.
    status_width = max(len("Status"), len(ok_label), len(missing_label))

    all_present = True
    typer.echo(f"{'Tool':<18} {'Status':<{status_width}} Description")
    typer.echo("-" * 60)
    for name, desc in tools:
        found = have_binary(name)
        if not found:
            all_present = False
        status = ok_label if found else missing_label
        typer.echo(f"{name:<18} {status:<{status_width}} {desc}")

    typer.echo("")
    bootstrap_result = bootstrap_binaries()
    typer.echo(
        f"[bootstrap] gitleaks={bootstrap_result.get('gitleaks')}, "
        f"hadolint={bootstrap_result.get('hadolint')}"
    )

    if not all_present:
        typer.echo(
            "\nSome tools are missing. Run: pip install hf-security-scanner[full]",
            err=True,
        )
        raise typer.Exit(code=EXIT_ERROR)
def _utc_timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string ending in ``Z``."""
    from datetime import datetime, timezone  # local: not imported at file level

    # isoformat() on an aware UTC datetime already ends in "+00:00"; swap that
    # offset for "Z" rather than appending a second zone designator.
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")


def _resolve_output_stem(out: Optional[str]) -> Path:
    """Return the extension-less report path, creating its parent directory."""
    if out:
        out_path = Path(out)
        # A trailing separator marks a directory even if it does not exist yet.
        wants_dir = out_path.is_dir() or out.endswith(("/", os.sep))
        stem = out_path / "scan_report" if wants_dir else out_path
    else:
        stem = Path(tempfile.gettempdir()) / "scan_report"
    stem.parent.mkdir(parents=True, exist_ok=True)
    return stem


def _write_reports(fmt: str, stem: Path, findings: list, scan_meta: dict) -> list:
    """Write the report files selected by *fmt* and return their paths."""
    written = []
    # Extensions are appended to the string form (not Path.with_suffix) so a
    # stem that itself contains a dot is kept intact.
    if fmt in ("html", "both"):
        html_path = f"{stem}.html"
        with open(html_path, "w", encoding="utf-8") as fh:
            fh.write(generate_html_report(findings, scan_meta))
        written.append(html_path)
    if fmt in ("sarif", "both"):
        sarif_path = f"{stem}.sarif"
        with open(sarif_path, "w", encoding="utf-8") as fh:
            json.dump(generate_sarif(findings, scan_meta), fh, indent=2)
        written.append(sarif_path)
    if fmt == "json":
        payload = json.dumps(findings, indent=2, default=str)
        json_path = f"{stem}.json"
        with open(json_path, "w", encoding="utf-8") as fh:
            fh.write(payload)
        written.append(json_path)
        # Also print to stdout for piping
        typer.echo(payload)
    return written


# Pipeline: validate options -> scan_repo -> baseline suppression ->
# ignore-file suppression -> optional baseline creation -> write reports ->
# exit code.
#
# Exit codes (raised via typer.Exit):
#   EXIT_USAGE    - unknown --format value
#   EXIT_ERROR    - scan_repo raised, or a report file could not be written
#   EXIT_FINDINGS - findings at or above --severity-threshold remain
#   EXIT_CLEAN    - otherwise
#
# NOTE: the docstring doubles as the command's --help text, so it stays short.
@app.command()
def scan(
    target: Annotated[str, typer.Argument(help="URL or local path to scan.")],  # noqa: AGENT-034
    format: Annotated[str, typer.Option(
        "--format", "-f",
        help="Output format: html | sarif | json | both (html+sarif).",
    )] = "both",
    out: Annotated[Optional[str], typer.Option(
        "--out", "-o",
        help="Output directory or file stem (without extension). Default: temp dir.",
    )] = None,
    security: Annotated[bool, typer.Option("--security/--no-security")] = True,
    llm: Annotated[bool, typer.Option("--llm/--no-llm")] = True,
    performance: Annotated[bool, typer.Option("--performance/--no-performance")] = True,
    deep_history: Annotated[bool, typer.Option("--deep-history")] = False,
    baseline: Annotated[Optional[str], typer.Option(
        "--baseline",
        help="Path to a baseline JSON file. Known findings will be suppressed.",
    )] = None,
    create_baseline: Annotated[Optional[str], typer.Option(
        "--create-baseline",
        help="Save current findings as a new baseline to this path.",
    )] = None,
    ignore_file: Annotated[str, typer.Option(
        "--ignore-file",
        help="Path to an .hfscanignore file.",
    )] = ".hfscanignore",
    severity_threshold: Annotated[str, typer.Option(
        "--severity-threshold",
        help="Exit 1 only when findings at or above this level exist. "
             "Choices: ERROR | HIGH | WARNING | INFO.",
    )] = "WARNING",
    hf_token: Annotated[Optional[str], typer.Option(
        "--hf-token", envvar="HF_TOKEN",
        help="HuggingFace API token for private repos.",
    )] = None,
    quiet: Annotated[bool, typer.Option("--quiet/--verbose")] = False,
) -> None:
    """Scan a Git repo, HF Space, or local directory for security issues."""
    fmt = format.lower()
    if fmt not in ("html", "sarif", "json", "both"):
        typer.echo(f"Unknown format '{format}'. Choose html, sarif, json, or both.",
                   err=True)
        raise typer.Exit(code=EXIT_USAGE)

    # An unknown threshold used to fall back to WARNING silently; keep the
    # fallback (so existing invocations still run) but say so on stderr.
    threshold = severity_threshold.strip().upper()
    if threshold not in _SEV_RANK:
        typer.echo(
            f"Warning: unknown severity threshold '{severity_threshold}'; using WARNING.",
            err=True,
        )
        threshold = "WARNING"

    if not quiet:
        typer.echo(f"[hf-scanner] scanning {target} …")

    try:
        findings, log_lines = scan_repo(
            target,
            hf_token=hf_token,
            deep_history=deep_history,
            run_security=security,
            run_performance=performance,
            run_llm=llm,
        )
    except Exception as exc:  # noqa: BLE001
        typer.echo(f"Scan error: {exc}", err=True)
        raise typer.Exit(code=EXIT_ERROR) from exc

    if not quiet:
        for line in log_lines:
            typer.echo(f" {line}")

    # ── Baseline suppression ──────────────────────────────────────────────────
    if baseline:
        if os.path.isfile(baseline):
            known = load_baseline(baseline)
            findings, suppressed = filter_by_baseline(findings, known)
            if not quiet:
                typer.echo(f"[baseline] suppressed {len(suppressed)} known findings")
        else:
            # Previously skipped silently, which hid a mistyped --baseline path.
            typer.echo(
                f"Warning: baseline file '{baseline}' not found; no findings suppressed.",
                err=True,
            )

    # ── .hfscanignore suppression ─────────────────────────────────────────────
    ignore_rules = parse_ignore_file(ignore_file)
    if ignore_rules:
        findings, ignored_count = apply_ignore_rules(findings, ignore_rules)
        if not quiet:
            typer.echo(f"[ignore] {ignored_count} findings suppressed by {ignore_file}")

    # ── Create baseline ───────────────────────────────────────────────────────
    # Saved after both suppression steps, so it holds only remaining findings.
    if create_baseline:
        save_baseline(findings, create_baseline)
        if not quiet:
            typer.echo(f"[baseline] saved {len(findings)} fingerprints → {create_baseline}")

    # ── Write reports ─────────────────────────────────────────────────────────
    scan_meta = {
        "target": target,
        "n_targets": 1,
        "timestamp": _utc_timestamp(),
        "footer": f"hf-scanner {_VERSION}",
    }
    try:
        stem = _resolve_output_stem(out)
        written = _write_reports(fmt, stem, findings, scan_meta)
    except OSError as exc:
        typer.echo(f"Cannot write report: {exc}", err=True)
        raise typer.Exit(code=EXIT_ERROR) from exc

    if not quiet:
        typer.echo(f"\n[hf-scanner] {len(findings)} findings")
        for report_path in written:
            typer.echo(f" → {report_path}")

    # ── Exit code ─────────────────────────────────────────────────────────────
    if _findings_exceed_threshold(findings, threshold):
        raise typer.Exit(code=EXIT_FINDINGS)
    raise typer.Exit(code=EXIT_CLEAN)
def main() -> None:
    """Invoke the Typer ``app`` defined in this module."""
    app()


# Allow running the module directly (e.g. `python cli.py`).
if __name__ == "__main__":
    main()