LightOnOCR-1B-Demo / download_model.py
DocUA's picture
feat: update ggml kernels, webui components, model templates, and build configurations
eb133b8
#!/usr/bin/env python3
import argparse
import os
import sys
from pathlib import Path
from huggingface_hub import HfApi, snapshot_download
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Download a model repository from Hugging Face Hub."
)
parser.add_argument(
"model_id",
nargs="?",
default="lightonai/LightOnOCR-1B-1025",
help="Model repository to download (default: %(default)s)",
)
parser.add_argument(
"--revision",
default=None,
help="Specific git revision (branch/tag/commit) to download.",
)
parser.add_argument(
"--cache-dir",
default=None,
help="Cache directory where the model snapshot will be stored.",
)
parser.add_argument(
"--local-dir",
default=None,
help="Optional local directory to copy the snapshot into after download.",
)
parser.add_argument(
"--token",
default=None,
help="Hugging Face access token; defaults to HF_TOKEN or HUGGINGFACEHUB_API_TOKEN env vars.",
)
parser.add_argument(
"--allow-pattern",
action="append",
default=None,
help="File glob pattern(s) to include when downloading.",
)
parser.add_argument(
"--ignore-pattern",
action="append",
default=None,
help="File glob pattern(s) to exclude when downloading.",
)
parser.add_argument(
"--offline",
action="store_true",
help="Run in offline mode, using only the local cache.",
)
return parser.parse_args()
def resolve_token(user_token: str | None) -> str | None:
if user_token:
return user_token
return os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
def ensure_auth(token: str | None) -> None:
if token:
return
try:
api = HfApi()
if api.whoami():
return
except Exception:
pass
raise RuntimeError(
"Hugging Face token not provided. Set HF_TOKEN or run `huggingface-cli login`."
)
def main() -> None:
    """Command-line entry point: fetch a model snapshot and print its path.

    Steps:
        1. Parse the command line and resolve the access token.
        2. Unless ``--offline`` was given, check that credentials exist.
        3. Call ``snapshot_download`` with the parsed options.
        4. Print the resolved snapshot path on stdout.

    Both the credential check and the download report failures the same way:
    a one-line message on stderr followed by exit status 1, so the user never
    sees a raw traceback for an expected error.
    """
    args = parse_args()
    token = resolve_token(args.token)

    # Offline runs only read the local cache, so no credential check is done.
    if not args.offline:
        try:
            ensure_auth(token)
        except RuntimeError as exc:
            print(f"{exc}", file=sys.stderr)
            sys.exit(1)

    try:
        snapshot_path = snapshot_download(
            repo_id=args.model_id,
            revision=args.revision,
            cache_dir=args.cache_dir,
            local_dir=args.local_dir,
            allow_patterns=args.allow_pattern,
            ignore_patterns=args.ignore_pattern,
            token=token,
            local_files_only=args.offline,
        )
    except Exception as exc:
        # Top-level boundary: turn any download failure into a clean exit.
        print(f"Failed to download {args.model_id}: {exc}", file=sys.stderr)
        sys.exit(1)

    print(f"Model snapshot available at: {Path(snapshot_path).resolve()}")
# Run the CLI only when this file is executed as a script, so importing the
# module (e.g. to reuse its helpers) does not start a download.
if __name__ == "__main__":
    main()