vrom-hub / vrom_hub /cli.py
philipp-zettl's picture
Add vrom_hub/cli.py
79ade56 verified
#!/usr/bin/env python3
"""
vROM Hub CLI — build and publish vROM packages from the command line.
Usage:
# Build from markdown files
python -m vrom_hub.cli build my-docs \
--name "My Docs" \
--description "My project documentation" \
--files docs/*.md \
--output ./vrom_output
# Build from URLs
python -m vrom_hub.cli build my-docs \
--name "My Docs" \
--urls https://example.com/docs/page1 https://example.com/docs/page2
# Build + upload to registry
python -m vrom_hub.cli submit my-docs \
--name "My Docs" \
--description "My project docs" \
--files docs/*.md \
--tags my-project api
# List registry
python -m vrom_hub.cli list
# Info about a specific vROM
python -m vrom_hub.cli info my-docs
"""
import argparse
import glob
import json
import logging
import sys
from pathlib import Path
def cmd_build(args):
    """Build a vROM package on disk and print a JSON summary of the result.

    Args:
        args: Parsed CLI namespace. Reads ``registry``, ``max_tokens``,
            ``vrom_id``, ``version``, ``description`` and ``output``; the
            page sources are gathered by ``_collect_pages(args)``.

    Exits with status 1 (message on stderr) when no pages were collected.
    """
    # Imported lazily so the backend is only loaded for commands that use it.
    from vrom_hub import VromHubBackend

    backend = VromHubBackend(
        registry_repo=args.registry,
        max_chunk_tokens=args.max_tokens,
    )

    collected = _collect_pages(args)
    if not collected:
        print("Error: No pages found. Provide --files or --urls.", file=sys.stderr)
        sys.exit(1)

    # Fall back to a per-vROM directory when no --output was given.
    target_dir = args.output or f"./vrom-{args.vrom_id}"
    build_result = backend.build_vrom(
        pages=collected,
        vrom_id=args.vrom_id,
        version=args.version or "1.0.0",
        description=args.description or "",
        output_dir=target_dir,
    )

    # Only these three fields of the backend result are reported.
    summary = {key: build_result[key] for key in ("vrom_id", "output_dir", "stats")}
    print(json.dumps(summary, indent=2))
def cmd_submit(args):
    """Build a vROM and submit it through the backend with ``upload=True``.

    Args:
        args: Parsed CLI namespace. Reads ``registry``, ``max_tokens``,
            ``vrom_id``, ``name``, ``description``, ``version``, ``tags``,
            ``official`` and ``output``; the page sources are gathered by
            ``_collect_pages(args)``.

    Prints a JSON summary (``vrom_id``, ``hub_url``, ``stats``) on success and
    exits with status 1 (message on stderr) when no pages were collected.
    """
    # Imported lazily so the backend is only loaded for commands that use it.
    from vrom_hub import VromHubBackend

    backend = VromHubBackend(
        registry_repo=args.registry,
        max_chunk_tokens=args.max_tokens,
    )

    collected = _collect_pages(args)
    if not collected:
        print("Error: No pages found. Provide --files or --urls.", file=sys.stderr)
        sys.exit(1)

    # The display name defaults to the vROM identifier itself.
    display_name = args.name or args.vrom_id
    submission = backend.submit_project(
        vrom_id=args.vrom_id,
        name=display_name,
        description=args.description or "",
        version=args.version or "1.0.0",
        pages=collected,
        tags=args.tags,
        official=args.official,
        output_dir=args.output,
        upload=True,
    )

    summary = {
        "vrom_id": submission["vrom_id"],
        # "hub_url" is read with .get(), so a missing key is reported as null.
        "hub_url": submission.get("hub_url"),
        "stats": submission["stats"],
    }
    print(json.dumps(summary, indent=2))
def cmd_list(args):
    """Print one summary entry per vROM returned by the registry backend.

    Args:
        args: Parsed CLI namespace; only ``registry`` is read.

    Each entry is printed as a fixed-width line (id, version, vector count,
    size, tags), followed by its description and a blank line. A short notice
    is printed instead when the backend returns nothing.
    """
    from vrom_hub import VromHubBackend

    registry_entries = VromHubBackend(registry_repo=args.registry).list_vroms()
    if not registry_entries:
        print("No vROMs found in registry.")
        return

    for entry in registry_entries:
        # "tags" and "size_mb" are optional; the other keys are indexed directly.
        tags = ", ".join(entry.get("tags", []))
        print(f" {entry['id']:30s} v{entry['version']:8s} {entry['vectors']:5d} vectors {entry.get('size_mb', '?'):>5}MB [{tags}]")
        print(f" {entry['description']}")
        print()
def cmd_info(args):
    """Print the registry entry whose ``id`` equals ``args.vrom_id`` as JSON.

    Args:
        args: Parsed CLI namespace; reads ``registry`` and ``vrom_id``.

    Exits with status 1 (message on stderr) when no entry matches.
    """
    from vrom_hub import VromHubBackend

    backend = VromHubBackend(registry_repo=args.registry)

    # First entry with a matching id wins; None signals "not found".
    found = next(
        (entry for entry in backend.list_vroms() if entry["id"] == args.vrom_id),
        None,
    )
    if found is None:
        print(f"vROM '{args.vrom_id}' not found in registry.", file=sys.stderr)
        sys.exit(1)

    print(json.dumps(found, indent=2))
def _collect_pages(args) -> list[dict]:
    """Collect documentation pages from the ``--files`` / ``--urls`` CLI arguments.

    Args:
        args: Parsed CLI namespace. ``files`` (glob patterns) and ``urls`` are
            both optional; a missing, ``None`` or empty attribute is skipped.

    Returns:
        A list of page dicts with the keys ``content``, ``source_file``,
        ``url`` and ``title``. File pages come first (sorted within each
        pattern), followed by URL pages in the order they were given.

    Raises:
        OSError, UnicodeDecodeError: If a matched file cannot be read as UTF-8.
    """
    pages = []

    # From files. ``recursive=True`` lets a quoted ``docs/**/*.md`` pattern
    # descend into subdirectories; patterns without ``**`` are unaffected.
    for pattern in getattr(args, "files", None) or []:
        matched_files = [
            path
            for path in map(Path, sorted(glob.glob(pattern, recursive=True)))
            # A pattern such as ``docs/*`` also matches directories, which
            # cannot be read as text, so only regular files are kept.
            if path.is_file()
        ]
        if not matched_files:
            # Surface typos instead of silently dropping the pattern.
            print(f"Warning: No files matched {pattern}", file=sys.stderr)
            continue
        for path in matched_files:
            pages.append({
                "content": path.read_text(encoding="utf-8"),
                "source_file": str(path),
                "url": "",
                "title": None,
            })

    # From URLs: fetched right here through DocFetcher. The import is local so
    # the fetcher is only loaded when URLs were actually supplied.
    urls = getattr(args, "urls", None) or []
    if urls:
        from vrom_hub.fetcher import DocFetcher

        fetcher = DocFetcher()
        for url in urls:
            try:
                doc_page = fetcher.fetch_url(url)
                pages.append({
                    "content": doc_page.content,
                    "source_file": doc_page.source_file,
                    "url": doc_page.url,
                    "title": doc_page.title,
                })
            except Exception as e:
                # Deliberate best-effort: one failing URL is reported and
                # skipped rather than aborting the whole collection.
                print(f"Warning: Failed to fetch {url}: {e}", file=sys.stderr)

    return pages
def _add_page_source_arguments(sub_parser, output_help):
    """Register the arguments shared by the ``build`` and ``submit`` subcommands."""
    sub_parser.add_argument("vrom_id", help="Unique vROM identifier")
    sub_parser.add_argument("--name", help="Human-readable name")
    sub_parser.add_argument("--description", "-d", help="Description")
    sub_parser.add_argument("--version", default="1.0.0", help="Version (default: 1.0.0)")
    sub_parser.add_argument("--files", nargs="+", help="Markdown file glob patterns")
    sub_parser.add_argument("--urls", nargs="+", help="URLs to fetch")
    sub_parser.add_argument("--output", "-o", help=output_help)


def main(argv=None):
    """Parse the command line and dispatch to the selected subcommand.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, so existing ``main()`` callers
            behave exactly as before.

    The global ``--registry`` and ``--verbose`` options belong to the
    top-level parser and therefore must precede the subcommand name.
    """
    parser = argparse.ArgumentParser(
        prog="vrom-hub",
        description="vROM Hub Backend — build and publish vROM packages",
    )
    parser.add_argument(
        "--registry", default="philipp-zettl/vrom-registry",
        help="HF dataset repo for the vROM registry",
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true",
        help="Enable verbose logging",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    # ── build ──
    p_build = sub.add_parser("build", help="Build a vROM locally")
    _add_page_source_arguments(p_build, "Output directory")
    p_build.add_argument("--max-tokens", type=int, default=256, help="Max tokens per chunk")
    p_build.set_defaults(func=cmd_build)

    # ── submit ──
    p_submit = sub.add_parser("submit", help="Build + upload to registry")
    _add_page_source_arguments(p_submit, "Output directory (optional)")
    p_submit.add_argument("--tags", nargs="+", default=[], help="Tags for the vROM")
    p_submit.add_argument("--official", action="store_true", help="Mark as official")
    p_submit.add_argument("--max-tokens", type=int, default=256, help="Max tokens per chunk")
    p_submit.set_defaults(func=cmd_submit)

    # ── list ──
    p_list = sub.add_parser("list", help="List all vROMs in registry")
    p_list.set_defaults(func=cmd_list)

    # ── info ──
    p_info = sub.add_parser("info", help="Show details for a vROM")
    p_info.add_argument("vrom_id", help="vROM identifier")
    p_info.set_defaults(func=cmd_info)

    args = parser.parse_args(argv)

    # Logging is configured after parsing so --verbose can select the level.
    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=level, format="%(name)s | %(message)s")

    # Each subparser stored its handler in ``func`` via set_defaults().
    args.func(args)
# Script entry point: run the CLI only when this module is executed directly
# (e.g. ``python -m vrom_hub.cli``), never as a side effect of importing it.
if __name__ == "__main__":
    main()