philipp-zettl committed on
Commit
79ade56
·
verified ·
1 Parent(s): 0b9041c

Add vrom_hub/cli.py

Browse files
Files changed (1) hide show
  1. vrom_hub/cli.py +232 -0
vrom_hub/cli.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ vROM Hub CLI — build and publish vROM packages from the command line.
4
+
5
+ Usage:
6
+ # Build from markdown files
7
+ python -m vrom_hub.cli build my-docs \
8
+ --name "My Docs" \
9
+ --description "My project documentation" \
10
+ --files docs/*.md \
11
+ --output ./vrom_output
12
+
13
+ # Build from URLs
14
+ python -m vrom_hub.cli build my-docs \
15
+ --name "My Docs" \
16
+ --urls https://example.com/docs/page1 https://example.com/docs/page2
17
+
18
+ # Build + upload to registry
19
+ python -m vrom_hub.cli submit my-docs \
20
+ --name "My Docs" \
21
+ --description "My project docs" \
22
+ --files docs/*.md \
23
+ --tags my-project api
24
+
25
+ # List registry
26
+ python -m vrom_hub.cli list
27
+
28
+ # Info about a specific vROM
29
+ python -m vrom_hub.cli info my-docs
30
+ """
31
+
32
+ import argparse
33
+ import glob
34
+ import json
35
+ import logging
36
+ import sys
37
+ from pathlib import Path
38
+
39
+
40
def cmd_build(args):
    """Build a vROM on local disk without uploading it.

    Reads ``registry``, ``max_tokens``, ``vrom_id``, ``version``,
    ``description`` and ``output`` from *args*, gathers the input pages with
    ``_collect_pages`` and prints a JSON summary (``vrom_id``, ``output_dir``,
    ``stats``) of the build result to stdout.

    Exits with status 1 when no pages could be collected.
    """
    from vrom_hub import VromHubBackend

    backend = VromHubBackend(
        registry_repo=args.registry,
        max_chunk_tokens=args.max_tokens,
    )

    collected = _collect_pages(args)
    if not collected:
        print("Error: No pages found. Provide --files or --urls.", file=sys.stderr)
        sys.exit(1)

    # Fall back to defaults for every optional value the user left out.
    build_kwargs = {
        "pages": collected,
        "vrom_id": args.vrom_id,
        "version": args.version or "1.0.0",
        "description": args.description or "",
        "output_dir": args.output or f"./vrom-{args.vrom_id}",
    }
    outcome = backend.build_vrom(**build_kwargs)

    summary = {key: outcome[key] for key in ("vrom_id", "output_dir", "stats")}
    print(json.dumps(summary, indent=2))
67
+
68
+
69
def cmd_submit(args):
    """Build a vROM and upload it to the registry.

    Reads ``registry``, ``max_tokens``, ``vrom_id``, ``name``,
    ``description``, ``version``, ``tags``, ``official`` and ``output`` from
    *args*, gathers the input pages with ``_collect_pages`` and prints a JSON
    summary (``vrom_id``, ``hub_url``, ``stats``) of the result to stdout.

    Exits with status 1 when no pages could be collected.
    """
    from vrom_hub import VromHubBackend

    backend = VromHubBackend(
        registry_repo=args.registry,
        max_chunk_tokens=args.max_tokens,
    )

    collected = _collect_pages(args)
    if not collected:
        print("Error: No pages found. Provide --files or --urls.", file=sys.stderr)
        sys.exit(1)

    # The display name defaults to the identifier; other blanks get defaults.
    submit_kwargs = {
        "vrom_id": args.vrom_id,
        "name": args.name or args.vrom_id,
        "description": args.description or "",
        "version": args.version or "1.0.0",
        "pages": collected,
        "tags": args.tags,
        "official": args.official,
        "output_dir": args.output,
        "upload": True,
    }
    outcome = backend.submit_project(**submit_kwargs)

    summary = {
        "vrom_id": outcome["vrom_id"],
        "hub_url": outcome.get("hub_url"),  # may be absent from the result
        "stats": outcome["stats"],
    }
    print(json.dumps(summary, indent=2))
100
+
101
+
102
def cmd_list(args):
    """Print one summary entry per vROM found in the registry.

    Uses ``args.registry`` to select the registry. Prints a short notice
    instead when the registry returns no entries.
    """
    from vrom_hub import VromHubBackend

    entries = VromHubBackend(registry_repo=args.registry).list_vroms()
    if not entries:
        print("No vROMs found in registry.")
        return

    for entry in entries:
        tag_text = ", ".join(entry.get("tags", []))
        ident = entry["id"]
        version = entry["version"]
        vectors = entry["vectors"]
        size_mb = entry.get("size_mb", "?")  # size is optional in an entry
        print(f" {ident:30s} v{version:8s} {vectors:5d} vectors {size_mb:>5}MB [{tag_text}]")
        print(f" {entry['description']}")
        print()
118
+
119
+
120
def cmd_info(args):
    """Print the registry entry whose id equals ``args.vrom_id`` as JSON.

    Writes an error to stderr and exits with status 1 when the registry
    listing contains no entry with that id.
    """
    from vrom_hub import VromHubBackend

    wanted = args.vrom_id
    listing = VromHubBackend(registry_repo=args.registry).list_vroms()

    # First entry with a matching id wins; None signals "not found".
    match = next((entry for entry in listing if entry["id"] == wanted), None)
    if match is not None:
        print(json.dumps(match, indent=2))
        return

    print(f"vROM '{wanted}' not found in registry.", file=sys.stderr)
    sys.exit(1)
134
+
135
+
136
def _collect_pages(args) -> list[dict]:
    """Collect documentation pages from CLI arguments.

    Args:
        args: Parsed argparse namespace. ``args.files`` (optional) is a list
            of glob patterns and ``args.urls`` (optional) is a list of URLs.
            Either attribute may be missing, ``None`` or empty.

    Returns:
        A list of page dicts with the keys ``content``, ``source_file``,
        ``url`` and ``title``. Pages read from files come first (pattern by
        pattern, each in sorted path order), followed by fetched URL pages in
        the order the URLs were given.

    A pattern that matches nothing and a URL that cannot be fetched are
    reported as warnings on stderr and skipped; errors while reading a
    matched file propagate to the caller.
    """
    pages = []

    # From files
    for pattern in getattr(args, "files", None) or []:
        # recursive=True makes "**" descend into sub-directories.
        matches = sorted(glob.glob(pattern, recursive=True))
        if not matches:
            print(f"Warning: No files match {pattern}", file=sys.stderr)
            continue
        for filepath in matches:
            path = Path(filepath)
            if not path.is_file():
                # Patterns such as "docs/*" also match directories, which
                # cannot be read as text.
                continue
            pages.append({
                "content": path.read_text(encoding="utf-8"),
                "source_file": str(path),
                "url": "",
                "title": None,
            })

    # From URLs (fetched here, one by one, through DocFetcher)
    urls = getattr(args, "urls", None) or []
    if urls:
        from vrom_hub.fetcher import DocFetcher
        fetcher = DocFetcher()
        for url in urls:
            try:
                doc_page = fetcher.fetch_url(url)
                pages.append({
                    "content": doc_page.content,
                    "source_file": doc_page.source_file,
                    "url": doc_page.url,
                    "title": doc_page.title,
                })
            except Exception as e:
                # Best effort: one bad URL must not abort the whole run.
                print(f"Warning: Failed to fetch {url}: {e}", file=sys.stderr)

    return pages
170
+
171
+
172
def main(argv=None):
    """Parse command-line arguments and dispatch to the chosen sub-command.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the process arguments, so ``main()`` behaves as a
            normal script entry point.

    The global options ``--registry`` and ``--verbose`` belong to the
    top-level parser; each sub-command stores its handler under ``func``.
    """
    parser = argparse.ArgumentParser(
        prog="vrom-hub",
        description="vROM Hub Backend — build and publish vROM packages",
    )
    parser.add_argument(
        "--registry", default="philipp-zettl/vrom-registry",
        help="HF dataset repo for the vROM registry",
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true",
        help="Enable verbose logging",
    )

    sub = parser.add_subparsers(dest="command", required=True)

    # ── build ──
    p_build = sub.add_parser("build", help="Build a vROM locally")
    p_build.add_argument("vrom_id", help="Unique vROM identifier")
    p_build.add_argument("--name", help="Human-readable name")
    p_build.add_argument("--description", "-d", help="Description")
    p_build.add_argument("--version", default="1.0.0", help="Version (default: 1.0.0)")
    p_build.add_argument("--files", nargs="+", help="Markdown file glob patterns")
    p_build.add_argument("--urls", nargs="+", help="URLs to fetch")
    p_build.add_argument("--output", "-o", help="Output directory")
    p_build.add_argument("--max-tokens", type=int, default=256, help="Max tokens per chunk")
    p_build.set_defaults(func=cmd_build)

    # ── submit ──
    p_submit = sub.add_parser("submit", help="Build + upload to registry")
    p_submit.add_argument("vrom_id", help="Unique vROM identifier")
    p_submit.add_argument("--name", help="Human-readable name")
    p_submit.add_argument("--description", "-d", help="Description")
    p_submit.add_argument("--version", default="1.0.0", help="Version (default: 1.0.0)")
    p_submit.add_argument("--files", nargs="+", help="Markdown file glob patterns")
    p_submit.add_argument("--urls", nargs="+", help="URLs to fetch")
    p_submit.add_argument("--output", "-o", help="Output directory (optional)")
    p_submit.add_argument("--tags", nargs="+", default=[], help="Tags for the vROM")
    p_submit.add_argument("--official", action="store_true", help="Mark as official")
    p_submit.add_argument("--max-tokens", type=int, default=256, help="Max tokens per chunk")
    p_submit.set_defaults(func=cmd_submit)

    # ── list ──
    p_list = sub.add_parser("list", help="List all vROMs in registry")
    p_list.set_defaults(func=cmd_list)

    # ── info ──
    p_info = sub.add_parser("info", help="Show details for a vROM")
    p_info.add_argument("vrom_id", help="vROM identifier")
    p_info.set_defaults(func=cmd_info)

    args = parser.parse_args(argv)

    # Configure logging before the handler runs so its output is visible.
    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=level, format="%(name)s | %(message)s")

    args.func(args)
229
+
230
+
231
# Run the CLI only when this module is executed as a script
# (e.g. ``python -m vrom_hub.cli``), not when it is imported.
if __name__ == "__main__":
    main()