""" CLI client for the ragstudio backend. Talks HTTP to the long-running server so model weights stay loaded in one process. Usage: python query.py "your query" [top_k] [-m semantic-text|bm25|image|text|...] `text` is a group alias expanding to `semantic-text` + `bm25`. Set RAGSTUDIO_URL to point at a non-default backend (default `http://127.0.0.1:8000`). If the backend is not reachable, the CLI exits with a clear instruction to start it. """ import argparse import json import os import sys import urllib.error import urllib.parse import urllib.request DEFAULT_URL = os.environ.get("RAGSTUDIO_URL", "http://127.0.0.1:8000") def _http_json(url: str) -> dict: try: with urllib.request.urlopen(url, timeout=120) as r: return json.loads(r.read().decode("utf-8")) except urllib.error.HTTPError as e: body = e.read().decode("utf-8", "replace") try: return json.loads(body) except Exception: raise SystemExit(f"backend error {e.code}: {body}") from None except urllib.error.URLError as e: raise SystemExit( f"backend not reachable at {DEFAULT_URL} ({e.reason}).\n" f"Start it with: python backend/server.py" ) from None def _modalities() -> tuple[list[str], dict[str, list[str]]]: data = _http_json(f"{DEFAULT_URL}/api/modalities") return data["modalities"], data.get("groups", {}) def _expand(modalities, groups): out: list[str] = [] for m in modalities: for name in groups.get(m, (m,)): if name not in out: out.append(name) return out def search( query: str, top_k: int = 5, modalities=None, ) -> dict[str, list[tuple[float, str]]]: all_mods, groups = _modalities() selected = _expand(modalities, groups) if modalities is not None else list(all_mods) results: dict[str, list[tuple[float, str]]] = {} for name in selected: if name not in all_mods: print(f" ! unknown modality: {name} (known: {sorted(all_mods)})") continue url = ( f"{DEFAULT_URL}/api/search/{urllib.parse.quote(name)}?" + urllib.parse.urlencode({"q": query, "k": top_k}) ) data = _http_json(url) if "error" in data and not data.get("hits"): print(f"\n=== {name}: error ===\n {data['error']}") results[name] = [] continue hits = [(float(h["score"]), h["path"]) for h in data.get("hits", [])] results[name] = hits print(f"\n=== Top {len(hits)} {name} matches ===") for score, item in hits: print(f" {score:.3f} {item}") return results def main() -> None: parser = argparse.ArgumentParser(description=__doc__.strip()) parser.add_argument("query") parser.add_argument("top_k", nargs="?", type=int, default=5) parser.add_argument( "-m", "--modalities", help="Comma-separated modalities or group names (default: all).", ) args = parser.parse_args() mods = args.modalities.split(",") if args.modalities else None search(args.query, args.top_k, mods) if __name__ == "__main__": main()