"""Build the jina FAISS index on a free Colab/Kaggle GPU and push it to the Space. Run this in a GPU Colab notebook (Runtime -> Change runtime type -> T4 GPU). It pulls chunks.jsonl from your Space repo, embeds all chunks with jina-embeddings-v2-base-code on the GPU (~minutes), builds the FAISS index in the exact format rag.py expects (cosine / IndexIDMap2, faiss_id == chunk id), and uploads embeddings.faiss + id_map.json back to the Space — so the ~280 MB index never touches your local machine. USAGE (paste into a Colab cell, or upload this file and `%run` it): 1) Set SPACE_REPO and HF_TOKEN below (token: https://huggingface.co/settings/tokens, write). 2) Run. When it finishes, the Space restarts with full RAG. Cell 0 (install): !pip install -q "transformers<5" sentence-transformers einops faiss-cpu huggingface_hub """ import os os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True") import json import faiss import numpy as np from huggingface_hub import hf_hub_download, login, upload_file from sentence_transformers import SentenceTransformer # ─── CONFIG ──────────────────────────────────────────────────────────────── SPACE_REPO = os.environ.get("SPACE_REPO", "/gdscript-assistant") # <-- set HF_TOKEN = os.environ.get("HF_TOKEN", "") # <-- set (write) MODEL = "jinaai/jina-embeddings-v2-base-code" BATCH = 32 # small batch + capped length avoids T4 OOM on long chunks MAX_LEN = 1024 # chunks are mostly tiny (p90 ~242 tokens); cap bounds memory # ─────────────────────────────────────────────────────────────────────────── login(token=HF_TOKEN) # 1. Pull chunks.jsonl from the Space repo (fast on Colab's connection). 
# Parsing helper for step 1 (the download that feeds it runs in the pipeline below).
def load_chunks(chunks_file):
    """Parse a chunks JSONL file into FAISS ids, texts and the id_map payload.

    Every non-blank line must be a JSON object with "id" and "text" keys;
    "origin_url" and "repo" are optional and default to "".

    Args:
        chunks_file: Path of the JSONL file (read as UTF-8).

    Returns:
        A tuple ``(ids, texts, meta)``: ``ids`` is the list of chunk ids as
        ints, ``texts`` the chunk texts in the same (file) order, and ``meta``
        maps ``str(id)`` to ``{"origin_url": ..., "repo": ...}``.

    Raises:
        KeyError: A record has no "id" or "text" key.
        ValueError: A line is not valid JSON, or an id is not integer-like.
    """
    ids, texts, meta = [], [], {}
    with open(chunks_file, encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            record = json.loads(line)
            # Normalise the id once so the FAISS id and the id_map key can
            # never disagree (e.g. "007" -> 7 in the index and "7" in the map).
            chunk_id = int(record["id"])
            ids.append(chunk_id)
            texts.append(record["text"])
            meta[str(chunk_id)] = {
                "origin_url": record.get("origin_url", ""),
                "repo": record.get("repo", ""),
            }
    return ids, texts, meta


# Pasting into a notebook cell, `%run`, and `python file.py` all execute with
# __name__ == "__main__", so the pipeline still runs in those cases. Variables
# stay module-level so they remain inspectable in the notebook afterwards.
if __name__ == "__main__":
    # Step 1: download chunks.jsonl from the Space repo and parse it.
    chunks_path = hf_hub_download(
        repo_id=SPACE_REPO, repo_type="space", filename="data/chunks.jsonl")
    ids, texts, meta = load_chunks(chunks_path)
    print(f"Loaded {len(ids)} chunks")
    if not ids:
        # Stop before the slow model load: the index dimension is read from
        # vecs.shape[1] below, which needs at least one embedded chunk.
        raise RuntimeError(
            "data/chunks.jsonl contains no chunks; nothing to index.")

    # 2. Embed on GPU (normalized -> cosine via inner product).
    model = SentenceTransformer(MODEL, trust_remote_code=True, device="cuda")
    model.max_seq_length = MAX_LEN
    vecs = model.encode(texts, batch_size=BATCH, normalize_embeddings=True,
                        convert_to_numpy=True, show_progress_bar=True)
    vecs = vecs.astype(np.float32)
    print("Embedded:", vecs.shape)

    # 3. Build FAISS index — IDMap2(FlatIP), faiss_id == chunk id (matches rag.py).
    index = faiss.IndexIDMap2(faiss.IndexFlatIP(vecs.shape[1]))
    index.add_with_ids(vecs, np.asarray(ids, dtype=np.int64))
    faiss.write_index(index, "embeddings.faiss")
    with open("id_map.json", "w", encoding="utf-8") as f:
        json.dump(meta, f)
    print("Index built:", index.ntotal, "vectors")

    # 4. Push the index back to the Space repo (Colab -> HF; not your machine).
    # Both files go in ONE commit so the Space never rebuilds with a new index
    # next to a stale id_map (two separate uploads create two commits).
    from huggingface_hub import CommitOperationAdd, create_commit

    create_commit(
        repo_id=SPACE_REPO,
        repo_type="space",
        operations=[
            CommitOperationAdd(path_in_repo=f"data/{fn}", path_or_fileobj=fn)
            for fn in ("embeddings.faiss", "id_map.json")
        ],
        commit_message="Add jina FAISS index (built on GPU)",
    )
    print("Done — Space will restart with full RAG.")