vivekchakraverty committed on
Commit
c314e63
·
verified ·
1 Parent(s): 777ea0e

Fix Colab OOM: cap seq length + smaller batch

Browse files
Files changed (1) hide show
  1. colab_build_index.py +6 -2
colab_build_index.py CHANGED
@@ -14,8 +14,10 @@ USAGE (paste into a Colab cell, or upload this file and `%run` it):
14
  Cell 0 (install):
15
  !pip install -q "transformers<5" sentence-transformers einops faiss-cpu huggingface_hub
16
  """
17
- import json
18
  import os
 
 
 
19
 
20
  import faiss
21
  import numpy as np
@@ -26,7 +28,8 @@ from sentence_transformers import SentenceTransformer
26
  SPACE_REPO = os.environ.get("SPACE_REPO", "<user>/gdscript-assistant") # <-- set
27
  HF_TOKEN = os.environ.get("HF_TOKEN", "") # <-- set (write)
28
  MODEL = "jinaai/jina-embeddings-v2-base-code"
29
- BATCH = 256
 
30
  # ───────────────────────────────────────────────────────────────────────────
31
 
32
  login(token=HF_TOKEN)
@@ -49,6 +52,7 @@ print(f"Loaded {len(ids)} chunks")
49
 
50
  # 2. Embed on GPU (normalized -> cosine via inner product).
51
  model = SentenceTransformer(MODEL, trust_remote_code=True, device="cuda")
 
52
  vecs = model.encode(texts, batch_size=BATCH, normalize_embeddings=True,
53
  convert_to_numpy=True, show_progress_bar=True)
54
  vecs = vecs.astype(np.float32)
 
14
  Cell 0 (install):
15
  !pip install -q "transformers<5" sentence-transformers einops faiss-cpu huggingface_hub
16
  """
 
17
  import os
18
+ os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
19
+
20
+ import json
21
 
22
  import faiss
23
  import numpy as np
 
28
  SPACE_REPO = os.environ.get("SPACE_REPO", "<user>/gdscript-assistant") # <-- set
29
  HF_TOKEN = os.environ.get("HF_TOKEN", "") # <-- set (write)
30
  MODEL = "jinaai/jina-embeddings-v2-base-code"
31
+ BATCH = 32 # small batch + capped length avoids T4 OOM on long chunks
32
+ MAX_LEN = 1024 # chunks are mostly tiny (p90 ~242 tokens); cap bounds memory
33
  # ───────────────────────────────────────────────────────────────────────────
34
 
35
  login(token=HF_TOKEN)
 
52
 
53
  # 2. Embed on GPU (normalized -> cosine via inner product).
54
  model = SentenceTransformer(MODEL, trust_remote_code=True, device="cuda")
55
+ model.max_seq_length = MAX_LEN
56
  vecs = model.encode(texts, batch_size=BATCH, normalize_embeddings=True,
57
  convert_to_numpy=True, show_progress_bar=True)
58
  vecs = vecs.astype(np.float32)