Spaces:
Running on Zero
Running on Zero
Fix Colab OOM: cap seq length + smaller batch
Browse files - colab_build_index.py +6 -2
colab_build_index.py
CHANGED
|
@@ -14,8 +14,10 @@ USAGE (paste into a Colab cell, or upload this file and `%run` it):
|
|
| 14 |
Cell 0 (install):
|
| 15 |
!pip install -q "transformers<5" sentence-transformers einops faiss-cpu huggingface_hub
|
| 16 |
"""
|
| 17 |
-
import json
|
| 18 |
import os
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
import faiss
|
| 21 |
import numpy as np
|
|
@@ -26,7 +28,8 @@ from sentence_transformers import SentenceTransformer
|
|
| 26 |
SPACE_REPO = os.environ.get("SPACE_REPO", "<user>/gdscript-assistant") # <-- set
|
| 27 |
HF_TOKEN = os.environ.get("HF_TOKEN", "") # <-- set (write)
|
| 28 |
MODEL = "jinaai/jina-embeddings-v2-base-code"
|
| 29 |
-
BATCH =
|
|
|
|
| 30 |
# ───────────────────────────────────────────────────────────────────────────
|
| 31 |
|
| 32 |
login(token=HF_TOKEN)
|
|
@@ -49,6 +52,7 @@ print(f"Loaded {len(ids)} chunks")
|
|
| 49 |
|
| 50 |
# 2. Embed on GPU (normalized -> cosine via inner product).
|
| 51 |
model = SentenceTransformer(MODEL, trust_remote_code=True, device="cuda")
|
|
|
|
| 52 |
vecs = model.encode(texts, batch_size=BATCH, normalize_embeddings=True,
|
| 53 |
convert_to_numpy=True, show_progress_bar=True)
|
| 54 |
vecs = vecs.astype(np.float32)
|
|
|
|
| 14 |
Cell 0 (install):
|
| 15 |
!pip install -q "transformers<5" sentence-transformers einops faiss-cpu huggingface_hub
|
| 16 |
"""
|
|
|
|
| 17 |
import os
|
| 18 |
+
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
|
| 19 |
+
|
| 20 |
+
import json
|
| 21 |
|
| 22 |
import faiss
|
| 23 |
import numpy as np
|
|
|
|
| 28 |
SPACE_REPO = os.environ.get("SPACE_REPO", "<user>/gdscript-assistant") # <-- set
|
| 29 |
HF_TOKEN = os.environ.get("HF_TOKEN", "") # <-- set (write)
|
| 30 |
MODEL = "jinaai/jina-embeddings-v2-base-code"
|
| 31 |
+
BATCH = 32 # small batch + capped length avoids T4 OOM on long chunks
|
| 32 |
+
MAX_LEN = 1024 # chunks are mostly tiny (p90 ~242 tokens); cap bounds memory
|
| 33 |
# ───────────────────────────────────────────────────────────────────────────
|
| 34 |
|
| 35 |
login(token=HF_TOKEN)
|
|
|
|
| 52 |
|
| 53 |
# 2. Embed on GPU (normalized -> cosine via inner product).
|
| 54 |
model = SentenceTransformer(MODEL, trust_remote_code=True, device="cuda")
|
| 55 |
+
model.max_seq_length = MAX_LEN
|
| 56 |
vecs = model.encode(texts, batch_size=BATCH, normalize_embeddings=True,
|
| 57 |
convert_to_numpy=True, show_progress_bar=True)
|
| 58 |
vecs = vecs.astype(np.float32)
|