Sarisha Das committed on
Commit
468fa48
·
1 Parent(s): 2bf862f
Files changed (2) hide show
  1. utils/bm25.py +14 -1
  2. utils/{utils.py → helpers.py} +0 -0
utils/bm25.py CHANGED
@@ -26,7 +26,7 @@ from langchain_core.documents import Document
26
  ROOT_FOLDER = Path(__file__).resolve().parent.parent
27
 
28
  sys.path.append(str(ROOT_FOLDER))
29
- from utils.utils import simple_tokenize
30
  from utils.eda_helpers import get_best_reviews
31
 
32
 
@@ -368,8 +368,21 @@ def load(index_path: str | Path = "data/processed/bm25_index.pkl") -> BM25Retrie
368
  f"BM25 index not found at '{index_path}'.\n"
369
  "Run build_and_save() from your notebook first."
370
  )
 
 
 
 
 
 
 
 
 
 
 
 
371
  with open(index_path, "rb") as f:
372
  retriever = pickle.load(f)
 
373
  print(f"BM25 index loaded ← {index_path}")
374
  return retriever
375
 
 
26
  ROOT_FOLDER = Path(__file__).resolve().parent.parent
27
 
28
  sys.path.append(str(ROOT_FOLDER))
29
+ from utils.helpers import simple_tokenize
30
  from utils.eda_helpers import get_best_reviews
31
 
32
 
 
368
  f"BM25 index not found at '{index_path}'.\n"
369
  "Run build_and_save() from your notebook first."
370
  )
371
+
372
+ # Patch: pickle saved simple_tokenize under 'utils' top-level namespace,
373
+ # but it is now defined in utils.helpers and re-exported via utils.bm25 — register it where pickle expects it
374
+ import sys
375
+ import types
376
+ from utils import bm25 as bm25_module
377
+
378
+ if "utils" not in sys.modules or not hasattr(sys.modules["utils"], "simple_tokenize"):
379
+ fake_utils = types.ModuleType("utils")
380
+ fake_utils.simple_tokenize = bm25_module.simple_tokenize
381
+ sys.modules["utils"] = fake_utils
382
+
383
  with open(index_path, "rb") as f:
384
  retriever = pickle.load(f)
385
+
386
  print(f"BM25 index loaded ← {index_path}")
387
  return retriever
388
 
utils/{utils.py → helpers.py} RENAMED
File without changes