tarakjc2c committed on
Commit
019fa63
·
1 Parent(s): bdf83a5

Fix pickle error: Dense index uses its own caching

Browse files
Files changed (1) hide show
  1. app_retrieval_cached.py +7 -21
app_retrieval_cached.py CHANGED
@@ -80,7 +80,7 @@ def _get_cache_key(corpora_config: Dict[str, dict]) -> str:
80
 
81
 
82
  class CachedRetriever:
83
- """Retriever with disk caching for BM25 and Dense indexes"""
84
 
85
  def __init__(self, corpora_config: Dict[str, dict], use_reranker: bool = False):
86
  self.corpora_config = corpora_config
@@ -89,8 +89,8 @@ class CachedRetriever:
89
 
90
  # Cache file paths
91
  self.bm25_cache = CACHE_DIR / f"bm25_{self.cache_key}.pkl"
92
- self.dense_cache = CACHE_DIR / f"dense_{self.cache_key}.pkl"
93
  self.docs_cache = CACHE_DIR / f"docs_{self.cache_key}.pkl"
 
94
 
95
  # Load or build indexes
96
  self.docs_all = self._load_or_build_docs()
@@ -148,26 +148,12 @@ class CachedRetriever:
148
  return bm25_index
149
 
150
  def _load_or_build_dense(self) -> DenseIndex:
151
- """Load Dense index from cache or build from scratch"""
152
- if self.dense_cache.exists():
153
- print(f"Loading Dense index from cache... ({self.dense_cache.name})")
154
- try:
155
- with open(self.dense_cache, 'rb') as f:
156
- dense_index = pickle.load(f)
157
- print(f" ✓ Dense index loaded from cache")
158
- return dense_index
159
- except Exception as e:
160
- print(f" ✗ Cache load failed: {e}")
161
- print(" → Rebuilding Dense index...")
162
-
163
- print("Building Dense index from scratch (this takes 5-8 minutes)...")
164
  dense_index = DenseIndex(self.docs_all)
165
-
166
- # Save to cache
167
- print(f"Saving Dense index to cache...")
168
- with open(self.dense_cache, 'wb') as f:
169
- pickle.dump(dense_index, f)
170
-
171
  return dense_index
172
 
173
 
 
80
 
81
 
82
  class CachedRetriever:
83
+ """Retriever with disk caching for BM25 and documents (Dense has its own caching)"""
84
 
85
  def __init__(self, corpora_config: Dict[str, dict], use_reranker: bool = False):
86
  self.corpora_config = corpora_config
 
89
 
90
  # Cache file paths
91
  self.bm25_cache = CACHE_DIR / f"bm25_{self.cache_key}.pkl"
 
92
  self.docs_cache = CACHE_DIR / f"docs_{self.cache_key}.pkl"
93
+ # Note: Dense index uses its own caching in .cache/embeddings/
94
 
95
  # Load or build indexes
96
  self.docs_all = self._load_or_build_docs()
 
148
  return bm25_index
149
 
150
  def _load_or_build_dense(self) -> DenseIndex:
151
+ """Build Dense index (note: Dense index has its own internal caching)"""
152
+ print("Initializing Dense index (uses internal caching)...")
153
+ # DenseIndex has its own caching system in .cache/embeddings/
154
+ # We don't need to pickle it - just let it build/load from its own cache
 
 
 
 
 
 
 
 
 
155
  dense_index = DenseIndex(self.docs_all)
156
+ print(f" ✓ Dense index ready")
 
 
 
 
 
157
  return dense_index
158
 
159