update gemini
Browse files- rag_core/retriever.py +6 -2
rag_core/retriever.py
CHANGED
|
@@ -31,7 +31,7 @@ class Retriever:
|
|
| 31 |
embeddings.append(emb)
|
| 32 |
valid_texts.append(t)
|
| 33 |
except Exception as e:
|
| 34 |
-
logging.warning(f"❌ Lỗi embedding chunk {i}: {e}")
|
| 35 |
if not embeddings:
|
| 36 |
raise RuntimeError("Không có embedding nào thành công!")
|
| 37 |
|
|
@@ -47,12 +47,16 @@ class Retriever:
|
|
| 47 |
|
| 48 |
@log_timed("truy vấn FAISS")
|
| 49 |
def query(self, query_text, embed_fn, k=3):
|
|
|
|
|
|
|
| 50 |
q_emb = np.array([embed_fn(query_text)]).astype("float32")
|
| 51 |
D, I = self.index.search(q_emb, k)
|
| 52 |
return [self.texts[i] for i in I[0]]
|
| 53 |
|
| 54 |
@log_timed("bổ sung embedding bị thiếu")
|
| 55 |
def rescan_and_append(self, full_texts, embed_fn):
|
|
|
|
|
|
|
| 56 |
existing_set = set(self.texts)
|
| 57 |
new_texts = [t for t in full_texts if t not in existing_set]
|
| 58 |
if not new_texts:
|
|
@@ -65,7 +69,7 @@ class Retriever:
|
|
| 65 |
new_embeddings.append(emb)
|
| 66 |
self.texts.append(t)
|
| 67 |
except Exception as e:
|
| 68 |
-
logging.warning(f"❌ Lỗi embedding chunk mới {i}: {e}")
|
| 69 |
if new_embeddings:
|
| 70 |
self.index.add(np.array(new_embeddings).astype("float32"))
|
| 71 |
faiss.write_index(self.index, INDEX_PATH)
|
|
|
|
| 31 |
embeddings.append(emb)
|
| 32 |
valid_texts.append(t)
|
| 33 |
except Exception as e:
|
| 34 |
+
logging.warning(f"❌ Lỗi embedding chunk {i}: {e}\nNội dung chunk: {t[:300]}{'...' if len(t) > 300 else ''}")
|
| 35 |
if not embeddings:
|
| 36 |
raise RuntimeError("Không có embedding nào thành công!")
|
| 37 |
|
|
|
|
| 47 |
|
| 48 |
@log_timed("truy vấn FAISS")
|
| 49 |
def query(self, query_text, embed_fn, k=3):
|
| 50 |
+
if self.index is None:
|
| 51 |
+
raise RuntimeError("FAISS index chưa được xây dựng. Hãy build index trước khi truy vấn.")
|
| 52 |
q_emb = np.array([embed_fn(query_text)]).astype("float32")
|
| 53 |
D, I = self.index.search(q_emb, k)
|
| 54 |
return [self.texts[i] for i in I[0]]
|
| 55 |
|
| 56 |
@log_timed("bổ sung embedding bị thiếu")
|
| 57 |
def rescan_and_append(self, full_texts, embed_fn):
|
| 58 |
+
if self.index is None:
|
| 59 |
+
raise RuntimeError("FAISS index chưa được xây dựng. Hãy build index trước khi bổ sung embedding.")
|
| 60 |
existing_set = set(self.texts)
|
| 61 |
new_texts = [t for t in full_texts if t not in existing_set]
|
| 62 |
if not new_texts:
|
|
|
|
| 69 |
new_embeddings.append(emb)
|
| 70 |
self.texts.append(t)
|
| 71 |
except Exception as e:
|
| 72 |
+
logging.warning(f"❌ Lỗi embedding chunk mới {i}: {e}\nNội dung chunk: {t[:300]}{'...' if len(t) > 300 else ''}")
|
| 73 |
if new_embeddings:
|
| 74 |
self.index.add(np.array(new_embeddings).astype("float32"))
|
| 75 |
faiss.write_index(self.index, INDEX_PATH)
|