Spaces:
Paused
Paused
| from typing import Optional | |
| from core.rag.datasource.keyword.keyword_factory import Keyword | |
| from core.rag.datasource.vdb.vector_factory import Vector | |
| from core.rag.models.document import Document | |
| from models.dataset import Dataset, DocumentSegment | |
| class VectorService: | |
| def create_segments_vector( | |
| cls, keywords_list: Optional[list[list[str]]], segments: list[DocumentSegment], dataset: Dataset | |
| ): | |
| documents = [] | |
| for segment in segments: | |
| document = Document( | |
| page_content=segment.content, | |
| metadata={ | |
| "doc_id": segment.index_node_id, | |
| "doc_hash": segment.index_node_hash, | |
| "document_id": segment.document_id, | |
| "dataset_id": segment.dataset_id, | |
| }, | |
| ) | |
| documents.append(document) | |
| if dataset.indexing_technique == "high_quality": | |
| # save vector index | |
| vector = Vector(dataset=dataset) | |
| vector.add_texts(documents, duplicate_check=True) | |
| # save keyword index | |
| keyword = Keyword(dataset) | |
| if keywords_list and len(keywords_list) > 0: | |
| keyword.add_texts(documents, keywords_list=keywords_list) | |
| else: | |
| keyword.add_texts(documents) | |
| def update_segment_vector(cls, keywords: Optional[list[str]], segment: DocumentSegment, dataset: Dataset): | |
| # update segment index task | |
| # format new index | |
| document = Document( | |
| page_content=segment.content, | |
| metadata={ | |
| "doc_id": segment.index_node_id, | |
| "doc_hash": segment.index_node_hash, | |
| "document_id": segment.document_id, | |
| "dataset_id": segment.dataset_id, | |
| }, | |
| ) | |
| if dataset.indexing_technique == "high_quality": | |
| # update vector index | |
| vector = Vector(dataset=dataset) | |
| vector.delete_by_ids([segment.index_node_id]) | |
| vector.add_texts([document], duplicate_check=True) | |
| # update keyword index | |
| keyword = Keyword(dataset) | |
| keyword.delete_by_ids([segment.index_node_id]) | |
| # save keyword index | |
| if keywords and len(keywords) > 0: | |
| keyword.add_texts([document], keywords_list=[keywords]) | |
| else: | |
| keyword.add_texts([document]) | |