Spaces:
Sleeping
Sleeping
| from app.infrastructure.models.my_models import ChunksResponse | |
| from app.infrastructure.repository.document_handeler_repository import ( | |
| DocumentHandelerRepository, | |
| ) | |
class GetAllChunkedTextFeature:
    """Collect every stored chunk and group the chunks by their document."""

    def __init__(self, document_handeler_repository: DocumentHandelerRepository):
        """Keep a reference to the repository used to fetch the documents."""
        self.document_handeler_repository = document_handeler_repository

    async def get_all_chunked_text(self):
        """Return all chunks grouped by document id and ordered by index.

        The first element of the value returned by the repository's
        ``get_all_documents()`` is iterated; every item is expected to expose
        a ``payload`` mapping with the keys ``"document_id"``,
        ``"chunk_index"`` and ``"chunk-text"``.
        NOTE(review): presumably the repository returns a
        ``(records, next_offset)`` pair -- confirm against the repository.

        Returns:
            A ``ChunksResponse`` whose ``data`` maps each document id to a
            list of ``{"index": ..., "text": ...}`` dicts sorted by
            ``"index"``. Document ids keep first-seen order.

        Raises:
            KeyError: If a payload lacks one of the expected keys.
        """
        records = self.document_handeler_repository.get_all_documents()[0]

        chunks_by_document = {}
        for record in records:
            payload = record.payload
            owner_id = payload["document_id"]
            position = payload["chunk_index"]
            content = payload["chunk-text"]
            chunks_by_document.setdefault(owner_id, []).append(
                {"index": position, "text": content}
            )

        # Chunks arrive in storage order; put each document's chunks back
        # into reading order before handing them out.
        ordered = {
            owner_id: sorted(chunks, key=lambda chunk: chunk["index"])
            for owner_id, chunks in chunks_by_document.items()
        }
        return ChunksResponse(data=ordered)