Spaces:
Running
Running
| from pinecone_utilsA import index_pdf as index_pdf_A | |
| from pinecone_utilsB import * | |
| from pdf_processing import get_existing_pdf, load_and_preprocess_pdf, split_text | |
def index_documents():
    """Index the text of every available PDF into the dense and sparse indexes.

    Each file returned by ``get_existing_pdf()`` is passed through
    ``load_and_preprocess_pdf`` and then ``split_text``; the resulting pieces
    are gathered into a single list that is handed first to ``index_pdf_A``
    and then to ``index_pdf_B``. A confirmation message is printed once both
    calls have returned.

    NOTE(review): ``index_pdf_B`` is not imported by name; presumably it is
    supplied by ``from pinecone_utilsB import *`` -- confirm.
    """
    # Load and preprocess the PDFs, accumulating their split text.
    chunks = []
    for path in get_existing_pdf():
        chunks += split_text(load_and_preprocess_pdf(path))

    # Index into the dense index (using pinecone_utilsA).
    index_pdf_A(chunks)
    # Index into the sparse index (using pinecone_utilsB).
    index_pdf_B(chunks)

    print("Indexation des documents terminée.")
# Run the indexing pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    index_documents()