RAG_architectures / index_documents.py
Aidahaouas's picture
Dynamic Search Parameters Added
438d4f9
raw
history blame contribute delete
664 Bytes
from pinecone_utilsA import index_pdf as index_pdf_A
from pinecone_utilsB import *
from pdf_processing import get_existing_pdf, load_and_preprocess_pdf, split_text
def index_documents():
    """Chunk every available PDF and push the chunks to both indexes.

    The PDFs returned by ``get_existing_pdf()`` are each loaded and
    preprocessed with ``load_and_preprocess_pdf`` and cut into pieces by
    ``split_text``; the pieces from all files are gathered into one flat
    list, in file order. That list is then handed first to ``index_pdf_A``
    (the dense index, from ``pinecone_utilsA``) and then to ``index_pdf_B``
    (the sparse index; presumably provided by the star import of
    ``pinecone_utilsB`` -- confirm it is exported there).

    Prints a completion message once both indexing calls have returned.
    """
    # Load, preprocess and split each PDF, flattening all chunks together.
    chunks = [
        piece
        for pdf_path in get_existing_pdf()
        for piece in split_text(load_and_preprocess_pdf(pdf_path))
    ]

    # Dense index first (pinecone_utilsA), then sparse index (pinecone_utilsB).
    index_pdf_A(chunks)
    index_pdf_B(chunks)

    print("Indexation des documents terminée.")
# Script entry point: run the indexing pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    index_documents()