"""Populate the "portfolio" Pinecone index from the documents under ``data/``.

Executing this module loads the environment, connects to Pinecone, runs the
``src.helper`` document pipeline, and passes the resulting chunks and the
embedding model to ``PineconeVectorStore.from_documents``.
"""

import os

from dotenv import load_dotenv
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone

from src.helper import (
    download_hugging_face_embeddings,
    filter_to_minimal_docs,
    load_pdf_file,
    text_split,
)

# Pull settings from the .env file into the process environment.
load_dotenv()

# The Pinecone API key is mandatory: stop immediately when it is missing or empty.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
if not pinecone_api_key:
    raise ValueError("PINECONE_API_KEY not found in environment variables")

# Pinecone client and a handle on the target index.
pc = Pinecone(api_key=pinecone_api_key)
index_name = "portfolio"
# NOTE(review): `index` is not referenced again in this script; the vector
# store below is given `index_name` rather than this handle.
index = pc.Index(index_name)

# Document pipeline from src.helper:
# load_pdf_file -> filter_to_minimal_docs -> text_split.
extracted_data = load_pdf_file(data='data/')
filter_data = filter_to_minimal_docs(extracted_data)
text_chunks = text_split(filter_data)

# Embedding model supplied by the project helper.
embeddings = download_hugging_face_embeddings()

# Build the Pinecone vector store from the chunks and the embedding model.
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    index_name=index_name,
)