"""Build the Pinecone vector index for the medical PDF.

Running this script loads ``./Medical_book.pdf`` through the project helpers
in ``src.helper``, makes sure the ``virtual-doc`` Pinecone index exists, and
pushes the resulting chunks into it via ``PineconeVectorStore.from_documents``.

Both ``PINECONE_API_KEY`` and ``GROQ_API_KEY`` must be available in the
environment (or in a ``.env`` file picked up by ``load_dotenv``).
"""

import os

from dotenv import load_dotenv
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from pinecone import ServerlessSpec

from src.helper import load_file, filtering, chunking, download_embeddings


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is not set. Without this check the
            ``os.environ[...] = None`` assignment below would fail with an
            opaque ``TypeError: str expected, not NoneType``.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(
            f"Environment variable {name!r} is not set; "
            "define it in the environment or in a .env file."
        )
    return value


# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
GROQ_API_KEY = _require_env("GROQ_API_KEY")

# Re-export explicitly so libraries that read the keys from os.environ see them.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Document pipeline: load -> filter -> chunk. The helpers live in src.helper;
# their exact return types are not visible here (presumably LangChain
# Document lists -- TODO confirm).
file_path = "./Medical_book.pdf"
data = load_file(file_path)
docs = filtering(data)
chunks = chunking(docs)

embeddings = download_embeddings()

pinecone_api_key = PINECONE_API_KEY
pc = Pinecone(api_key=pinecone_api_key)

index_name = "virtual-doc"

# Create the index only on first run; later runs reuse the existing one.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        # NOTE(review): 384 must equal the output size of the model returned
        # by download_embeddings() -- confirm against src.helper.
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Handle to the index; not used below but kept as a module-level name.
index = pc.Index(index_name)

# Hand the chunks and the embedding model to the vector store for this index.
# NOTE(review): re-running the script presumably writes the same chunks
# again -- verify whether duplicates are a concern.
docsearch = PineconeVectorStore.from_documents(
    documents=chunks,
    embedding=embeddings,
    index_name=index_name,
)