"""Populate the "virtual-doc" Pinecone index from ./Medical_book.pdf.

Running this module:

1. loads environment variables from a ``.env`` file,
2. passes the PDF through the project helpers ``load_file`` ->
   ``filtering`` -> ``chunking`` and obtains an embedding model from
   ``download_embeddings``,
3. creates the serverless Pinecone index if it does not exist yet, and
4. uploads the chunks to that index through ``PineconeVectorStore``.

All work happens at import time; the intermediate results stay available
as module-level names.
"""
import os

from dotenv import load_dotenv
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from pinecone import ServerlessSpec

from src.helper import load_file, filtering, chunking, download_embeddings

load_dotenv()

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Fail early with a readable message: assigning None into os.environ would
# otherwise raise a bare "TypeError: str expected, not NoneType".
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; define it in the environment or in .env"
    )
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

# Nothing in this script hands the Groq key to a client, so it is only
# re-exported when it is actually present instead of aborting the run.
if GROQ_API_KEY is not None:
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Index settings.
# NOTE(review): the dimension presumably has to equal the vector size of the
# model returned by download_embeddings() -- confirm against src.helper.
INDEX_DIMENSION = 384
INDEX_METRIC = "cosine"
INDEX_CLOUD = "aws"
INDEX_REGION = "us-east-1"

file_path = "./Medical_book.pdf"
data = load_file(file_path)
docs = filtering(data)
chunks = chunking(docs)
embeddings = download_embeddings()

pinecone_api_key = PINECONE_API_KEY
pc = Pinecone(api_key=pinecone_api_key)

index_name = "virtual-doc"
# Only create the index on the first run; later runs reuse the existing one.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=INDEX_DIMENSION,
        metric=INDEX_METRIC,
        spec=ServerlessSpec(cloud=INDEX_CLOUD, region=INDEX_REGION),
    )

# Handle kept as a module-level name; the upload below addresses the index
# by name and does not use this object.
index = pc.Index(index_name)

docsearch = PineconeVectorStore.from_documents(
    documents=chunks,
    embedding=embeddings,
    index_name=index_name,
)