Medical_Chatbot / src /store_index.py
rakib72642's picture
Add all files including PDFs with Git LFS
b4f404b
from dotenv import load_dotenv
import os
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from src.helper import load_pdf_file, filter_to_minimal_docs, text_split, download_hugging_face_embeddings
# Pull settings from a local .env file into the process environment.
load_dotenv()

# The Pinecone API key has to come from the environment; stop immediately
# with a clear error when it is absent or empty.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
if pinecone_api_key is None or pinecone_api_key == "":
    raise ValueError("PINECONE_API_KEY not found in environment variables")

# Build the Pinecone client from that key.
pc = Pinecone(api_key=pinecone_api_key)

# Name of the target index, and a handle to it obtained from the client.
index_name = "portfolio"
index = pc.Index(index_name)
# Load and process documents: load from 'data/', pass the result through
# filter_to_minimal_docs, then split it into chunks.
extracted_data = load_pdf_file(data='data/')
filter_data = filter_to_minimal_docs(extracted_data)
text_chunks = text_split(filter_data)

# Fail fast when nothing came out of the pipeline (presumably text_split
# returns a list - confirm in src.helper). With no chunks there is nothing to
# embed or upload, which most likely means 'data/' is missing or empty, so
# stop before downloading the embedding model.
if not text_chunks:
    raise ValueError("No text chunks were produced from the documents in 'data/'")

# Download embeddings
embeddings = download_hugging_face_embeddings()

# Create Pinecone vector store from the chunks, targeting the index named by
# index_name and using the embeddings object obtained above.
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings,
)