WorkflowApproval1 / ingest.py
stephenmccartney1234's picture
Create ingest.py
c2b1920 verified
raw
history blame contribute delete
775 Bytes
import os
import pinecone
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
# Ingestion script: read a PDF, embed its contents with OpenAI embeddings and
# store the result in a Pinecone index through LangChain's Pinecone wrapper.

# Credentials and the Pinecone environment are taken from the process
# environment rather than being hard-coded in the file.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
PINECONE_ENV = os.environ.get("PINECONE_ENV")

# Fail fast with one clear message that lists every unset (or empty) variable.
# Without this check a missing variable is passed on as None and only fails
# later, inside the third-party clients, with a far less obvious error.
_missing = [
    name
    for name, value in (
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("PINECONE_ENV", PINECONE_ENV),
    )
    if not value
]
if _missing:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing)
    )

# Configure the Pinecone client with the API key and environment read above.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)

# Name of the Pinecone index that receives the documents.
# NOTE(review): presumably this index must already exist — confirm.
index_name = "workflow-helper-index"

# Embedding model handed to the vector store.
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# NOTE(review): "your_doc.pdf" looks like a placeholder path — point it at the
# real document before running.
loader = PyPDFLoader("your_doc.pdf")

# NOTE(review): load_and_split() presumably applies LangChain's default text
# splitter, so the returned documents may be chunks rather than whole PDF
# pages; the count printed below would then differ from the page count.
pages = loader.load_and_split()

# Store the documents in the index named by index_name using the embedding
# model above.
vectorstore = Pinecone.from_documents(pages, embedding, index_name=index_name)
print(f"Uploaded {len(pages)} pages to Pinecone index '{index_name}'")