stephenmccartney1234 committed on
Commit
c2b1920
·
verified ·
1 Parent(s): 490d216

Create ingest.py

Browse files
Files changed (1) hide show
  1. ingest.py +21 -0
ingest.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Load a PDF, embed its contents with OpenAI, and upload them to Pinecone.

Configuration is read from environment variables:

    OPENAI_API_KEY       key handed to ``OpenAIEmbeddings``
    PINECONE_API_KEY     key handed to ``pinecone.init``
    PINECONE_ENV         environment handed to ``pinecone.init``
    PINECONE_INDEX_NAME  target index (default: "workflow-helper-index")
    INGEST_PDF_PATH      PDF file to ingest (default: "your_doc.pdf")
"""

import os

import pinecone
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter  # noqa: F401  (not used below; kept from the original import list)
from langchain.vectorstores import Pinecone

# Credentials come from the environment; ``os.environ.get`` yields None for
# any variable that is not set, and that None is passed straight through.
# NOTE(review): whether the client libraries fall back to their own defaults
# when given None is not visible here -- confirm before adding a hard check.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
PINECONE_ENV = os.environ.get("PINECONE_ENV")

# Previously hard-coded values, now overridable without editing the script.
# The defaults are the original literals, so an unconfigured run is unchanged.
index_name = os.environ.get("PINECONE_INDEX_NAME", "workflow-helper-index")
pdf_path = os.environ.get("INGEST_PDF_PATH", "your_doc.pdf")

pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)

embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

loader = PyPDFLoader(pdf_path)
# NOTE(review): load_and_split() may return more entries than the PDF has
# physical pages if long pages are split further -- confirm; the message
# below reports len(pages) either way.
pages = loader.load_and_split()

# Embeds every loaded document and writes the vectors to the named index.
vectorstore = Pinecone.from_documents(pages, embedding, index_name=index_name)
print(f"Uploaded {len(pages)} pages to Pinecone index '{index_name}'")