Spaces:
Sleeping
Sleeping
| # import basics | |
| import os | |
| import time | |
| from dotenv import load_dotenv | |
| # import pinecone | |
| from pinecone import Pinecone, ServerlessSpec | |
| # import langchain | |
| from langchain_pinecone import PineconeVectorStore | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
| from langchain_core.documents import Document | |
# Load settings from a local .env file so that values such as
# PINECONE_API_KEY are available through the process environment.
load_dotenv()

# Pinecone client, authenticated with the key read from the environment.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Name of the index backing the vector store (change if desired).
index_name = "sample-index"

# Create the index on first use; an index that already exists is reused as-is.
existing_indexes = [info["name"] for info in pc.list_indexes()]
if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        # NOTE(review): presumably the output size of the embedding model
        # configured below -- confirm if the model is ever changed.
        dimension=3072,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Poll once per second until the new index reports that it is ready.
    while True:
        if pc.describe_index(index_name).status["ready"]:
            break
        time.sleep(1)

# Handle to the index, used by the vector store below.
index = pc.Index(index_name)

# Embedding model and the LangChain vector store wrapping the Pinecone index.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
)
vector_store = PineconeVectorStore(
    index=index,
    embedding=embeddings,
)
# Sample corpus to ingest, as (page_content, source) pairs in insertion order.
_SAMPLE_TEXTS = (
    (
        "I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
        "tweet",
    ),
    (
        "The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
        "news",
    ),
    (
        "Building an exciting new project with LangChain - come check it out!",
        "tweet",
    ),
    (
        "Robbers broke into the city bank and stole $1 million in cash.",
        "news",
    ),
    (
        "Wow! That was an amazing movie. I can't wait to see it again.",
        "tweet",
    ),
    (
        "Is the new iPhone worth the price? Read this review to find out.",
        "website",
    ),
    (
        "The top 10 soccer players in the world right now.",
        "website",
    ),
    (
        "LangGraph is the best framework for building stateful, agentic applications!",
        "tweet",
    ),
    (
        "The stock market is down 500 points today due to fears of a recession.",
        "news",
    ),
    (
        "I have a bad feeling I am going to get deleted :(",
        "tweet",
    ),
)

# One Document per pair; each gets its own metadata dict holding the source.
documents = [
    Document(page_content=text, metadata={"source": origin})
    for text, origin in _SAMPLE_TEXTS
]

# Keep the individual names bound for any code that refers to a single document.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents
# Build one id per document, in list order: "id1", "id2", ..., "idN".
# NOTE: despite the variable name these are not UUIDs; they are sequential and
# therefore identical on every run for the same number of documents.
# NOTE(review): presumably re-running the script then overwrites the same
# records instead of adding duplicates -- confirm against the store's
# behavior for repeated ids.
uuids = [f"id{position}" for position in range(1, len(documents) + 1)]

# Embed the documents and write them to the vector store under those ids.
vector_store.add_documents(documents=documents, ids=uuids)