shubhendu-ghosh committed on
Commit
355ade4
·
verified ·
1 Parent(s): 759c4da

Update vector_handler.py

Browse files
Files changed (1) hide show
  1. vector_handler.py +30 -15
vector_handler.py CHANGED
@@ -2,40 +2,55 @@ import time
2
  import threading
3
  from pinecone import Pinecone, ServerlessSpec
4
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
- from langchain.vectorstores.pinecone import Pinecone as LangchainPinecone
 
6
  from config import PINECONE_API_KEY
7
 
8
  # Initialize Pinecone client
9
  pc = Pinecone(api_key=PINECONE_API_KEY)
10
 
11
- # Initialize Google embedding model
12
  embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
13
 
14
- # Create vector store (index) using session_id
15
  def create_vector_store(session_id, texts):
16
  index_name = session_id
17
- if not pc.has_index(index_name):
 
 
 
18
  pc.create_index(
19
  name=index_name,
20
- dimension=768, # should match embedding size
 
21
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
22
  )
23
- # Wait for the index to become ready
24
- while True:
25
- description = pc.describe_index(index_name)
26
- if description.status['ready']:
27
- break
28
  time.sleep(2)
29
 
30
- # Add documents to index using Langchain wrapper
31
- vectorstore = LangchainPinecone.from_texts(texts, embedding_model, index_name=index_name)
 
 
 
 
 
 
 
32
 
33
  # Query vector store
34
  def query_vector_store(session_id, question):
35
  index_name = session_id
36
- vectorstore = LangchainPinecone.from_existing_index(index_name, embedding_model)
37
- chain = get_chain() # Make sure you have this function
38
- docs = vectorstore.similarity_search(question)
 
 
 
 
 
 
 
39
  result = chain({"input_documents": docs, "question": question}, return_only_outputs=True)
40
  return result["output_text"]
41
 
 
2
  import threading
3
  from pinecone import Pinecone, ServerlessSpec
4
  from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
+ from langchain_pinecone import PineconeVectorStore
6
+ from langchain_core.documents import Document
7
  from config import PINECONE_API_KEY
8
 
9
  # Initialize Pinecone client
10
  pc = Pinecone(api_key=PINECONE_API_KEY)
11
 
12
+ # Initialize embedding model
13
  embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
14
 
15
+ # Create vector store using session_id
16
def create_vector_store(session_id, texts):
    """Create (if needed) a Pinecone index named after *session_id* and
    upsert *texts* into it as embedded documents.

    Args:
        session_id: Used verbatim as the Pinecone index name.
            NOTE(review): Pinecone index names must be lowercase
            alphanumeric/hyphen strings -- confirm session ids comply.
        texts: Iterable of raw strings to embed and store.

    Returns:
        None. The documents are written to the index as a side effect.
    """
    index_name = session_id

    # Create the index only if it does not already exist.
    existing_indexes = [idx["name"] for idx in pc.list_indexes()]
    if index_name not in existing_indexes:
        pc.create_index(
            name=index_name,
            dimension=768,  # must match the embedding model's output size
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
        # Block until the freshly created index is ready to accept writes.
        while not pc.describe_index(index_name).status["ready"]:
            time.sleep(2)

    # Robustness fix: skip the upsert entirely when there is nothing to
    # add -- an empty add_documents call is wasted work at best and may
    # error server-side.
    documents = [Document(page_content=text) for text in texts]
    if not documents:
        return

    index = pc.Index(index_name)
    vectorstore = PineconeVectorStore(index=index, embedding=embedding_model)
    vectorstore.add_documents(documents=documents)
40
 
41
  # Query vector store
42
def query_vector_store(session_id, question):
    """Answer *question* from documents stored in this session's index.

    Retrieves up to 3 documents whose similarity score is at least 0.5
    and feeds them, together with the question, to the QA chain.

    Returns:
        The chain's "output_text" string.
    """
    # The Pinecone index shares its name with the session.
    pinecone_index = pc.Index(session_id)

    store = PineconeVectorStore(index=pinecone_index, embedding=embedding_model)
    retriever = store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 3, "score_threshold": 0.5},
    )

    relevant_docs = retriever.invoke(question)

    # NOTE(review): get_chain is neither defined nor imported in this
    # module; it must be supplied elsewhere or this raises NameError.
    qa_chain = get_chain()
    answer = qa_chain(
        {"input_documents": relevant_docs, "question": question},
        return_only_outputs=True,
    )
    return answer["output_text"]
56