smart_chat_assisstant / vector_search.py
Prajwal3009's picture
Upload 30 files
ccd7971 verified
from pinecone.grpc import PineconeGRPC as pinecone
from pinecone import ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_community.embeddings import SentenceTransformerEmbeddings
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv
import time
import os
# Module-level configuration and shared clients. Everything below runs at
# import time, in this order.

# Load environment variables (presumably from a local .env file) before they are read below.
load_dotenv()
# Pinecone settings read from the environment; os.getenv yields None when a variable is unset.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
# NOTE(review): PINECONE_API_ENV is not referenced anywhere else in the visible code.
PINECONE_API_ENV = os.getenv('PINECONE_API_ENV')
# Name of the Pinecone index every helper in this module operates on.
index_name = "sample3"
# Raw sentence-transformers model; NOTE(review): not used in the visible code, possibly imported elsewhere.
model = SentenceTransformer('all-MiniLM-L6-v2')
# LangChain embedding wrapper for the same model name, shared by the vector-store helpers.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# Pinecone gRPC client (imported above under the alias `pinecone`).
pc = pinecone(api_key=PINECONE_API_KEY)
def ensure_index_exists():
    """Create the Pinecone index named ``index_name`` unless it is already listed.

    The index is created as a serverless index (AWS, ``us-east-1``) with
    384-dimensional vectors and cosine similarity. Nothing is returned.
    """
    known_names = pc.list_indexes().names()
    if index_name in known_names:
        # Already present: nothing to do.
        return

    serverless_spec = ServerlessSpec(cloud='aws', region='us-east-1')
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=serverless_spec,
    )
# Make sure the index exists before any handle is bound to it.
ensure_index_exists()
# Low-level handle to the Pinecone index.
index = pc.Index(index_name)
# Reuse the module-level `embeddings` object rather than constructing a second
# SentenceTransformerEmbeddings for the same model name, which would load the
# model into memory twice at import time.
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
class Document:
    """Minimal text container passed to ``PineconeVectorStore.from_documents``.

    Attributes:
        page_content: The text of the document.
        metadata: Dict of metadata for the document; never ``None``.
        id: Optional identifier, ``None`` unless supplied.

    Args:
        page_content: Text to store.
        metadata: Optional metadata dict. Any falsy value (``None`` or an
            empty dict) is replaced with a fresh empty dict.
        doc_id: Optional identifier stored as ``self.id``.
    """

    def __init__(self, page_content, metadata=None, doc_id=None):
        self.page_content = page_content
        # A new dict per instance, so documents never share one metadata object.
        self.metadata = metadata or {}
        # NOTE(review): newer langchain-core releases appear to read `doc.id`
        # inside VectorStore.from_documents; exposing it (default None) keeps
        # this duck-typed class usable there. Confirm against the pinned version.
        self.id = doc_id

    def __repr__(self):
        return (
            f"{type(self).__name__}(page_content={self.page_content!r}, "
            f"metadata={self.metadata!r}, id={self.id!r})"
        )
def encodeaddData(corpusData, url, pdf, pdf2, uns2):
    """Embed ``corpusData`` and upload it to the Pinecone index ``index_name``.

    Args:
        corpusData: When ``url``, ``pdf`` or ``pdf2`` is truthy, an iterable
            whose items each become one document. Otherwise, when ``uns2`` is
            truthy, a single value stored as one document.
        url, pdf, pdf2: Flags selecting the "one document per item" mode.
        uns2: Flag selecting the "whole input is one document" mode; only
            consulted when the other three flags are all falsy.

    Returns:
        None. If no flag is set, or the corpus yields no items, nothing is
        uploaded and Pinecone is not contacted.
    """
    if url or pdf or pdf2:
        documents = [Document(text) for text in corpusData]
    elif uns2:
        documents = [Document(corpusData)]
    else:
        documents = []

    if not documents:
        # Nothing to embed: skip the embedding/upsert round trip entirely.
        return

    # The returned store is not needed here; the upload is the side effect.
    PineconeVectorStore.from_documents(documents, embeddings, index_name=index_name)
def delete():
    """Delete the Pinecone index named by the module-level ``index_name``."""
    target_index = index_name
    pc.delete_index(target_index)
def find_k_best_match1(query, k=2, wait_seconds=5):
    """Return the stored documents most similar to ``query``.

    Args:
        query: Text to search for.
        k: Number of matches to request (default 2, the previous fixed value).
        wait_seconds: Seconds to pause before searching (default 5, the
            previous fixed value). NOTE(review): the pause presumably gives
            freshly upserted vectors time to become searchable; confirm with
            the caller. Pass 0 to skip it.

    Returns:
        The list returned by ``PineconeVectorStore.similarity_search``.
    """
    if wait_seconds > 0:
        time.sleep(wait_seconds)
    # A fresh store handle is built per call, bound to the shared index name and embeddings.
    store = PineconeVectorStore(index_name=index_name, embedding=embeddings)
    return store.similarity_search(query, k=k)