Spaces:
Sleeping
Sleeping
| from pinecone.grpc import PineconeGRPC as pinecone | |
| from pinecone import ServerlessSpec | |
| from langchain_pinecone import PineconeVectorStore | |
| from langchain_community.embeddings import SentenceTransformerEmbeddings | |
| from sentence_transformers import SentenceTransformer | |
| from dotenv import load_dotenv | |
| import time | |
| import os | |
| load_dotenv() | |
| PINECONE_API_KEY = os.getenv('PINECONE_API_KEY') | |
| PINECONE_API_ENV = os.getenv('PINECONE_API_ENV') | |
| index_name = "sample3" | |
| model = SentenceTransformer('all-MiniLM-L6-v2') | |
| embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2") | |
| pc = pinecone(api_key=PINECONE_API_KEY) | |
| def ensure_index_exists(): | |
| if index_name not in pc.list_indexes().names(): | |
| pc.create_index( | |
| name=index_name, | |
| dimension=384, | |
| metric="cosine", | |
| spec=ServerlessSpec( | |
| cloud='aws', | |
| region='us-east-1' | |
| ) | |
| ) | |
| ensure_index_exists() | |
| index = pc.Index(index_name) | |
| vectorstore = PineconeVectorStore(index_name=index_name, embedding=SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")) | |
| class Document: | |
| def __init__(self, page_content, metadata=None): | |
| self.page_content = page_content | |
| self.metadata = metadata or {} | |
| def encodeaddData(corpusData, url, pdf, pdf2, uns2): | |
| documents = [] | |
| if url or pdf or pdf2: | |
| for text in corpusData: | |
| documents.append(Document(text)) | |
| elif uns2: | |
| documents.append(Document(corpusData)) | |
| vectorstore = PineconeVectorStore.from_documents(documents, embeddings, index_name=index_name) | |
| def delete(): | |
| pc.delete_index(index_name) | |
| def find_k_best_match1(query): | |
| time.sleep(5) | |
| vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings) | |
| docs = vectorstore.similarity_search(query, k=2) | |
| return docs | |