File size: 1,949 Bytes
6ba8078
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from dotenv import load_dotenv
import pandas as pd
import os
import pymongo
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
from langchain_huggingface import HuggingFaceEmbeddings

__author__ = "Chirag Kamble"

class DBConnect:
    """
    Class to connect to the database
    """
    @staticmethod
    def connect_db():
        """
        Static method to connect to the database and create a vector store.

        Settings are read from environment variables after ``load_dotenv()`` has
        been called:

        * ``MONGODB_CONNECTION_URL`` - passed as-is to ``pymongo.MongoClient``
        * ``MONGODB_DB_NAME`` - required, database to open
        * ``MONGODB_COLLECTION_NAME`` - required, collection to open
        * ``MONGODB_VECTOR_INDEX_NAME``, ``RELEVANCE_SCORE_FN``, ``TEXT_KEY``,
          ``EMBEDDING_KEY`` - optional; forwarded to ``MongoDBAtlasVectorSearch``
          only when set, so the library's own defaults apply otherwise

        :return: mongodb_vector_store: MongoDB Atlas Vector Store instance connected to the required mongodb collection
        :return: movies: dataframe containing all movies in the database
        :raises ValueError: if a required environment variable is missing or empty
        """
        load_dotenv()

        mongodb_connection_url = os.getenv("MONGODB_CONNECTION_URL")
        mongodb_db_name = os.getenv("MONGODB_DB_NAME")
        mongodb_collection_name = os.getenv("MONGODB_COLLECTION_NAME")

        # Fail early with a message naming the variable instead of letting
        # pymongo reject a None/empty database or collection name later on.
        required_settings = {
            "MONGODB_DB_NAME": mongodb_db_name,
            "MONGODB_COLLECTION_NAME": mongodb_collection_name,
        }
        missing = [name for name, value in required_settings.items() if not value]
        if missing:
            raise ValueError(
                "Missing required environment variable(s): " + ", ".join(missing)
            )

        # os.getenv returns None for unset variables. Passing None explicitly
        # would override the vector store's keyword defaults, so only the
        # settings that are actually present are forwarded.
        optional_settings = {
            "index_name": os.getenv("MONGODB_VECTOR_INDEX_NAME"),
            "relevance_score_fn": os.getenv("RELEVANCE_SCORE_FN"),
            "text_key": os.getenv("TEXT_KEY"),
            "embedding_key": os.getenv("EMBEDDING_KEY"),
        }
        vector_store_kwargs = {
            key: value for key, value in optional_settings.items() if value
        }

        # NOTE(review): the connection URL is deliberately not validated here;
        # pymongo appears to fall back to its default host when given None -
        # confirm whether that fallback is wanted.
        # The client is intentionally left open: the returned vector store
        # keeps using this collection after the method returns.
        client = pymongo.MongoClient(mongodb_connection_url)
        db = client[mongodb_db_name]
        collection = db[mongodb_collection_name]

        # NOTE(review): HuggingFaceEmbeddings() uses the library's default
        # model; presumably it must match the model that produced the stored
        # embeddings - confirm.
        mongodb_vector_store = MongoDBAtlasVectorSearch(
            collection=collection,
            embedding=HuggingFaceEmbeddings(),
            **vector_store_kwargs,
        )

        # Materialise the cursor explicitly; this loads every document of the
        # collection into memory.
        movies_docs = list(collection.find())
        movies = pd.DataFrame(movies_docs)

        return mongodb_vector_store, movies