|
|
from dotenv import load_dotenv |
|
|
import pandas as pd |
|
|
import os |
|
|
import pymongo |
|
|
from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch |
|
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
|
|
|
|
__author__ = "Chirag Kamble" |
|
|
|
|
|
class DBConnect:
    """
    Class to connect to the database
    """

    @staticmethod
    def connect_db():
        """
        Static method to connect to the database and create a vector store

        Settings are read from environment variables after ``load_dotenv()``
        has been called:

        * ``MONGODB_CONNECTION_URL`` - passed to ``pymongo.MongoClient``
        * ``MONGODB_DB_NAME`` - name of the database to open
        * ``MONGODB_COLLECTION_NAME`` - name of the collection to open
        * ``MONGODB_VECTOR_INDEX_NAME`` - forwarded as ``index_name``
        * ``RELEVANCE_SCORE_FN`` - forwarded as ``relevance_score_fn``
        * ``TEXT_KEY`` - forwarded as ``text_key``
        * ``EMBEDDING_KEY`` - forwarded as ``embedding_key``

        The last four are only forwarded to ``MongoDBAtlasVectorSearch`` when
        they are set; an unset variable leaves the corresponding constructor
        argument at the library's own default instead of overriding it with
        ``None``.

        :return: tuple ``(mongodb_vector_store, movies)`` where
            ``mongodb_vector_store`` is the MongoDB Atlas Vector Store instance
            connected to the required mongodb collection and ``movies`` is a
            dataframe built from every document returned by
            ``collection.find()``
        """
        load_dotenv()

        mongodb_connection_url = os.getenv("MONGODB_CONNECTION_URL")
        mongodb_db_name = os.getenv("MONGODB_DB_NAME")
        mongodb_collection_name = os.getenv("MONGODB_COLLECTION_NAME")

        # os.getenv() yields None for an unset variable. Passing that None
        # explicitly would replace the vector store's default for the
        # argument, so only settings that are actually present are forwarded.
        optional_settings = {
            "index_name": os.getenv("MONGODB_VECTOR_INDEX_NAME"),
            "relevance_score_fn": os.getenv("RELEVANCE_SCORE_FN"),
            "text_key": os.getenv("TEXT_KEY"),
            "embedding_key": os.getenv("EMBEDDING_KEY"),
        }
        vector_store_kwargs = {
            name: value
            for name, value in optional_settings.items()
            if value is not None
        }

        # The client is not closed here: the collection handed to the vector
        # store (and returned to the caller through it) is derived from it.
        client = pymongo.MongoClient(mongodb_connection_url)
        db = client[mongodb_db_name]
        collection = db[mongodb_collection_name]

        mongodb_vector_store = MongoDBAtlasVectorSearch(
            collection=collection,
            embedding=HuggingFaceEmbeddings(),
            **vector_store_kwargs,
        )

        # find() without a filter returns every document in the collection;
        # all of them are loaded into the dataframe.
        movies_docs = collection.find()
        movies = pd.DataFrame(movies_docs)

        return mongodb_vector_store, movies
|
|
|