chirag0107 committed on
Commit
21a32b2
·
verified ·
1 Parent(s): ff871f8

Update langchain_movie_search.py

Browse files
Files changed (1) hide show
  1. langchain_movie_search.py +9 -51
langchain_movie_search.py CHANGED
@@ -1,26 +1,18 @@
1
  import os
2
  from typing import List
3
- import argparse
4
  from dotenv import load_dotenv
5
  import pymongo
6
- import certifi
7
  from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
8
  from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
9
  from langchain.chains import create_retrieval_chain
10
  from langchain.chains.combine_documents import create_stuff_documents_chain
11
- from langchain_core.documents import Document
12
  from langchain_core.prompts import PromptTemplate
13
  import gradio as gr
14
  from gradio.themes.base import Base
15
- from flask import Flask
16
 
17
  __author__ = "Chirag Kamble"
18
 
19
 
20
- # Flask App
21
- # app = Flask(__name__)
22
-
23
-
24
  class MoviesSearch:
25
  """
26
  Class to perform Vector Index Search using MongoDB and LLM search using Langchain on Movies
@@ -44,13 +36,10 @@ class MoviesSearch:
44
  self.client: pymongo.synchronous.mongo_client.MongoClient = pymongo.MongoClient(mongodb_connection_url,
45
  serverSelectionTimeoutMS=60000,
46
  tls=True,
47
- # tlsCAFile=certifi.where(),
48
  connect=False,
49
  tlsAllowInvalidCertificates=True,
50
  directConnection=False,
51
- # tlsInsecure=True,
52
  maxPoolSize=100,
53
- # minPoolSize=0,
54
  maxIdleTimeMS=60000,
55
  waitQueueTimeoutMS=60000,
56
  connectTimeoutMS=60000,
@@ -80,20 +69,15 @@ class MoviesSearch:
80
  Generate vector embeddings
81
  """
82
  new_doc_list: List[Document] = []
83
- for doc in self.sample_movies_collection.find({"fullplot": {"$exists": True}}).limit(1000):
84
- new_doc: Document = Document(
85
- page_content=doc["fullplot"],
86
- metadata={"source": "Collection sample_mflix",
87
- "movie-title": doc["title"],
88
- "movie-plot": doc["fullplot"],
89
- "text": doc["fullplot"]}
90
- )
91
- new_doc_list.append(new_doc)
92
- self.retrieve_vector_store.from_documents(
93
- documents=new_doc_list,
94
- embedding=self.hf_plot_embedding,
95
- collection=self.langchain_movies_collection
96
- )
97
 
98
  def query_data(self, query: str):
99
  """
@@ -111,7 +95,6 @@ class MoviesSearch:
111
  return_full_text=True,
112
  )
113
 
114
- # retriever = self.retrieve_vector_store.as_retriever()
115
  retriever = self.retrieve_vector_store.as_retriever()
116
 
117
  prompt = PromptTemplate.from_template(template="{context}", template_format="f-string")
@@ -142,36 +125,11 @@ class MoviesSearch:
142
  self.client.close()
143
 
144
 
145
- # @app.route("/", methods=["GET"])
146
  def gradio_interface(cmd=None):
147
  movie_search = MoviesSearch()
148
  # movie_search.generate_insert_embeddings()
149
  movie_search.run_website()
150
 
151
- # if cmd == "generate_embeddings":
152
- # movie_search.generate_insert_embeddings()
153
- # elif cmd == "run":
154
- # movie_search.run_website()
155
-
156
 
157
  if __name__ == "__main__":
158
- # Create the parser
159
- # parser = argparse.ArgumentParser(description='Script to suggest movies based on user description/query')
160
- #
161
- # # Add arguments
162
- # parser.add_argument("-g", "--generate_embeddings", action="store_true", help="Generate/Re-generate Embeddings")
163
- # parser.add_argument("-r", "--run", action="store_true", help="Age of the person")
164
- #
165
- # # Parse arguments
166
- # args = parser.parse_args()
167
- #
168
- # if args.generate_embeddings:
169
- # gradio_interface(cmd="generate_embeddings")
170
- # elif args.run:
171
- # gradio_interface(cmd="run")
172
-
173
- # app.run(host="0.0.0.0", port=os.getenv("PORT", 5000), debug=True)
174
- # app.run(host="0.0.0.0", debug=True)
175
- # app.run(debug=True)
176
-
177
  gradio_interface()
 
1
  import os
2
  from typing import List
 
3
  from dotenv import load_dotenv
4
  import pymongo
 
5
  from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
6
  from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
7
  from langchain.chains import create_retrieval_chain
8
  from langchain.chains.combine_documents import create_stuff_documents_chain
 
9
  from langchain_core.prompts import PromptTemplate
10
  import gradio as gr
11
  from gradio.themes.base import Base
 
12
 
13
  __author__ = "Chirag Kamble"
14
 
15
 
 
 
 
 
16
  class MoviesSearch:
17
  """
18
  Class to perform Vector Index Search using MongoDB and LLM search using Langchain on Movies
 
36
  self.client: pymongo.synchronous.mongo_client.MongoClient = pymongo.MongoClient(mongodb_connection_url,
37
  serverSelectionTimeoutMS=60000,
38
  tls=True,
 
39
  connect=False,
40
  tlsAllowInvalidCertificates=True,
41
  directConnection=False,
 
42
  maxPoolSize=100,
 
43
  maxIdleTimeMS=60000,
44
  waitQueueTimeoutMS=60000,
45
  connectTimeoutMS=60000,
 
69
  Generate vector embeddings
70
  """
71
  new_doc_list: List[Document] = []
72
+ for doc in self.sample_movies_collection.find({"fullplot": {"$exists": True}}).limit(9000):
73
+ new_doc_list.append({
74
+ "movie-title": doc["title"],
75
+ "movie-plot": doc["fullplot"],
76
+ "text": doc["fullplot"],
77
+ "embedding": self.hf_plot_embedding.embed_query(doc["fullplot"])
78
+ })
79
+
80
+ self.langchain_movies_collection.insert_many(new_doc_list)
 
 
 
 
 
81
 
82
  def query_data(self, query: str):
83
  """
 
95
  return_full_text=True,
96
  )
97
 
 
98
  retriever = self.retrieve_vector_store.as_retriever()
99
 
100
  prompt = PromptTemplate.from_template(template="{context}", template_format="f-string")
 
125
  self.client.close()
126
 
127
 
 
128
  def gradio_interface(cmd=None):
129
  movie_search = MoviesSearch()
130
  # movie_search.generate_insert_embeddings()
131
  movie_search.run_website()
132
 
 
 
 
 
 
133
 
134
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  gradio_interface()