Spaces:

chirag0107
/

Movie-Script-Generator

Sleeping

App Files Community

chirag0107 committed on Feb 3, 2025

Commit

509a17c

verified ·

1 Parent(s): bad0d8d

Update langchain_movie_search.py

Browse files

Files changed (1) hide show

langchain_movie_search.py +37 -49

langchain_movie_search.py CHANGED Viewed

@@ -1,14 +1,18 @@
 import os
 from typing import List
 from dotenv import load_dotenv
 import pymongo
 from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
 from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
 from langchain.chains import create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.prompts import PromptTemplate
 import gradio as gr
 from gradio.themes.base import Base
 __author__ = "Chirag Kamble"
@@ -24,44 +28,28 @@ class MoviesSearch:
         """
         # Load environment variables
         load_dotenv()
-        transformer_model_name: str = os.getenv("TRANSFORMER_MODEL_NAME")
         mongodb_connection_url: str = os.getenv("MONGODB_CONNECTION_URL")
         mongodb_db_name: str = os.getenv("MONGODB_DB_NAME")
         mongodb_collection_name: str = os.getenv("MONGODB_COLLECTION_NAME")
-        self.huggingface_repo: str = os.getenv("HF_REPO")
         self.huggingface_api_token: str = os.getenv("HF_TOKEN")
         self.huggingface_text_generation_model: str = os.getenv("HUGGINGFACE_TEXT_GENERATION_MODEL")
         # Setup MongoDB connection
-        self.client: pymongo.synchronous.mongo_client.MongoClient = pymongo.MongoClient(mongodb_connection_url,
-                                                                                        serverSelectionTimeoutMS=60000,
-                                                                                        tls=True,
-                                                                                        connect=False,
-                                                                                        tlsAllowInvalidCertificates=True,
-                                                                                        directConnection=False,
-                                                                                        maxPoolSize=100,
-                                                                                        maxIdleTimeMS=60000,
-                                                                                        waitQueueTimeoutMS=60000,
-                                                                                        connectTimeoutMS=60000,
-                                                                                        retryWrites=True,
-                                                                                        retryReads=True,
-                                                                                       )
         db: str = mongodb_db_name
         collection_name: str = mongodb_collection_name
         self.langchain_movies_collection: pymongo.synchronous.collection.Collection = self.client[db][collection_name]
         self.sample_movies_collection: pymongo.synchronous.collection.Collection = self.client.sample_mflix.movies
-        self.hf_plot_embedding = HuggingFaceEmbeddings(
-            model_name=transformer_model_name,
-            show_progress=True,
-        )
         self.retrieve_vector_store = MongoDBAtlasVectorSearch(collection=self.langchain_movies_collection,
                                                               embedding=self.hf_plot_embedding,
                                                               embedding_key="embedding",
-                                                              index_name="langchain_movies_vector_index",
-                                                              text_key="text",
                                                               )
     def generate_insert_embeddings(self):
@@ -88,48 +76,48 @@ class MoviesSearch:
         hf_llm: HuggingFaceEndpoint = HuggingFaceEndpoint(
             repo_id=self.huggingface_text_generation_model,
             huggingfacehub_api_token=self.huggingface_api_token,
-            # temperature=0.1,
             task="text-generation",
-            # max_new_tokens=100,
-            verbose=True,
-            return_full_text=True,
         )
-        retriever = self.retrieve_vector_store.as_retriever()
-        prompt = PromptTemplate.from_template(template="{context}", template_format="f-string")
-        combine_docs = create_stuff_documents_chain(llm=hf_llm, prompt=prompt, )
-        retrival_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=combine_docs)
-        hf_llm_retriever_output = retrival_chain.invoke({"input": query})
-        llm_answer = hf_llm_retriever_output.get("answer")
         return llm_answer
     def run_website(self):
-        with gr.Blocks(theme=Base(), title="Movie plot search App using Vector Search + RAG") as v_search:
-            gr.Markdown("Movie plot search App using Vector Search + RAG")
-            textbox = gr.Textbox(label="Enter your question:", lines=1)
             with gr.Row():
-                button = gr.Button("Submit", variant="primary")
             with gr.Column():
-                output = gr.Textbox(lines=1, autoscroll=False, interactive=False,
-                                    label="""Output generated by chaining Atlas Vector Search with Langchain's RAG""",)
             button.click(fn=self.query_data, inputs=textbox, outputs=[output])
-        v_search.launch(share=True)
-    def close_client(self):
-        self.client.close()
-def gradio_interface(cmd=None):
     movie_search = MoviesSearch()
-    # movie_search.generate_insert_embeddings()
     movie_search.run_website()
-if __name__ == "__main__":
-    gradio_interface()

 import os
 from typing import List
+import argparse
+import certifi
 from dotenv import load_dotenv
 import pymongo
 from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
 from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
 from langchain.chains import create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_core.documents import Document
 from langchain_core.prompts import PromptTemplate
 import gradio as gr
 from gradio.themes.base import Base
+from flask import Flask
 __author__ = "Chirag Kamble"
         """
         # Load environment variables
         load_dotenv()
         mongodb_connection_url: str = os.getenv("MONGODB_CONNECTION_URL")
         mongodb_db_name: str = os.getenv("MONGODB_DB_NAME")
         mongodb_collection_name: str = os.getenv("MONGODB_COLLECTION_NAME")
         self.huggingface_api_token: str = os.getenv("HF_TOKEN")
         self.huggingface_text_generation_model: str = os.getenv("HUGGINGFACE_TEXT_GENERATION_MODEL")
         # Setup MongoDB connection
+        self.client: pymongo.synchronous.mongo_client.MongoClient = pymongo.MongoClient(mongodb_connection_url)
         db: str = mongodb_db_name
         collection_name: str = mongodb_collection_name
         self.langchain_movies_collection: pymongo.synchronous.collection.Collection = self.client[db][collection_name]
         self.sample_movies_collection: pymongo.synchronous.collection.Collection = self.client.sample_mflix.movies
+        self.hf_plot_embedding = HuggingFaceEmbeddings()
         self.retrieve_vector_store = MongoDBAtlasVectorSearch(collection=self.langchain_movies_collection,
                                                               embedding=self.hf_plot_embedding,
                                                               embedding_key="embedding",
+                                                              index_name="movies_data_12k_vector_index",
+                                                              text_key="uuid_plot",
                                                               )
     def generate_insert_embeddings(self):
         hf_llm: HuggingFaceEndpoint = HuggingFaceEndpoint(
             repo_id=self.huggingface_text_generation_model,
             huggingfacehub_api_token=self.huggingface_api_token,
+            temperature=0.1,
             task="text-generation",
+            repetition_penalty=1.03,
+            top_k=10,
+            top_p=0.95,
+            typical_p=0.95,
         )
+        prompt = PromptTemplate.from_template(
+            template="Generate a movie plot based on the below description.\nBe creative but stay true to the "
+                     "description provided.\nDescription:{context}",
+        )
+        formatted_prompt = prompt.format(context=query)
+        llm_answer = hf_llm.invoke(formatted_prompt)
+        llm_answer = llm_answer.split("\n", 1)[1]
+        print(llm_answer)
         return llm_answer
     def run_website(self):
+        theme = gr.themes.Ocean()
+        with gr.Blocks(theme=theme, title="Movie Plot Generation using Vector Search + RAG") as dashboard:
+            gr.Markdown("# Generate Movie Plot using Vector Search + RAG")
+            with gr.Row():
+                textbox = gr.Textbox(label="Enter your prompt here:", lines=1,
+                                     placeholder="e.g. Generate a movie of a couple discovering love in war")
             with gr.Row():
+                button = gr.Button("Generate")
             with gr.Column():
+                output = gr.Textbox(interactive=False,
+                                    label="Here is a Movie Plot for you. Don't forget to invite us to the premier!",
+                                    autoscroll=False,
+                                    show_label=True,
+                                    show_copy_button=True,
+                                    )
             button.click(fn=self.query_data, inputs=textbox, outputs=[output])
+        dashboard.launch(debug=True)
+if __name__ == "__main__":
     movie_search = MoviesSearch()
     movie_search.run_website()