Spaces:

chirag0107
/

Movie-Recommender-And-Script-Generator

Sleeping

App Files Files Community

chirag0107 committed on Feb 8, 2025

Commit

6ba8078

1 Parent(s): 8595104

Added required files

Browse files

Files changed (3) hide show

app.py +195 -0
connect.py +46 -0
requirements.txt +90 -0

app.py ADDED Viewed

	@@ -0,0 +1,195 @@

+import pandas as pd
+import gradio as gr
+import os
+from dotenv import load_dotenv
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain_core.prompts import PromptTemplate
+from connect import DBConnect
+__author__ = "Chirag Kamble"
class GradioDashboard:
    """
    Gradio dashboard offering movie recommendations and movie-plot generation.

    The "Movies Recommender" tab runs a similarity search against the vector
    store returned by ``DBConnect().connect_db()`` and lists matching rows of
    the movies dataframe. The "Movie Script Generator" tab sends the user's
    prompt to a Hugging Face text-generation endpoint.
    """

    # Dropdown label -> dataframe column used to order the recommendations.
    # The columns are presumably per-movie emotion scores (confirm against the
    # collection schema). "Balanced" and "In the feels" are the spellings the
    # sorting code previously compared against; they never matched the
    # dropdown choices, so they are kept here only as aliases.
    VIBE_SORT_COLUMNS = {
        "Neutral": "neutral",
        "Balanced": "neutral",
        "Happy": "joy",
        "Mind-Bending": "surprise",
        "Scary": "fear",
        "In the feels...": "sadness",
        "In the feels": "sadness",
    }

    def __init__(self):
        """
        Load the environment, connect to the database, prepare the dropdown
        choices and launch the dashboard.

        Note that construction does not return until the Gradio app stops,
        because ``generate_dashboard()`` calls ``launch()``.
        """
        load_dotenv()
        self.mongodb_vector_store, self.movies = DBConnect().connect_db()
        # Missing genres are dropped so that sorting never mixes NaN with str.
        genre_labels = self.movies["genre"].dropna().astype(str).str.capitalize().unique()
        self.genres = ["All"] + sorted(genre_labels)
        self.vibe = ["Neutral", "Happy", "Mind-Bending", "Scary", "In the feels..."]
        # Both values are None when the corresponding variable is not set.
        self.huggingface_text_generation_model = os.getenv("HUGGINGFACE_TEXT_GENERATION_MODEL")
        self.huggingface_api_token = os.getenv("HF_TOKEN")
        self.generate_dashboard()

    def query_data(self, query: str):
        """
        Generate a movie plot for the given prompt with the configured LLM.

        :param query: The user's prompt describing the desired movie
        :return llm_answer: String answer generated by the LLM
        :raises gr.Error: If the prompt is missing or contains only whitespace
        """
        if not query or not query.strip():
            raise gr.Error("Enter a prompt to generate a response !", duration=5)
        hf_llm = HuggingFaceEndpoint(
            repo_id=self.huggingface_text_generation_model,
            huggingfacehub_api_token=self.huggingface_api_token,
            temperature=0.1,
            task="text-generation",
            repetition_penalty=1.03,
            top_k=10,
            top_p=0.95,
            typical_p=0.95,
        )
        prompt = PromptTemplate.from_template(
            template="Generate a movie plot based on the below user query.\nBe creative but stay true to the "
                     "description provided.\nUser Query:{context}",
        )
        llm_answer = hf_llm.invoke(prompt.format(context=query))
        # The first line of the answer is discarded, as before. A single-line
        # answer used to raise IndexError here; now the whole answer is
        # returned when there is nothing useful after the first line.
        _, newline, remainder = llm_answer.partition("\n")
        if newline and remainder.strip():
            return remainder
        return llm_answer

    def retrieve_recommendations(self, query, genre, vibe, initial_top_k=50, final_top_k=10) -> pd.DataFrame:
        """
        Retrieve recommended movies from the vector store and the movies dataframe.

        :param query: User query passed to the similarity search
        :param genre: Selected genre label, or "All" to skip genre filtering
        :param vibe: Selected vibe label; unknown labels leave the order unchanged
        :param initial_top_k: Number of documents requested from the similarity search
        :param final_top_k: Maximum number of movies returned
        :return movies_recs: Dataframe of recommended movies
        """
        recs = self.mongodb_vector_store.similarity_search(query, k=initial_top_k)
        # The first whitespace-separated token of each document is matched
        # against the "uuid" column; empty documents are skipped.
        movie_ids = []
        for rec in recs:
            tokens = rec.page_content.strip('"').split()
            if tokens:
                movie_ids.append(tokens[0])
        movies_recs = self.movies[self.movies["uuid"].isin(movie_ids)].head(initial_top_k)
        if genre != "All":
            # The dropdown labels are capitalised copies of the stored genres,
            # so the stored values are normalised the same way before comparing.
            same_genre = movies_recs["genre"].astype(str).str.capitalize() == genre
            movies_recs = movies_recs[same_genre]
        movies_recs = movies_recs.head(final_top_k)
        sort_column = self.VIBE_SORT_COLUMNS.get(vibe)
        if sort_column is not None:
            # Returns a new frame instead of sorting a filtered slice in place.
            movies_recs = movies_recs.sort_values(by=sort_column, ascending=False)
        return movies_recs

    def recommend_movies(self, query: str, genre: str, vibe: str) -> str:
        """
        Build the text shown in the recommendations box.

        Each entry has the form ``"<n>. <title> by <directors>: <plot>"``, with
        the plot cut to its first 30 words (followed by "..." only when
        something was cut). Entries are separated by blank lines.

        :param query: User query
        :param genre: Selected genre label
        :param vibe: Selected vibe label
        :return output: String with the list of recommended movies, or an
            apology message when nothing matched
        """
        recommendations = self.retrieve_recommendations(query, genre, vibe)
        results = []
        for position, (_, row) in enumerate(recommendations.iterrows(), start=1):
            plot_words = row["plot"].split()
            truncated_plot = " ".join(plot_words[:30])
            if len(plot_words) > 30:
                truncated_plot += "..."
            # Directors are assumed to be a comma-separated string; each name
            # is stripped so the joined text is spaced consistently.
            names = [name.strip() for name in row["director"].split(",") if name.strip()]
            if len(names) > 1:
                directors = f"{', '.join(names[:-1])} and {names[-1]}"
            elif names:
                directors = names[0]
            else:
                directors = row["director"]
            results.append(f"{position}. {row['title']} by {directors}: {truncated_plot}")
        if not results:
            return "Sorry, our database movies does not have recommendations for the chosen Genre and Vibe :("
        return "\n\n\n".join(results)

    def generate_dashboard(self):
        """
        Build the two-tab Gradio interface, wire the buttons to
        ``recommend_movies`` and ``query_data``, and launch the app.
        """
        theme = gr.themes.Citrus()
        with gr.Blocks(theme=theme) as dashboard:
            gr.Markdown("# Get Movies Recommendations or Generate Your Own Movie Script !!!")
            with gr.Tab(label="Movies Recommender"):
                gr.Markdown("# Movies Recommender")
                with gr.Row():
                    with gr.Column():
                        genre_dropdown = gr.Dropdown(choices=self.genres, label="Select A Genre", value="All")
                    with gr.Column():
                        vibe_dropdown = gr.Dropdown(choices=self.vibe, label="Choose Your Vibe", value="Neutral")
                with gr.Row():
                    user_query = gr.Textbox(label="Please enter a description of the movie you would like to watch:",
                                            placeholder="e.g. A story about love in war")
                with gr.Row():
                    submit_button = gr.Button("Recommend")
                gr.Markdown("## Recommendations")
                with gr.Row():
                    recommendations_box = gr.TextArea(interactive=False,
                                                      label="Your recommendations will be displayed below:",
                                                      autoscroll=False,
                                                      show_label=True,
                                                      show_copy_button=True, )
                submit_button.click(fn=self.recommend_movies,
                                    inputs=[user_query, genre_dropdown, vibe_dropdown],
                                    outputs=[recommendations_box], )
            with gr.Tab("Movie Script Generator"):
                gr.Markdown("# Movie Script Generator")
                with gr.Row():
                    script_gen_query_textbox = gr.Textbox(label="Enter your prompt here:", lines=1,
                                                          placeholder="e.g. Generate a movie where a couple "
                                                                      "discovers love during a war")
                with gr.Row():
                    generate_button = gr.Button("Generate")
                with gr.Column():
                    plot_box = gr.TextArea(interactive=False,
                                           placeholder="Your Movie Plot will be displayed here. "
                                                       "Don't forget to invite us to your movie premier! :)",
                                           autoscroll=False,
                                           show_label=False,
                                           )
                generate_button.click(fn=self.query_data, inputs=[script_gen_query_textbox], outputs=[plot_box])
        dashboard.launch(debug=True)
if __name__ == "__main__":
    # Constructing the dashboard is all that is needed to start the app:
    # ``GradioDashboard.__init__`` ends by calling ``generate_dashboard()``,
    # which builds the interface and launches it.
    GradioDashboard()

connect.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from dotenv import load_dotenv
+import pandas as pd
+import os
+import pymongo
+from langchain_mongodb.vectorstores import MongoDBAtlasVectorSearch
+from langchain_huggingface import HuggingFaceEmbeddings
+__author__ = "Chirag Kamble"
class DBConnect:
    """
    Helper that opens the MongoDB connection used by the application.
    """

    @staticmethod
    def connect_db():
        """
        Connect to MongoDB using settings read from the environment and build
        a vector store on top of the configured collection.

        Settings read: MONGODB_CONNECTION_URL, MONGODB_DB_NAME,
        MONGODB_COLLECTION_NAME, MONGODB_VECTOR_INDEX_NAME, TEXT_KEY,
        EMBEDDING_KEY and RELEVANCE_SCORE_FN. Any of them is passed on as
        None when the variable is not set.

        :return: mongodb_vector_store: MongoDB Atlas Vector Store instance bound to the configured collection
        :return: movies: dataframe built from every document in that collection
        """
        load_dotenv()
        env = os.getenv

        # Resolve the target collection: client -> database -> collection.
        client = pymongo.MongoClient(env("MONGODB_CONNECTION_URL"))
        database = client[env("MONGODB_DB_NAME")]
        collection = database[env("MONGODB_COLLECTION_NAME")]

        vector_store_options = {
            "collection": collection,
            "embedding": HuggingFaceEmbeddings(),
            "index_name": env("MONGODB_VECTOR_INDEX_NAME"),
            "relevance_score_fn": env("RELEVANCE_SCORE_FN"),
            "text_key": env("TEXT_KEY"),
            "embedding_key": env("EMBEDDING_KEY"),
        }
        mongodb_vector_store = MongoDBAtlasVectorSearch(**vector_store_options)

        # An unfiltered find() returns every document in the collection.
        movies = pd.DataFrame(collection.find())
        return mongodb_vector_store, movies

requirements.txt ADDED Viewed

	@@ -0,0 +1,90 @@

+aiofiles==23.2.1
+aiohappyeyeballs==2.4.6
+aiohttp==3.11.12
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyio==4.8.0
+attrs==25.1.0
+certifi==2025.1.31
+charset-normalizer==3.4.1
+click==8.1.8
+colorama==0.4.6
+dnspython==2.7.0
+fastapi==0.115.8
+ffmpy==0.5.0
+filelock==3.17.0
+frozenlist==1.5.0
+fsspec==2025.2.0
+gradio==5.15.0
+gradio_client==1.7.0
+greenlet==3.1.1
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.28.1
+idna==3.10
+Jinja2==3.1.5
+joblib==1.4.2
+jsonpatch==1.33
+jsonpointer==3.0.0
+langchain==0.3.18
+langchain-core==0.3.34
+langchain-huggingface==0.1.2
+langchain-mongodb==0.4.0
+langchain-text-splitters==0.3.6
+langsmith==0.3.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.1.0
+networkx==3.4.2
+numpy==2.2.2
+orjson==3.10.15
+packaging==24.2
+pandas==2.2.3
+pillow==11.1.0
+propcache==0.2.1
+pydantic==2.10.6
+pydantic_core==2.27.2
+pydub==0.25.1
+Pygments==2.19.1
+pymongo==4.11
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.20
+pytz==2025.1
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+requests-toolbelt==1.0.0
+rich==13.9.4
+ruff==0.9.5
+safehttpx==0.1.6
+safetensors==0.5.2
+scikit-learn==1.6.1
+scipy==1.15.1
+semantic-version==2.10.0
+sentence-transformers==3.4.1
+setuptools==75.8.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+SQLAlchemy==2.0.38
+starlette==0.45.3
+sympy==1.13.1
+tenacity==9.0.0
+threadpoolctl==3.5.0
+tokenizers==0.21.0
+tomlkit==0.13.2
+torch==2.6.0
+tqdm==4.67.1
+transformers==4.48.3
+typer==0.15.1
+typing_extensions==4.12.2
+tzdata==2025.1
+urllib3==2.3.0
+uvicorn==0.34.0
+websockets==14.2
+yarl==1.18.3
+zstandard==0.23.0