| import gradio as gr |
| import pandas as pd |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.metrics.pairwise import cosine_similarity |
|
|
# Load the movie catalogue; the path is resolved against the working directory.
df = pd.read_csv("movies.csv")

# Text columns that are merged into one string per movie further down.
features = ["keywords", "cast", "genres", "director"]

# Replace missing values with empty strings so the columns can be
# concatenated as text without tripping over NaN.
df[features] = df[features].fillna('')
|
|
def combined_features(row, columns=("keywords", "cast", "genres", "director")):
    """Join the selected text fields of one record into a single string.

    Args:
        row: Record that can be indexed by field name, for example the row
            handed over by ``df.apply(..., axis=1)`` or a plain dict.
        columns: Field names to join, in output order. The default is the
            four fields this function has always combined, so callers that
            pass only ``row`` get the same result as before.

    Returns:
        The field values separated by single spaces.

    Raises:
        TypeError: If any selected value is not a string (``str.join``
            rejects non-string items, just as ``+`` with a string did).
    """
    # str.join builds the result in one pass instead of chaining "+".
    return " ".join(row[name] for name in columns)
|
|
# One space-joined text blob per movie, built from the feature columns.
df["combined_features"] = df.apply(combined_features, axis=1)

# TF-IDF weighting of every term. fit_transform returns a sparse matrix and
# cosine_similarity accepts it as-is, so no dense copy of the
# (documents x vocabulary) matrix is created here.
Tfidf_vect = TfidfVectorizer()
vector_matrix = Tfidf_vect.fit_transform(df["combined_features"])

# Pairwise similarity between all rows: cosine_sim[i][j] compares the movie
# in row i with the movie in row j.
cosine_sim = cosine_similarity(vector_matrix)
|
|
def get_index_from_title(title):
    """Return the ``index`` column value of the first row titled ``title``.

    The comparison is an exact ``==`` match against the ``title`` column of
    the module-level ``df``.

    Args:
        title: Movie title to look up.

    Returns:
        The value stored in the ``index`` column of the first matching row.

    Raises:
        IndexError: If no row has this title.
    """
    # NOTE(review): check_movie uses this value as a row position in
    # cosine_sim, which presumes the CSV's "index" column follows row
    # order - confirm against the data file.
    matches = df.loc[df.title == title, "index"]
    if matches.empty:
        # Same exception type the bare ``.values[0]`` lookup produced, but
        # with a message that names the title instead of a NumPy
        # out-of-bounds complaint.
        raise IndexError(f"no movie titled {title!r} in the dataset")
    return matches.values[0]
|
|
def get_title_from_index(index):
    """Return the title of the first row whose DataFrame index label is ``index``.

    Args:
        index: Label to compare against ``df.index``.

    Returns:
        The ``title`` value of the first matching row.

    Raises:
        IndexError: If no row carries that label.
    """
    row_mask = df.index == index
    titles = df.loc[row_mask, "title"]
    return titles.values[0]
|
|
def check_movie(m_name):
    """Return the formatted suggestions for the movie titled ``m_name``.

    Looks up the movie's row in ``cosine_sim``, ranks every movie by its
    similarity score (highest first) and passes the ranking to
    ``get_suggestions``.

    Args:
        m_name: Title of the movie to find suggestions for.

    Returns:
        Whatever ``get_suggestions`` returns for the ranked list.
    """
    # NOTE(review): the queried movie is not removed from the ranking, so it
    # can appear among its own suggestions - confirm this is intended.
    scores = cosine_sim[get_index_from_title(m_name)]
    # Pairs of (row position, score); sorted() is stable, so equal scores
    # keep their original row order.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    return get_suggestions(ranked)
|
|
def get_suggestions(sorted_similar_movies, limit=11):
    """Format the titles of the leading entries, one per line.

    Args:
        sorted_similar_movies: Iterable of sequences whose first element is
            a row label understood by ``get_title_from_index``. The order of
            the iterable is preserved in the output.
        limit: Maximum number of titles to emit. The default of 11 is the
            number of titles this function has always produced for inputs
            that are long enough.

    Returns:
        A string containing each title followed by a newline. When fewer
        than ``limit`` entries are supplied, every available title is
        returned; an empty input gives an empty string rather than ``None``.
    """
    titles = []
    for entry in sorted_similar_movies:
        if len(titles) >= limit:
            break
        titles.append(get_title_from_index(entry[0]))
    # Build the text in one pass; always return a string so the caller never
    # has to deal with None for short inputs.
    return "".join(title + "\n" for title in titles)
|
|
def check(enter_movie_name):
    """Gradio callback: return the suggestion text for the entered title.

    Args:
        enter_movie_name: Movie title typed into the text box.

    Returns:
        The result of ``check_movie`` for that title.
    """
    return check_movie(enter_movie_name)
|
|
|
|
# One text box in, one text box out; each submission is passed to check().
movie = gr.Interface(
    fn=check,
    inputs="text",
    outputs="text",
)

# share=True additionally requests a publicly reachable Gradio link.
movie.launch(share=True)