| from sentence_transformers import SentenceTransformer
|
| import sys
|
| import os
|
| import pandas as pd
|
| from pathlib import Path
|
| import numpy as np
|
| from sklearn.metrics.pairwise import cosine_similarity
|
| import streamlit as st
|
| import time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the prepared movie table, then show its (rows, columns) shape and a
# short preview on the Streamlit page.
movies = pd.read_csv('movies_final.csv')
st.text(movies.shape)
st.dataframe(movies.head())

# Reference timestamp for the elapsed-time readout; taken before the model is
# loaded, so model loading and encoding are included in the measurement.
start = time.time()

# Embed every row's 'tokens' text with the sentence-transformer model. The
# encoder output is converted with .tolist() so each row of the new
# 'bert_vector' column holds its embedding as a plain Python list.
model = SentenceTransformer('all-MiniLM-L6-v2')
movies['bert_vector'] = model.encode(
    movies['tokens'].tolist(),
    show_progress_bar=True,
).tolist()
|
|
|
def recommend_bert(title, top_n=5):
    """Return the titles of the movies most similar to ``title``.

    Similarity is the cosine similarity between the ``bert_vector`` entries
    of the module-level ``movies`` DataFrame.

    Args:
        title: Value to look up in ``movies['title']`` (exact match). When
            several rows share the title, the first one is used as the query.
        top_n: Maximum number of titles to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of up to ``top_n`` titles ordered from most to least similar,
        never containing the query row itself; or the string
        ``"Movie not found."`` when ``title`` is absent (kept as-is for
        backward compatibility with existing callers).
    """
    titles = movies['title'].values
    matches = np.flatnonzero(titles == title)
    if matches.size == 0:
        return "Movie not found."
    if top_n <= 0:
        return []

    # Work with row *positions* throughout: the ranking below is positional,
    # so the query must be identified positionally too. Mixing index labels
    # with positions is only correct on a default RangeIndex.
    query_pos = matches[0]
    all_vectors = np.vstack(movies['bert_vector'].values)
    query_vector = all_vectors[query_pos].reshape(1, -1)

    similarities = cosine_similarity(query_vector, all_vectors)[0]

    # Stable descending sort: ties keep their original row order.
    ranked = np.argsort(-similarities, kind='stable')

    # Drop the query row explicitly instead of assuming it ranks first; a
    # duplicate or numerically tied vector could otherwise push the query
    # itself into the recommendations.
    ranked = ranked[ranked != query_pos][:top_n]
    return [titles[pos] for pos in ranked]
|
|
|
# Demo query: display the recommendations for one fixed title.
out = recommend_bert("Schindler's List")
st.text(out)

# Display the number of seconds elapsed since `start` was recorded.
end = time.time()
st.text(end - start)