# SnehaJais's picture
# Initial Commit
# c37645b verified
from sentence_transformers import SentenceTransformer
import sys
import os
import pandas as pd
from pathlib import Path
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import streamlit as st
import time
# sys.path.append(os.path.dirname(__file__))
# data_dir = Path(__file__).parent.parent / 'data'
# # links dataset
# movies = data_dir / 'movies_final.csv'
# Load the movie table. This script reads its 'tokens' column (text to embed)
# and its 'title' column (used for lookups and for the returned names).
movies = pd.read_csv('movies_final.csv')
st.text(movies.shape)
st.dataframe(movies.head())

# Start of the timed section; the elapsed time is reported at the end of the
# script.
start = time.time()
model = SentenceTransformer('all-MiniLM-L6-v2')

# read_csv turns empty cells into NaN (a float), which the sentence encoder
# cannot tokenize. Substitute an empty string so one blank row does not abort
# the whole embedding pass; the 'tokens' column itself is left untouched.
token_texts = movies['tokens'].fillna('').astype(str).tolist()

# One embedding per row, stored as a plain Python list in each cell.
movies['bert_vector'] = model.encode(token_texts, show_progress_bar=True).tolist()
def recommend_bert(title, top_n=5):
    """Return the titles of the movies most similar to ``title``.

    Similarity is the cosine similarity between the embeddings stored in the
    module-level ``movies['bert_vector']`` column.

    Args:
        title: Exact value to look up in ``movies['title']``. If several rows
            share the title, the first one is used as the query.
        top_n: Maximum number of recommendations to return. Values below
            zero are treated as zero.

    Returns:
        A list of up to ``top_n`` titles ordered from most to least similar,
        or the string ``"Movie not found."`` when ``title`` is not present.
    """
    # Work with row *positions* throughout so the result does not depend on
    # the DataFrame having a default 0..n-1 index.
    matches = np.flatnonzero(movies['title'].values == title)
    if matches.size == 0:
        return "Movie not found."
    query_pos = int(matches[0])

    all_vectors = np.vstack(movies['bert_vector'].values)
    query_vector = all_vectors[query_pos].reshape(1, -1)
    similarities = cosine_similarity(query_vector, all_vectors)[0]

    # Stable descending sort: ties keep their original row order.
    ranked = np.argsort(-similarities, kind='stable')

    # Remove the query row explicitly instead of assuming it is ranked first;
    # another row with an identical vector can tie with it and sort ahead.
    ranked = ranked[ranked != query_pos]

    top_positions = ranked[:max(top_n, 0)]
    return movies['title'].iloc[top_positions].tolist()
# Demo run: ask for recommendations for one fixed title and show the result.
out = recommend_bert("Schindler's List")
st.text(out)

# Display the number of seconds elapsed since `start` was recorded.
end = time.time()
st.text(end - start)