# Hosting-page residue (not part of the program):
# SnehaJais's picture / Initial Commit / c37645b verified
import pandas as pd
import numpy as np
import streamlit as st
from pathlib import Path
import sys
import os
import time
import matplotlib.pyplot as plt
# ====================== Streamlit Setup ======================
# set_page_config is issued before any other Streamlit call on this page.
st.set_page_config(layout="wide")
st.title("Popularity Based Filtering")
st.markdown('---')

# ====================== Data Extraction ======================
# Streamlit re-executes this script on every interaction, so only extend
# sys.path once instead of appending a duplicate entry per rerun.
module_dir = os.path.dirname(__file__)
if module_dir not in sys.path:
    sys.path.append(module_dir)
data_dir = Path(__file__).parent / 'data'

# Movies dataset: only the id and the title are needed for the display joins.
# NOTE(review): the file is resolved against the current working directory,
# not data_dir -- confirm where movies_final.csv actually lives.
movies_path = 'movies_final.csv'
movies = pd.read_csv(movies_path)
movies = movies[['movieId', 'title']]

# Ratings dataset: use the frame stored in the session under 'ratings_df'
# when it is present. Otherwise fall back to the CSV next to this script
# rather than failing with a bare KeyError.
ratings_path = data_dir / 'ratings.csv'
if 'ratings_df' in st.session_state:
    ratings = st.session_state['ratings_df']
elif ratings_path.exists():
    ratings = pd.read_csv(ratings_path)
else:
    st.error(
        "Ratings data is not available: 'ratings_df' is missing from the "
        f"session state and {ratings_path} does not exist."
    )
    st.stop()

# Keep a single rating per (user, movie) pair -- the first one encountered.
ratings = ratings.drop_duplicates(subset=['userId', 'movieId'], keep='first')
# Preview: first rows of both datasets, side by side.
# The subheader emoji was mis-encoded (UTF-8 bytes shown as single-byte
# characters); it is restored to the intended "eyes" glyph.
st.subheader('👀 Glance of the dataset')
ratings_col, movies_col = st.columns(2)
with ratings_col:
    st.markdown("Ratings Dataset")
    st.dataframe(ratings.head())
with movies_col:
    st.markdown("Movies Dataset")
    st.dataframe(movies.head())
st.markdown('---')
# ============================= Most Rated Movies ====================================================
# col1 is filled here; col2 is filled by the "Least Rated Movies" section
# that follows, so both names must stay bound.
col1, col2 = st.columns(2)

# One aggregation yields both the rating count and the mean rating per movie,
# replacing the separate filter + groupby + merge pass for the average.
movie_stats = (
    ratings.groupby('movieId')
    .agg({'rating': 'mean', 'userId': 'count'})
    .rename(columns={'rating': 'Avg_Rating', 'userId': 'No_of_Ratings'})
)

# The five movies with the most ratings, enriched with their titles.
top_by_count = (
    movie_stats.sort_values('No_of_Ratings', ascending=False)
    .head()
    .reset_index()  # movieId becomes a regular column for the merge
    .merge(movies, how='left', on='movieId')
)

with col1:
    # Emoji restored from its mis-encoded form (chart with upwards trend).
    st.subheader('📈 Top 5 Most Rated Movies')
    st.dataframe(top_by_count[['title', 'No_of_Ratings', 'Avg_Rating']])
# ============================= Least Rated Movies ====================================================
# Per-movie mean rating and rating count; the five movies with the fewest
# ratings come first, ties broken by the lower average rating.
least_rated = (
    ratings.groupby('movieId')
    .agg({'rating': 'mean', 'userId': 'count'})
    .rename(columns={'rating': 'Avg_Rating', 'userId': 'No_of_Ratings'})
    .sort_values(['No_of_Ratings', 'Avg_Rating'])
    .head()
    .reset_index()  # merge on an explicit movieId column, not an index level
)
least_rated = least_rated.merge(movies, how='left', on='movieId')

# col2 is the right-hand column created by the st.columns(2) call in the
# "Most Rated Movies" section.
with col2:
    # Emoji restored from its mis-encoded form (chart with downwards trend).
    st.subheader('📉 Top 5 Least Rated Movies')
    st.dataframe(least_rated[['title', 'No_of_Ratings', 'Avg_Rating']])
# ============================= Movies With the Most 5-Star / 0.5-Star Ratings ========================
# These tables count how often a movie received a given rating value; no
# average is involved.
st.markdown('---')
# col1 is filled here; col2 is used by the ratings-distribution chart that
# follows, so both names must stay bound.
col1, col2 = st.columns(2)


def _top_movies_with_rating(rating_value):
    """Return the five movies that received ``rating_value`` most often.

    The result has the columns ``movieId``, ``No_of_Ratings`` and ``title``
    (title is left-joined from ``movies``).
    """
    counts = (
        ratings.loc[ratings['rating'] == rating_value]
        .groupby('movieId')['userId']
        .count()
        .sort_values(ascending=False)
        .head()
        .rename('No_of_Ratings')
        .reset_index()
    )
    return counts.merge(movies, how='left', on='movieId')


with col1:
    # The same table is rendered for the 5 and the 0.5 rating values; the
    # star glyph was mis-encoded in the original strings and is restored.
    for stars in (5, 0.5):
        st.subheader(f'Highest Rated Movies with {stars}⭐')
        st.dataframe(_top_movies_with_rating(stars)[['title', 'No_of_Ratings']])
# ============================= Ratings Distribution Graph ============================================
# matplotlib.pyplot is already imported as plt at the top of the file.
# col2 is the right-hand column created in the preceding section.
with col2:
    # Emoji restored from its mis-encoded form (bar chart).
    st.subheader('📊 Ratings Distribution')
    rating_counts = ratings.groupby('rating')['userId'].count()
    fig, ax = plt.subplots()
    # Plot the rating values as categories: with numeric x positions the
    # default bar width (0.8) makes bars overlap when values are 0.5 apart.
    ax.bar(rating_counts.index.astype(str), rating_counts.values)
    ax.set_xlabel('Rating')
    ax.set_ylabel('Number of Ratings')
    ax.set_title('Number of Ratings by Rating Value')
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; the
    # script reruns on each interaction, so release the figure explicitly.
    plt.close(fig)
st.markdown('---')
# ============================= User Selected Ratings ===================================================
# Emoji restored from its mis-encoded form (direct hit / target).
st.subheader('🎯 Movies Filtered by Selected Rating')

# Offer every rating value present in the data, highest first.
rating_options = (
    ratings['rating'].drop_duplicates().sort_values(ascending=False).tolist()
)
ratings_selected = st.selectbox('Select the Rating', rating_options)

# How often each movie received the selected rating; computed once and
# shared by both tables below.
counts_for_rating = (
    ratings.loc[ratings['rating'] == ratings_selected]
    .groupby('movieId')['userId']
    .count()
    .rename('No_of_Ratings')
)

col1, col2 = st.columns(2)
# (column, heading, sort ascending?) -- the left table shows the movies that
# got the selected rating most often, the right table those that got it
# least often.
panels = (
    (col1, '**Most Rated for Selected Rating**', False),
    (col2, '**Least Rated for Selected Rating**', True),
)
for column, heading, ascending in panels:
    with column:
        st.markdown(heading)
        ranked = (
            counts_for_rating.sort_values(ascending=ascending)
            .head()
            .reset_index()
            .merge(movies, how='left', on='movieId')
        )
        st.dataframe(ranked[['title', 'No_of_Ratings']])