| import pandas as pd
|
| import numpy as np
|
| import streamlit as st
|
| from pathlib import Path
|
| import sys
|
| import os
|
| import time
|
| import matplotlib.pyplot as plt
|
|
|
|
|
|
|
# Page chrome: wide layout, the page heading, and a horizontal rule under it.
# The layout is configured ahead of every other Streamlit call in this script.
page_options = {"layout": "wide"}
st.set_page_config(**page_options)
st.title("Popularity Based Filtering")
st.markdown('---')
|
|
|
|
|
|
|
# Make modules that sit next to this script importable. Streamlit re-executes
# the script on every interaction inside one long-lived process, so the
# directory is only added when it is not already on sys.path; an unconditional
# append would leave one more duplicate entry behind after each rerun.
script_dir = os.path.dirname(__file__)
if script_dir not in sys.path:
    sys.path.append(script_dir)

# Folder expected to hold the CSV files, resolved relative to this script.
data_dir = Path(__file__).parent / 'data'

# Movie catalogue: only the id (join key) and the title are used on this page.
# NOTE(review): the file name is relative, so it is resolved against the
# process working directory rather than data_dir -- confirm that is intended.
movies_csv = 'movies_final.csv'
movies = pd.read_csv(movies_csv)
movies = movies[['movieId', 'title']]
|
|
|
|
|
# Ratings are taken from the DataFrame stored in the Streamlit session state
# under 'ratings_df'. When that key is absent (for example, this page is the
# first one opened in a fresh session) the page previously died with a bare
# KeyError; now it falls back to the CSV in the data folder when one exists and
# otherwise shows a readable error and halts the script.
# NOTE(review): the CSV fallback assumes the file has the same userId /
# movieId / rating columns as the session DataFrame -- confirm.
ratings_csv = data_dir / 'ratings.csv'
if 'ratings_df' in st.session_state:
    ratings = st.session_state['ratings_df']
elif ratings_csv.exists():
    ratings = pd.read_csv(ratings_csv)
else:
    st.error(
        "Ratings data is not available: 'ratings_df' is missing from the "
        "session state and no ratings.csv was found in the data folder."
    )
    st.stop()

# Keep a single rating per (user, movie) pair: the first one encountered.
ratings = ratings.drop_duplicates(subset=['userId', 'movieId'], keep='first')
|
|
|
|
|
# Side-by-side preview of the first rows of both input tables.
st.subheader('π Glance of the dataset')
col1, col2 = st.columns(2)
preview_panels = (
    (col1, "Ratings Dataset", ratings),
    (col2, "Movies Dataset", movies),
)
for panel, caption, frame in preview_panels:
    with panel:
        st.markdown(caption)
        st.dataframe(frame.head())
st.markdown('---')
|
|
|
|
|
# Two panels: most rated movies on the left; col2 is filled in further down.
col1, col2 = st.columns(2)

# Number of ratings per movie, keeping the five largest counts.
ratings_per_movie = ratings.groupby('movieId')['userId'].count()
busiest = ratings_per_movie.sort_values(ascending=False).head(5)
busiest = busiest.rename('No_of_Ratings').reset_index()
top_table = busiest.merge(movies, how='left', on='movieId')

# Mean rating, computed only over the rows that belong to those five movies.
top_ids = top_table['movieId'].tolist()
mean_by_movie = (
    ratings.loc[ratings['movieId'].isin(top_ids)]
    .groupby('movieId')['rating']
    .mean()
    .rename('Avg_Rating')
    .reset_index()
)
top_table = top_table.merge(mean_by_movie, how='left', on='movieId')

with col1:
    st.subheader('π Top 5 Most Rated Movies')
    st.dataframe(top_table[['title', 'No_of_Ratings', 'Avg_Rating']])
|
|
|
|
|
# Per-movie mean rating and rating count, computed in a single aggregation.
per_movie = ratings.groupby('movieId').agg(
    Avg_Rating=('rating', 'mean'),
    No_of_Ratings=('userId', 'count'),
)

# Fewest ratings first; equal counts are ordered by the lower mean rating.
fewest = per_movie.sort_values(
    by=['No_of_Ratings', 'Avg_Rating'], ascending=True
).head(5)

# 'movieId' is the index of `fewest` here rather than a column; merge accepts
# index level names in `on`, so the join key still resolves.
bottom_table = pd.merge(fewest, movies, how='left', on='movieId')

with col2:
    st.subheader('π Top 5 Least Rated Movies')
    st.dataframe(bottom_table[['title', 'No_of_Ratings', 'Avg_Rating']])

st.markdown('---')
|
|
|
# Two panels: tables on the left; col2 is filled in further down.
col1, col2 = st.columns(2)


def _most_frequent_for(score):
    """Return the five movies that received exactly `score` most often.

    The result has one row per movie with its id, the number of ratings equal
    to `score` ('No_of_Ratings'), and the title joined in from `movies`.
    """
    exact = ratings[ratings['rating'] == score]
    tally = exact.groupby('movieId')['userId'].count()
    tally = tally.sort_values(ascending=False).head(5)
    tally = tally.rename('No_of_Ratings').reset_index()
    return pd.merge(tally, movies, how='left', on='movieId')


five_star_table = _most_frequent_for(5)
half_star_table = _most_frequent_for(0.5)

with col1:
    st.subheader('Highest Rated Movies with 5β')
    st.dataframe(five_star_table[['title', 'No_of_Ratings']])
    st.subheader('Highest Rated Movies with 0.5β')
    st.dataframe(half_star_table[['title', 'No_of_Ratings']])
|
|
|
|
|
| import matplotlib.pyplot as plt
|
|
|
|
|
|
|
with col2:
    # NOTE(review): the leading 'π' looks like a mis-decoded emoji -- confirm
    # the file's encoding before changing the text.
    st.subheader('π Ratings Distribution')

    # Number of ratings recorded at each rating value; groupby returns the
    # rating values in ascending order.
    graph = ratings.groupby('rating')['userId'].count()
    levels = graph.index.tolist()

    # Matplotlib draws bars 0.8 data units wide by default. This page filters
    # on a rating of 0.5, so the scale has half-point steps and neighbouring
    # bars would overlap at that width. Size the bars from the smallest gap
    # between adjacent rating values instead.
    if len(levels) > 1:
        step = min(upper - lower for lower, upper in zip(levels, levels[1:]))
    else:
        step = 1.0

    fig, ax = plt.subplots()
    ax.bar(levels, graph.values, width=0.8 * step)
    ax.set_xticks(levels)  # one tick per rating value that actually occurs
    ax.set_xlabel('Rating')
    ax.set_ylabel('Number of Ratings')
    ax.set_title('Number of Ratings by Rating Value')
    st.pyplot(fig)

    # pyplot keeps a reference to every figure it creates until it is closed.
    # The script is re-run on each interaction, so close the figure once it
    # has been rendered to stop figures piling up in memory.
    plt.close(fig)

st.markdown('---')
|
|
|
|
|
# Let the user pick one rating value, then list the movies that received that
# exact rating most often and least often.
# NOTE(review): the leading 'π―' looks like a mis-decoded emoji -- confirm the
# file's encoding before changing the text.
st.subheader('π― Movies Filtered by Selected Rating')
distinct_ratings = pd.Series(ratings['rating'].unique())
rating_choices = distinct_ratings.sort_values(ascending=False).tolist()
ratings_selected = st.selectbox('Select the Rating', rating_choices)
at_selected = ratings[ratings['rating'] == ratings_selected]

# Count once per movie; each panel below only ranks these counts differently.
votes_per_movie = at_selected.groupby('movieId')['userId'].count()

col1, col2 = st.columns(2)
panels = (
    (col1, '**Most Rated for Selected Rating**', False),
    (col2, '**Least Rated for Selected Rating**', True),
)
for panel, caption, ascending in panels:
    with panel:
        st.markdown(caption)
        ranked = votes_per_movie.sort_values(ascending=ascending).head(5)
        ranked = ranked.rename('No_of_Ratings').reset_index()
        ranked = ranked.merge(movies, how='left', on='movieId')
        st.dataframe(ranked[['title', 'No_of_Ratings']])
|
|
|