Spaces:

Penguni
/

Movie_dashboard

Sleeping

App Files Files Community

Movie_dashboard / app.py

Penguni

Update app.py

0084a36 verified over 1 year ago

raw

history blame contribute delete

7.55 kB

	import streamlit as st
	import pandas as pd
	import plotly.express as px

	# Lookup table from the country names used in the datasets to the
	# three-letter codes handed to the choropleth map.
	# Note: 'Somaliland' deliberately shares the 'SOM' code with 'Somalia'.
	country_mapping = {
	    'United States': 'USA', 'United Kingdom': 'GBR', 'France': 'FRA',
	    'Canada': 'CAN', 'Germany': 'DEU', 'Japan': 'JPN',
	    'India': 'IND', 'Australia': 'AUS', 'China': 'CHN',
	    'Italy': 'ITA', 'Spain': 'ESP', 'Mexico': 'MEX',
	    'Hong Kong': 'HKG', 'Sweden': 'SWE', 'Denmark': 'DNK',
	    'New Zealand': 'NZL', 'Belgium': 'BEL', 'South Korea': 'KOR',
	    'Ireland': 'IRL', 'Czech Republic': 'CZE', 'Switzerland': 'CHE',
	    'Hungary': 'HUN', 'Norway': 'NOR', 'United Arab Emirates': 'ARE',
	    'Netherlands': 'NLD', 'South Africa': 'ZAF', 'Poland': 'POL',
	    'Austria': 'AUT', 'Turkey': 'TUR', 'Brazil': 'BRA',
	    'Russia': 'RUS', 'Argentina': 'ARG', 'Singapore': 'SGP',
	    'Thailand': 'THA', 'Portugal': 'PRT', 'Greece': 'GRC',
	    'Egypt': 'EGY', 'Vietnam': 'VNM', 'Malaysia': 'MYS',
	    'Philippines': 'PHL', 'Taiwan': 'TWN', 'Israel': 'ISR',
	    'Saudi Arabia': 'SAU', 'Indonesia': 'IDN', 'Pakistan': 'PAK',
	    'Iran': 'IRN', 'Iraq': 'IRQ', 'Syria': 'SYR',
	    'Lebanon': 'LBN', 'Jordan': 'JOR', 'Qatar': 'QAT',
	    'Oman': 'OMN', 'Kuwait': 'KWT', 'Bahrain': 'BHR',
	    'Yemen': 'YEM', 'Morocco': 'MAR', 'Tunisia': 'TUN',
	    'Algeria': 'DZA', 'Libya': 'LBY', 'Sudan': 'SDN',
	    'Kenya': 'KEN', 'Nigeria': 'NGA', 'Ghana': 'GHA',
	    'Ethiopia': 'ETH', 'Botswana': 'BWA', 'Namibia': 'NAM',
	    'Zimbabwe': 'ZWE', 'Zambia': 'ZMB', 'Uganda': 'UGA',
	    'Rwanda': 'RWA', 'Burundi': 'BDI', 'Tanzania': 'TZA',
	    'Angola': 'AGO', 'Mozambique': 'MOZ', 'Madagascar': 'MDG',
	    'Mauritius': 'MUS', 'Somalia': 'SOM', 'Somaliland': 'SOM',
	    'Senegal': 'SEN', 'Ivory Coast': 'CIV', 'Cameroon': 'CMR',
	    'Benin': 'BEN', 'Togo': 'TGO', 'Gambia': 'GMB',
	    'Guinea': 'GIN', 'Guinea-Bissau': 'GNB', 'Equatorial Guinea': 'GNQ',
	    'Gabon': 'GAB', 'Congo': 'COG', 'Democratic Republic of the Congo': 'COD',
	    'Central African Republic': 'CAF', 'Chad': 'TCD', 'Niger': 'NER',
	    'Mali': 'MLI', 'Burkina Faso': 'BFA', 'Mauritania': 'MRT',
	    'Western Sahara': 'ESH', 'Sierra Leone': 'SLE', 'Liberia': 'LBR',
	    'Cape Verde': 'CPV', 'Seychelles': 'SYC', 'Comoros': 'COM',
	    'Maldives': 'MDV',
	}

	# Read the two cleaned datasets that feed the dashboard.
	df_movies = pd.read_csv('movie_after_cleaning.csv')
	df_tv_series = pd.read_csv('series_after_cleaning.csv')

	# 'genre' and 'country' hold comma-separated values; turn each cell into a
	# list so the chart helpers can explode them into one row per value.
	for _frame in (df_movies, df_tv_series):
	    for _column in ('genre', 'country'):
	        _frame[_column] = _frame[_column].str.split(',')

	def create_treemap(df, title):
	    """Return a Plotly treemap of *df* grouped by its 'parentalguide' column.

	    Args:
	        df: DataFrame containing a 'parentalguide' column.
	        title: Text shown as the chart title.

	    Returns:
	        The figure produced by ``px.treemap``.
	    """
	    return px.treemap(df, path=['parentalguide'], title=title)

	def create_best_genres_line_chart(df, title):
	    """Plot, for every year, the genre(s) with the highest number of works.

	    Args:
	        df: DataFrame with a 'year' column and a 'genre' column that is
	            exploded here (so each cell is expected to hold a list of genres).
	        title: Text shown as the chart title.

	    Returns:
	        The figure produced by ``px.line``; one line per genre, with points
	        only in the years where that genre had the top count.
	    """
	    # One row per (work, genre) pair of the given dataset.
	    df_genres = df.explode('genre')

	    # Number of works per (year, genre) combination.
	    genre_counts = df_genres.groupby(['year', 'genre']).size().reset_index(name='count')

	    # Keep the rows whose count equals that year's maximum; ties keep every
	    # tied genre. The string 'max' is used instead of the builtin ``max``
	    # because passing builtins to ``transform`` is deprecated in pandas 2.1+.
	    yearly_max = genre_counts.groupby('year')['count'].transform('max')
	    best_genres = genre_counts[genre_counts['count'] == yearly_max]

	    fig_best_genres = px.line(best_genres, x='year', y='count', color='genre', title=title)
	    fig_best_genres.update_layout(xaxis_title='Year', yaxis_title='Number of Works', legend_title='Genre')

	    return fig_best_genres


	def create_genre_bar_chart(df, title):
	    """Return a horizontal bar chart of the ten most frequent genres in *df*.

	    Args:
	        df: DataFrame whose 'genre' column is exploded here (each cell is
	            expected to hold a list of genres).
	        title: Text shown as the chart title.

	    Returns:
	        The figure produced by ``px.bar``.
	    """
	    # value_counts() orders genres from most to least frequent.
	    per_genre = df['genre'].explode().value_counts()

	    # Take the first ten, then sort ascending so the largest bar is drawn on top.
	    top_genres = (
	        per_genre.rename_axis('genre')
	        .reset_index(name='count')
	        .iloc[:10]
	        .sort_values(by='count')
	    )
	    return px.bar(top_genres, x='count', y='genre', orientation='h', title=title)

	def create_country_map(df, title):
	    """Build a world choropleth of how many works are attributed to each country.

	    Country names are translated to three-letter codes through the
	    module-level ``country_mapping``. Names that are not in the mapping are
	    left out of the map, and names that share a code are counted together.

	    Args:
	        df: DataFrame whose 'country' column is exploded here (each cell is
	            expected to hold a list of country names).
	        title: Text shown as the chart title.

	    Returns:
	        The figure produced by ``px.choropleth``; its 'country' column (used
	        for both the locations and the hover name) holds the codes.
	    """
	    # One entry per (work, country) pair. Surrounding whitespace (e.g. left
	    # over from a comma split) is trimmed so ' France' still matches 'France'.
	    country_names = df['country'].explode().dropna().astype(str).str.strip()

	    # Map names to codes; unmapped names become NaN and are dropped rather
	    # than being passed to the map as invalid locations.
	    iso_codes = country_names.map(country_mapping).dropna()

	    # Count after mapping so every code appears exactly once.
	    country_counts = iso_codes.value_counts().reset_index()
	    country_counts.columns = ['country', 'count']

	    fig = px.choropleth(
	        country_counts,
	        locations="country",
	        color="count",
	        hover_name="country",
	        title=title,
	        projection="natural earth",
	    )

	    return fig

	def display_summary_metrics(df):
	    """Show four headline figures for *df* in a single row of Streamlit metrics.

	    Args:
	        df: DataFrame with 'language', 'country' and 'votes' columns; the
	            'country' column is exploded before distinct values are counted.
	    """
	    # Label/value pairs in the order they appear on screen.
	    summary = (
	        ("Number of Works", len(df)),
	        ("Number of Languages", df['language'].nunique()),
	        ("Number of Countries", df['country'].explode().nunique()),
	        ("Number of Votes", df['votes'].sum()),
	    )

	    # One column per metric, filled left to right.
	    for slot, (label, value) in zip(st.columns(4), summary):
	        slot.metric(label, value)

	def create_rating_box_chart(df, title):
	    """Return a horizontal box plot of the 'rating' column of *df*.

	    Args:
	        df: DataFrame containing a 'rating' column.
	        title: Text shown as the chart title.

	    Returns:
	        The figure produced by ``px.box`` with its marker opacity set to 0.6.
	    """
	    box_figure = px.box(df, x="rating", title=title)
	    # Draw the plotted points semi-transparent.
	    box_figure.update_traces(marker={'opacity': 0.6})
	    return box_figure

	def _render_dashboard(df, label):
	    """Render the subheader, summary metrics and five charts for one dataset.

	    Args:
	        df: Dataset to visualise (movies or TV series).
	        label: Display name of the dataset, used in the subheader and in
	            every chart title.
	    """
	    st.subheader(label)
	    display_summary_metrics(df)

	    # First row: parental-guide treemap next to the top-genres bar chart.
	    top_left, top_right = st.columns(2)
	    with top_left:
	        st.plotly_chart(create_treemap(df, f'Parental Guide - {label}'), use_container_width=True)
	    with top_right:
	        st.plotly_chart(create_genre_bar_chart(df, f'Top 10 Genres - {label}'), use_container_width=True)

	    # Second row: country map next to the rating box plot.
	    bottom_left, bottom_right = st.columns(2)
	    with bottom_left:
	        st.plotly_chart(create_country_map(df, f'Global Distribution of {label}'), use_container_width=True)
	    with bottom_right:
	        st.plotly_chart(create_rating_box_chart(df, f'Rating Distribution - {label}'), use_container_width=True)

	    # Final chart spans the full page width.
	    st.plotly_chart(create_best_genres_line_chart(df, f'Best Genres Over the Years - {label}'), use_container_width=True)


	# Page header.
	st.title('Parental Guide Analysis')

	# Two side-by-side buttons choose which dataset to show.
	movies_slot, series_slot = st.columns(2)
	selection_movies = movies_slot.button('Movies')
	selection_tv_series = series_slot.button('TV Series')

	# Neither button pressed on this run: fall back to the movies view.
	if not (selection_movies or selection_tv_series):
	    selection_movies = True

	if selection_movies:
	    _render_dashboard(df_movies, 'Movies')
	elif selection_tv_series:
	    _render_dashboard(df_tv_series, 'TV Series')