Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
# Lookup table from the country names found in the datasets to ISO 3166-1
# alpha-3 codes, used to place countries on the choropleth map. Names that are
# missing here cannot be located on the map.
country_mapping = {
    "United States": "USA",
    "United Kingdom": "GBR",
    "France": "FRA",
    "Canada": "CAN",
    "Germany": "DEU",
    "Japan": "JPN",
    "India": "IND",
    "Australia": "AUS",
    "China": "CHN",
    "Italy": "ITA",
    "Spain": "ESP",
    "Mexico": "MEX",
    "Hong Kong": "HKG",
    "Sweden": "SWE",
    "Denmark": "DNK",
    "New Zealand": "NZL",
    "Belgium": "BEL",
    "South Korea": "KOR",
    "Ireland": "IRL",
    "Czech Republic": "CZE",
    "Switzerland": "CHE",
    "Hungary": "HUN",
    "Norway": "NOR",
    "United Arab Emirates": "ARE",
    "Netherlands": "NLD",
    "South Africa": "ZAF",
    "Poland": "POL",
    "Austria": "AUT",
    "Turkey": "TUR",
    "Brazil": "BRA",
    "Russia": "RUS",
    "Argentina": "ARG",
    "Singapore": "SGP",
    "Thailand": "THA",
    "Portugal": "PRT",
    "Greece": "GRC",
    "Egypt": "EGY",
    "Vietnam": "VNM",
    "Malaysia": "MYS",
    "Philippines": "PHL",
    "Taiwan": "TWN",
    "Israel": "ISR",
    "Saudi Arabia": "SAU",
    "Indonesia": "IDN",
    "Pakistan": "PAK",
    "Iran": "IRN",
    "Iraq": "IRQ",
    "Syria": "SYR",
    "Lebanon": "LBN",
    "Jordan": "JOR",
    "Qatar": "QAT",
    "Oman": "OMN",
    "Kuwait": "KWT",
    "Bahrain": "BHR",
    "Yemen": "YEM",
    "Morocco": "MAR",
    "Tunisia": "TUN",
    "Algeria": "DZA",
    "Libya": "LBY",
    "Sudan": "SDN",
    "Kenya": "KEN",
    "Nigeria": "NGA",
    "Ghana": "GHA",
    "Ethiopia": "ETH",
    "Botswana": "BWA",
    "Namibia": "NAM",
    "Zimbabwe": "ZWE",
    "Zambia": "ZMB",
    "Uganda": "UGA",
    "Rwanda": "RWA",
    "Burundi": "BDI",
    "Tanzania": "TZA",
    "Angola": "AGO",
    "Mozambique": "MOZ",
    "Madagascar": "MDG",
    "Mauritius": "MUS",
    "Somalia": "SOM",
    # Somaliland has no ISO code of its own; it shares Somalia's.
    "Somaliland": "SOM",
    "Senegal": "SEN",
    "Ivory Coast": "CIV",
    "Cameroon": "CMR",
    "Benin": "BEN",
    "Togo": "TGO",
    "Gambia": "GMB",
    "Guinea": "GIN",
    "Guinea-Bissau": "GNB",
    "Equatorial Guinea": "GNQ",
    "Gabon": "GAB",
    "Congo": "COG",
    "Democratic Republic of the Congo": "COD",
    "Central African Republic": "CAF",
    "Chad": "TCD",
    "Niger": "NER",
    "Mali": "MLI",
    "Burkina Faso": "BFA",
    "Mauritania": "MRT",
    "Western Sahara": "ESH",
    "Sierra Leone": "SLE",
    "Liberia": "LBR",
    "Cape Verde": "CPV",
    "Seychelles": "SYC",
    "Comoros": "COM",
    "Maldives": "MDV",
}
# Load the cleaned datasets; both paths are relative to the working directory.
df_movies = pd.read_csv('movie_after_cleaning.csv')
df_tv_series = pd.read_csv('series_after_cleaning.csv')

# 'genre' and 'country' hold comma-separated values. Turn each cell into a
# list so the chart helpers can explode() it into one row per value.
#
# The separator pattern is longer than one character, so pandas treats it as a
# regular expression: it swallows whitespace around each comma, and the
# preceding strip() removes whitespace at the ends of the cell. Without this,
# "Drama, Crime" would yield ' Crime', which is counted separately from
# 'Crime' and never matches a key in country_mapping.
# Missing cells (NaN) stay NaN.
for _frame in (df_movies, df_tv_series):
    for _column in ('genre', 'country'):
        _frame[_column] = _frame[_column].str.strip().str.split(r'\s*,\s*')
# Function to create treemap
def create_treemap(df, title):
    """Build a treemap of the works grouped by their ``parentalguide`` value.

    Args:
        df: DataFrame with a ``parentalguide`` column.
        title: Title shown above the chart.

    Returns:
        The figure produced by ``px.treemap``.
    """
    hierarchy = ['parentalguide']  # single-level hierarchy
    return px.treemap(df, path=hierarchy, title=title)
def create_best_genres_line_chart(df, title):
    """Plot, for every year, the genre(s) with the most works in that year.

    Args:
        df: DataFrame with a ``year`` column and a ``genre`` column whose
            cells are lists of genre names (one work may have several).
        title: Title shown above the chart.

    Returns:
        A Plotly line figure with one trace per genre that was the most
        frequent genre in at least one year.
    """
    # One row per (work, genre) pair for the DataFrame that was passed in.
    df_genres = df.explode('genre')

    # Number of works per year and genre.
    genre_counts = (
        df_genres.groupby(['year', 'genre']).size().reset_index(name='count')
    )

    # Keep the rows whose count equals the yearly maximum. Ties are all kept,
    # so a year can contribute more than one genre.
    # The string alias 'max' is used instead of the builtin ``max``: passing
    # the builtin to transform() is deprecated in recent pandas releases.
    yearly_max = genre_counts.groupby('year')['count'].transform('max')
    best_genres = genre_counts[genre_counts['count'] == yearly_max]

    # Plotly line chart for best genres over the years
    fig_best_genres = px.line(
        best_genres, x='year', y='count', color='genre', title=title
    )
    fig_best_genres.update_layout(
        xaxis_title='Year',
        yaxis_title='Number of Works',
        legend_title='Genre',
    )
    return fig_best_genres
# Function to create genre bar chart
def create_genre_bar_chart(df, title):
    """Draw a horizontal bar chart of the ten most frequent genres.

    Args:
        df: DataFrame whose ``genre`` cells are lists of genre names.
        title: Title shown above the chart.

    Returns:
        The figure produced by ``px.bar``.
    """
    top_genres = (
        df.explode('genre')['genre']
        .value_counts()          # most frequent first
        .head(10)
        .rename_axis('genre')
        .reset_index(name='count')
    )
    # Ascending order puts the largest bar at the top of a horizontal chart.
    top_genres = top_genres.sort_values('count', ascending=True)
    return px.bar(top_genres, x='count', y='genre', orientation='h', title=title)
# Function to create choropleth map
def create_country_map(df, title):
    """Draw a world choropleth of how many works each country contributed to.

    Args:
        df: DataFrame whose ``country`` cells are lists of country names.
        title: Title shown above the map.

    Returns:
        The figure produced by ``px.choropleth``. Countries whose name is not
        a key of ``country_mapping`` are left off the map.
    """
    country_counts = (
        df.explode('country')['country']
        .value_counts()
        .rename_axis('country')
        .reset_index(name='count')
    )

    # Put the ISO-3 code in its own column so the readable country name is
    # still available for the hover label.
    country_counts['iso_alpha'] = country_counts['country'].map(country_mapping)

    # A name without a mapping has no location to draw.
    country_counts = country_counts.dropna(subset=['iso_alpha'])

    # Several names can share one code (e.g. Somalia and Somaliland); merge
    # them so each location appears once with the combined count.
    country_counts = country_counts.groupby('iso_alpha', as_index=False).agg(
        country=('country', ', '.join),
        count=('count', 'sum'),
    )

    fig = px.choropleth(
        country_counts,
        locations="iso_alpha",
        color="count",
        hover_name="country",
        title=title,
        projection="natural earth",
    )
    return fig
def display_summary_metrics(df):
    """Render four headline metrics for ``df`` side by side.

    Args:
        df: DataFrame with ``language`` and ``votes`` columns and a
            ``country`` column whose cells are lists of country names.

    Returns:
        None. The metrics are written to the Streamlit page.
    """
    metrics = [
        ("Number of Works", len(df)),
        ("Number of Languages", df['language'].nunique()),
        # Explode first so each country in a multi-country work is counted.
        ("Number of Countries", df.explode('country')['country'].nunique()),
        ("Number of Votes", df['votes'].sum()),
    ]
    for slot, (label, value) in zip(st.columns(4), metrics):
        slot.metric(label, value)
# Function to create rating distribution box chart
def create_rating_box_chart(df, title):
    """Draw a horizontal box plot of the ``rating`` column.

    Args:
        df: DataFrame with a ``rating`` column.
        title: Title shown above the chart.

    Returns:
        The figure produced by ``px.box`` with semi-transparent markers.
    """
    box_fig = px.box(df, x="rating", title=title)
    # Only the marker opacity is changed here; which points are drawn is left
    # at the Plotly Express default.
    box_fig.update_traces(marker={"opacity": 0.6})
    return box_fig
# Streamlit app
def _render_dashboard(df, label):
    """Render the full set of charts for one dataset.

    Args:
        df: The movies or TV-series DataFrame.
        label: Display name of the dataset ('Movies' or 'TV Series'); used as
            the subheader and as the suffix of every chart title.
    """
    st.subheader(label)
    display_summary_metrics(df)

    # First row: parental-guide treemap next to the top genres.
    top_left, top_right = st.columns(2)
    with top_left:
        st.plotly_chart(
            create_treemap(df, f'Parental Guide - {label}'),
            use_container_width=True,
        )
    with top_right:
        st.plotly_chart(
            create_genre_bar_chart(df, f'Top 10 Genres - {label}'),
            use_container_width=True,
        )

    # Second row: country map next to the rating distribution.
    bottom_left, bottom_right = st.columns(2)
    with bottom_left:
        st.plotly_chart(
            create_country_map(df, f'Global Distribution of {label}'),
            use_container_width=True,
        )
    with bottom_right:
        st.plotly_chart(
            create_rating_box_chart(df, f'Rating Distribution - {label}'),
            use_container_width=True,
        )

    # Full-width chart below the two rows.
    st.plotly_chart(
        create_best_genres_line_chart(df, f'Best Genres Over the Years - {label}'),
        use_container_width=True,
    )


st.title('Parental Guide Analysis')

# Dataset selector: two buttons side by side.
col1, col2 = st.columns(2)
selection_movies = col1.button('Movies')
selection_tv_series = col2.button('TV Series')

# Neither button reports a click on the first page load, so fall back to the
# movies view.
if not (selection_movies or selection_tv_series):
    selection_movies = True

# Displaying charts in a customized layout based on selection
if selection_movies:
    _render_dashboard(df_movies, 'Movies')
elif selection_tv_series:
    _render_dashboard(df_tv_series, 'TV Series')