Spaces:
Running
Running
| from google.analytics.data_v1beta import BetaAnalyticsDataClient | |
| from google.analytics.data_v1beta.types import ( | |
| DateRange, | |
| Dimension, | |
| Metric, | |
| RunReportRequest, | |
| RunRealtimeReportRequest | |
| ) | |
| import gradio as gr | |
| import os | |
| import json | |
| import time | |
| import pandas as pd | |
| import plotly.express as px | |
# Analytics event name that full_report() keeps; the event's value is summed
# as the jump count.
FINISHED_EXERCISE = 'finished_exercise'
# Google Analytics property queried by full_report().
PROPERTY_ID = "384068977"


def write_credentials_file(credentials_json, directory):
    """Validate a JSON credentials string and save it as ``credentials.json``.

    Args:
        credentials_json: The credentials document as a JSON string.
        directory: Folder in which ``credentials.json`` is created.

    Returns:
        The path of the file that was written.

    Raises:
        json.JSONDecodeError: If ``credentials_json`` is not valid JSON.
    """
    credentials_path = os.path.join(directory, 'credentials.json')
    # Parse before opening the file so invalid input never leaves a
    # truncated credentials file behind.
    credentials_dict = json.loads(credentials_json)
    with open(credentials_path, 'w') as f:
        json.dump(credentials_dict, f)
    return credentials_path


try:
    credentials_json = os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON']
except KeyError:  # running locally
    pass
else:
    # Write the file to the exact (absolute) path exported below. Writing to
    # a path relative to the current working directory would leave the
    # environment variable pointing at a file that does not exist whenever
    # the app is launched from a different directory.
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = write_credentials_file(
        credentials_json, os.path.dirname(os.path.abspath(__file__)))
# Country codes: alpha-2 -> alpha-3 (both columns are whitespace-trimmed).
iso = pd.read_csv('iso.tsv', sep='\t')
iso = iso.assign(**{
    'Alpha-2 code': iso['Alpha-2 code'].str.strip(),
    'Alpha-3 code': iso['Alpha-3 code'].str.strip(),
}).set_index('Alpha-2 code')
alpha_2_map = iso['Alpha-3 code'].to_dict()

# GeoJSON passed to the county choropleth in full_report().
with open('counties.json') as f:
    counties = json.loads(f.read())

# US cities keyed by "city, state_name" -> county FIPS code / county name.
cities = pd.read_csv('uscities.csv')
cities = cities.assign(
    full_city=cities['city'] + ', ' + cities['state_name'],
).set_index('full_city')
city_county_map = cities['county_fips'].to_dict()
city_county_name_map = cities['county_name'].to_dict()

# City criteria table keyed by the stringified "Criteria ID" -> city name and
# the second comma-separated component of "Canonical Name" (stored as "state").
ga_cities = pd.read_csv('cities.csv')
ga_cities = ga_cities.astype({'Criteria ID': str}).set_index('Criteria ID')
ga_cities['state'] = ga_cities['Canonical Name'].str.split(',').str.get(1).str.strip()
ga_city_map = ga_cities['Name'].to_dict()
ga_state_map = ga_cities['state'].to_dict()
# Module-level cache shared by every call to full_report().
# How long a fetched report stays valid, in seconds (6 hours).
reload_every = 6 * 60 * 60
# cached_report: DataFrame from the last fetch (None until the first one).
# cache_time:    time.time() value recorded for the last fetch.
# reload_cache:  despite the name, True means "a report is cached and still
#                fresh"; False forces the next call to fetch again.
cached_report, cache_time, reload_cache = None, 0, False
# Rows requested per Data API call. Without an explicit `limit` the API
# returns only the first 10,000 rows, which silently truncates the totals,
# so the report is read page by page.
_REPORT_PAGE_SIZE = 100_000


def _fetch_jump_rows():
    """Download every FINISHED_EXERCISE row of the Analytics report.

    Returns:
        DataFrame with columns ``day`` (int), ``jumps`` (float),
        ``continent``, ``country``, ``iso`` (the API's ``countryId``) and
        ``cityId``; empty (but with those columns) when nothing matched.
    """
    client = BetaAnalyticsDataClient()
    records = []
    offset = 0
    while True:
        request = RunReportRequest(
            property=f"properties/{PROPERTY_ID}",
            dimensions=[Dimension(name="nthDay"),
                        Dimension(name='eventName'),
                        Dimension(name="continent"),
                        Dimension(name="country"),
                        Dimension(name="countryId"),
                        Dimension(name="cityId")],
            metrics=[Metric(name="eventValue")],
            date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
            limit=_REPORT_PAGE_SIZE,
            offset=offset,
        )
        response = client.run_report(request)
        for row in response.rows:
            # Values come back in the order the dimensions were requested.
            nth_day, event_name, continent, country, country_id, city_id = (
                dimension.value for dimension in row.dimension_values
            )
            if event_name != FINISHED_EXERCISE:
                continue
            records.append((int(nth_day), float(row.metric_values[0].value),
                            continent, country, country_id, city_id))
        offset += len(response.rows)
        # row_count is the size of the whole result, not of this page.
        if not response.rows or offset >= response.row_count:
            break
    return pd.DataFrame(
        records,
        columns=['day', 'jumps', 'continent', 'country', 'iso', 'cityId'])


def _load_report():
    """Return a private copy of the report, refetching when the cache is stale.

    The cache globals are only updated after a fetch succeeded, so a failed
    request is retried on the next call instead of leaving the cache marked
    valid while ``cached_report`` is still ``None``.
    """
    global cached_report, cache_time, reload_cache
    expired = time.time() - cache_time > reload_every
    if reload_cache and not expired and cached_report is not None:
        print("Using cached report...")
    else:
        print("Loading report...")
        fresh_report = _fetch_jump_rows()
        cached_report, cache_time, reload_cache = fresh_report, time.time(), True
    # Callers add and overwrite columns, so never hand out the cached frame.
    return cached_report.copy(deep=True)


def _jumps_by_day(df):
    """Build the two per-day tables used by the time-series plots.

    Args:
        df: Non-empty report frame with ``day``, ``continent`` and ``jumps``.

    Returns:
        ``(cumulative, per_day)`` where ``cumulative`` has one row per
        (day, continent) with the running ``total_jumps`` and ``per_day``
        has one row per day with that day's ``jumps`` over all continents.
        Days without data are present with zero jumps.
    """
    by_day = df.groupby(['day', 'continent'])['jumps'].sum().unstack('continent')
    # Cover every day from 1 (or an earlier first recorded day) to the last.
    all_days = pd.RangeIndex(min(int(by_day.index.min()), 1),
                             int(by_day.index.max()) + 1, name='day')
    by_day = by_day.reindex(all_days).fillna(0)
    # Cumulating the zero-filled table carries the running total across gaps
    # and yields 0 (not NaN) before a continent's first recorded jump.
    cumulative = (by_day.cumsum()
                  .reset_index()
                  .melt(id_vars='day', var_name='continent', value_name='total_jumps')
                  .sort_values(by=['day'], kind='stable')
                  .reset_index(drop=True))
    per_day = by_day.sum(axis=1).rename('jumps').reset_index()
    return cumulative, per_day


def full_report():
    """Build the dashboard headline and all six figures.

    Returns:
        A 7-tuple in the order expected by the Gradio ``outputs`` list:
        Markdown headline, top-10 countries bar chart, top-10 cities bar
        chart, world choropleth, cumulative area chart, US county
        choropleth and jumps-per-day scatter. When the report has no rows
        the six figures are ``None``.
    """
    df = _load_report()

    total_jumps = int(df['jumps'].sum())
    unique_countries = df['country'].nunique()
    unique_cities = df['cityId'].nunique()
    print(f"Total jumps: {total_jumps}, unique countries: {unique_countries}, unique cities: {unique_cities}")
    headline = f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries"
    if df.empty:
        # Nothing to plot; the aggregations below need at least one row.
        return headline, None, None, None, None, None, None

    df['iso'] = df['iso'].map(alpha_2_map)
    df['jumps'] = df['jumps'].astype(int)
    df['state'] = df['cityId'].map(ga_state_map)
    # Every Bermuda row is labelled as Hamilton regardless of its city id.
    city_name = df['cityId'].map(ga_city_map).where(df['country'] != 'Bermuda', 'Hamilton')
    # Ids missing from the lookup tables give NaN labels, which the
    # groupbys below drop.
    df['city'] = city_name + ', ' + df['state']

    # Aggregations name their numeric columns explicitly: a bare .sum()
    # would also try to "sum" the string columns on recent pandas versions.
    country_df = df.groupby(['country', 'iso'], as_index=False)[['jumps']].sum()
    country_df = country_df.sort_values(by=['jumps'], ascending=False)
    top_10_countries = country_df.iloc[:10]['country'].tolist()

    country_city_df = df.groupby(['country', 'iso', 'city'], as_index=False)[['day', 'jumps']].sum()
    country_city_df = country_city_df[country_city_df['country'].isin(top_10_countries)].reset_index(drop=True)
    country_city_df = country_city_df.sort_values(by=['day', 'jumps'], ascending=True)
    country_bar = px.bar(country_city_df,
                         y='country', x='jumps',
                         color='city',
                         title='Total Jumps by Country/City',
                         orientation='h',
                         category_orders={'country': top_10_countries},
                         template="plotly_dark")
    country_bar.update_layout(showlegend=False)

    city_totals = df.groupby(['city', 'iso'], as_index=False)[['jumps']].sum()
    city_totals = city_totals.sort_values(by=['jumps'], ascending=False)
    top_10_cities = (city_totals['city'] + ', ' + city_totals['iso']).iloc[:10].tolist()

    city_df = df.groupby(['city', 'iso', 'day'], as_index=False)[['jumps']].sum()
    city_df['city'] = city_df['city'] + ', ' + city_df['iso']
    city_df = city_df[city_df['city'].isin(top_10_cities)].reset_index(drop=True)
    city_df = city_df.sort_values(by=['day', 'jumps'], ascending=True)
    city_bar = px.bar(city_df,
                      y='city', x='jumps', color='day',
                      title='Total Jumps by City/Day',
                      orientation='h',
                      category_orders={'city': top_10_cities},
                      template="plotly_dark")
    city_bar.update_layout(showlegend=False)
    city_bar.update(layout_coloraxis_showscale=False)

    # Colour countries by rank rather than raw totals.
    country_df['rank'] = country_df['jumps'].rank(ascending=False)
    world_map = px.choropleth(country_df, locations="iso",
                              color="rank",
                              hover_name="country",
                              hover_data=["jumps"],
                              color_continuous_scale="OrRd_r",
                              projection='natural earth',
                              template="plotly_dark")
    world_map.update_layout(showlegend=False)
    world_map.update(layout_coloraxis_showscale=False)

    county_df = df.assign(county=df['city'].map(city_county_map),
                          count_name=df['city'].map(city_county_name_map))
    county_df = county_df.groupby(['county', 'count_name'], as_index=False)[['jumps']].sum()
    county_df['rank'] = county_df['jumps'].rank(ascending=False)
    # county codes are two digits for state, three for county
    county_df['county'] = county_df['county'].astype(int).astype(str).str.zfill(5)
    us_county_map = px.choropleth(county_df, geojson=counties, locations='county', color='rank',
                                  color_continuous_scale="OrRd_r",
                                  scope="usa",
                                  hover_name="count_name",
                                  hover_data=["jumps"],
                                  template="plotly_dark")
    us_county_map.update_layout(showlegend=False)
    us_county_map.update(layout_coloraxis_showscale=False)

    cumulative_df, daily_df = _jumps_by_day(df)
    cumulative_area = px.area(cumulative_df, x='day',
                              y='total_jumps',
                              color='continent',
                              template="plotly_dark")
    daily_scatter = px.scatter(daily_df, x='day', y='jumps',
                               trendline='rolling',
                               trendline_options=dict(window=14),
                               trendline_color_override="goldenrod",
                               trendline_scope='overall',
                               template="plotly_dark")

    return headline, country_bar, city_bar, world_map, cumulative_area, us_county_map, daily_scatter
def _plot_row(label):
    """Create a gr.Plot with the given label inside its own gr.Row."""
    with gr.Row():
        return gr.Plot(label=label)


# Page layout: a Markdown headline followed by one plot per row.
with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
    with gr.Row():
        total_jumps_label = gr.Markdown("Total Jumps: 0")
    map_fig = _plot_row("Map")
    jumps_over_time = _plot_row("Jumps Over Time")
    total_plot = _plot_row("Top 10 Countries")
    avg_plot = _plot_row("Top 10 Cities")
    per_day_plot = _plot_row("Jumps per Day")
    county_map = _plot_row("US Map")

    # Listed in the order of full_report()'s return tuple, which differs
    # from the on-page order above.
    outputs = [
        total_jumps_label,
        total_plot,
        avg_plot,
        map_fig,
        jumps_over_time,
        county_map,
        per_day_plot,
    ]
    # Populate every component when the page is loaded.
    dep = demo.load(full_report, None, outputs)

if __name__ == "__main__":
    demo.launch(share=False)