"""Gradio dashboard visualizing jump-rope workouts pulled from Google Analytics 4.

Fetches `finished_exercise` events (with geo dimensions) from the GA4 Data API,
caches the raw report for six hours, and renders country/city/county/time-series
figures with plotly.
"""
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
    DateRange, Dimension, Metric, RunReportRequest, RunRealtimeReportRequest
)
import gradio as gr
import os
import json
import time
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

# GA4 event name that marks a completed workout.
FINISHED_EXERCISE = 'finished_exercise'
# GA4 property id to query.
PROPERTY_ID = "384068977"

# In deployment the service-account key arrives as a JSON blob in an env var;
# persist it to a file and point GOOGLE_APPLICATION_CREDENTIALS at it so the
# GA client library can authenticate.
try:
    credentials_json = os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON']
    credentials_dict = json.loads(credentials_json)
    with open('credentials.json', 'w') as f:
        json.dump(credentials_dict, f)
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(os.path.dirname(__file__), 'credentials.json')
except KeyError:
    # Env var not set: running locally with default credentials.
    pass
except Exception as e:
    # Best-effort: log and continue; GA calls will fail later if auth is unusable.
    print(f"Error loading credentials: {e}")

# ISO 3166 table: GA reports alpha-2 country codes, plotly choropleths want alpha-3.
iso = pd.read_csv('iso.tsv', sep='\t')
iso['Alpha-2 code'] = iso['Alpha-2 code'].str.strip()
iso['Alpha-3 code'] = iso['Alpha-3 code'].str.strip()
iso.set_index('Alpha-2 code', inplace=True)
alpha_2_map = iso['Alpha-3 code'].to_dict()

# US county boundary GeoJSON (keyed by FIPS code) for the county choropleth.
with open('counties.json') as f:
    counties = json.load(f)

# Google geotarget table: GA criteria id -> city name / canonical "City,State,Country".
ga_cities = pd.read_csv('cities.csv')
# US cities table: "City, State" -> county FIPS / county name.
cities = pd.read_csv('uscities.csv')
cities['full_city'] = cities['city'] + ', ' + cities['state_name']
cities.set_index('full_city', inplace=True)
ga_cities['Criteria ID'] = ga_cities['Criteria ID'].astype(str)
ga_cities.set_index('Criteria ID', inplace=True)
ga_city_map = ga_cities['Name'].to_dict()
# Second component of the canonical name is the state/region.
ga_cities['state'] = ga_cities['Canonical Name'].str.split(',').str[1].str.strip()
ga_state_map = ga_cities['state'].to_dict()
city_county_map = cities['county_fips'].to_dict()
city_county_name_map = cities['county_name'].to_dict()

# Module-level cache for the raw GA report.
cached_report = None        # last fetched DataFrame (None until first fetch)
cache_time = 0              # epoch seconds of the last fetch
reload_cache = False        # True while the cached report is still fresh
reload_every = 6 * 60 * 60  # cache lifetime: 6 hours


def mpl_to_plotly(cmap, pl_entries=11, rdigits=2):
    """Convert a matplotlib colormap into a plotly colorscale.

    cmap       -- matplotlib colormap (callable mapping values in [0, 1] to RGBA)
    pl_entries -- number of colorscale entries to sample
    rdigits    -- digits to round each scale position to
    Returns a list of [position, 'rgb(r, g, b)'] pairs.
    """
    scale = np.linspace(0, 1, pl_entries)
    colors = (cmap(scale)[:, :3] * 255).astype(np.uint8)
    # Cast to plain ints: numpy 2.x scalars repr as "np.uint8(12)", which would
    # corrupt the 'rgb(...)' color string.
    return [[round(s, rdigits), f'rgb{tuple(int(c) for c in color)}']
            for s, color in zip(scale, colors)]


def _parse_report_rows(response, has_exercise):
    """Flatten a GA RunReport response into a DataFrame of finished-exercise rows.

    response     -- RunReportResponse whose dimensions are, in order:
                    nthDay, eventName, continent, country, countryId, cityId[, exercise]
    has_exercise -- whether the request included the customEvent:exercise
                    dimension; when False the 'exercise' column is 'n/a'
    """
    res = {'day': [], 'jumps': [], 'continent': [], 'country': [],
           'iso': [], 'cityId': [], 'exercise': []}
    for row in response.rows:
        # Only finished_exercise events carry a jump count in eventValue.
        if row.dimension_values[1].value != FINISHED_EXERCISE:
            continue
        res['day'].append(int(row.dimension_values[0].value))
        res['jumps'].append(float(row.metric_values[0].value))
        res['continent'].append(row.dimension_values[2].value)
        res['country'].append(row.dimension_values[3].value)
        res['iso'].append(row.dimension_values[4].value)
        res['cityId'].append(row.dimension_values[5].value)
        res['exercise'].append(row.dimension_values[6].value if has_exercise else 'n/a')
    return pd.DataFrame.from_dict(res)


def _fetch_report():
    """Query GA4 and return the combined raw report DataFrame.

    Two requests are needed: one for the period where the custom 'exercise'
    dimension is populated, and one for the earlier period without it (those
    rows get exercise='n/a'). Also derives a per-row 'duration' column.
    """
    client = BetaAnalyticsDataClient()
    geo_dimensions = [Dimension(name="nthDay"), Dimension(name='eventName'),
                      Dimension(name="continent"), Dimension(name="country"),
                      Dimension(name="countryId"), Dimension(name="cityId")]
    # First: all data where the exercise name is recorded.
    request = RunReportRequest(
        property=f"properties/{PROPERTY_ID}",
        dimensions=geo_dimensions + [Dimension(name="customEvent:exercise")],
        metrics=[Metric(name="eventValue")],
        date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
    )
    df = _parse_report_rows(client.run_report(request), has_exercise=True)

    # The earliest day with exercise-name data is the cutoff for the second query.
    first_day = int(df['day'].min())
    end_date = pd.to_datetime('2023-06-30') + pd.DateOffset(days=first_day)
    end_date = str(end_date.strftime('%Y-%m-%d'))  # GA expects YYYY-MM-DD
    request = RunReportRequest(
        property=f"properties/{PROPERTY_ID}",
        dimensions=geo_dimensions,
        metrics=[Metric(name="eventValue")],
        date_ranges=[DateRange(start_date="2023-06-30", end_date=end_date)],
    )
    new_df = _parse_report_rows(client.run_report(request), has_exercise=False)
    df = pd.concat([df, new_df]).reset_index(drop=True)

    # Exercise ids appear to encode a duration after a 2-char prefix
    # (e.g. 'xx30' -> 30) -- TODO confirm against the app's event naming.
    df['duration'] = df['exercise'].apply(
        lambda x: 0 if x in ['n/a', '(not set)'] else int(x[2:]))
    print(df['duration'].sum())
    return df


def _top_jumps_bar(grouped, label_col, top_n, title, height):
    """Horizontal bar chart of the top_n entries of `grouped` by total jumps.

    grouped   -- DataFrame already aggregated per label_col, with a 'jumps' column
    label_col -- column holding the bar labels ('country' or 'city')
    top_n     -- how many labels to keep (ranked by total jumps, descending)
    """
    ranked = grouped.sort_values(by=['jumps'], ascending=False)
    top_labels = ranked.iloc[:top_n][label_col].tolist()
    plot_df = ranked[ranked[label_col].isin(top_labels)].reset_index(drop=True)
    plot_df = plot_df.sort_values(by=['jumps'], ascending=True)
    fig = px.bar(plot_df, y=label_col, x='jumps', color=label_col, title=title,
                 orientation='h', category_orders={label_col: top_labels},
                 height=height, template="plotly_dark")
    fig.update_layout(showlegend=False)
    return fig


def full_report():
    """Fetch (or reuse) the GA report and build every dashboard output.

    Returns the markdown headline followed by the figures, in the order the
    Gradio `outputs` list expects.
    """
    global cached_report, cache_time, reload_cache
    # Expire the cache after reload_every seconds.
    if time.time() - cache_time > reload_every:
        reload_cache = False
    if not reload_cache:
        print("Loading report...")
        reload_cache = True
        cache_time = time.time()
        cached_report = _fetch_report()
    else:
        print("Using cached report...")
    df = cached_report.copy(deep=True)

    total_jumps = int(df['jumps'].sum())
    unique_countries = df['country'].nunique()
    unique_cities = df['cityId'].nunique()
    print(f"Total jumps: {total_jumps}, unique countries: {unique_countries}, unique cities: {unique_cities}")

    # Enrich with alpha-3 country codes and human-readable city/state names.
    df['iso'] = df['iso'].map(alpha_2_map)
    df['jumps'] = df['jumps'].astype(int)
    df['city'] = df['cityId'].map(ga_city_map)
    df['state'] = df['cityId'].map(ga_state_map)
    # Special-case Bermuda: its GA city ids don't map cleanly, so label Hamilton.
    df['city'] = df.apply(
        lambda row: row['city'] if row['country'] != 'Bermuda' else 'Hamilton', axis=1)
    df['city'] = df['city'] + ', ' + df['state']

    # --- country bar charts -------------------------------------------------
    # numeric_only=True: pandas 2.x would otherwise concatenate string columns.
    country_df = df.groupby(['country', 'iso']).sum(numeric_only=True).reset_index()
    total = _top_jumps_bar(country_df, 'country', 15, 'Total Jumps by Country', 800)

    weekly_mask = df['day'] >= df['day'].max() - 7
    weekly_country_df = df[weekly_mask].groupby(['country', 'iso']).sum(numeric_only=True).reset_index()
    total_weekly = _top_jumps_bar(weekly_country_df, 'country', 10, 'Top Countries This Week', 500)

    df['rank'] = df['jumps'].rank(ascending=False)
    df['world'] = 'Earth'  # single root node for the treemap

    # --- most popular exercises --------------------------------------------
    exercise_df = df[~df['exercise'].isin(['n/a', '(not set)'])]
    # Explicit column names: value_counts().reset_index() column naming changed
    # in pandas 2.0, and bars should be labeled by exercise, not by row index.
    top_6_events = (exercise_df['exercise'].value_counts().head(6)
                    .rename_axis('exercise_name').reset_index(name='count'))
    pop_events = px.bar(top_6_events, y='exercise_name', x='count', color='exercise_name',
                        title='Most Popular Exercises', template="plotly_dark")
    pop_events.update_layout(showlegend=False)

    # --- treemap ------------------------------------------------------------
    icicle_df = df.groupby(['world', 'continent', 'country', 'state', 'city']).sum(numeric_only=True).reset_index()
    print(df.columns)
    icicle = px.treemap(icicle_df, path=['world', 'continent', 'country', 'state', 'city'],
                        values='jumps',
                        title='Jumps by Continent/Country/City (click anywhere!)',
                        template="plotly_dark",
                        color_continuous_scale='jet',
                        # Clip the color range at the 99th percentile so a few
                        # huge cities don't wash out the scale.
                        range_color=[0, np.quantile(icicle_df['jumps'].values, q=0.99)],
                        branchvalues='total',
                        height=800,
                        maxdepth=4,
                        color='jumps')

    # --- city bar charts ----------------------------------------------------
    city_df = df.groupby(['city', 'iso']).sum(numeric_only=True).reset_index()
    city_df = city_df[city_df['city'] != '(not set)']
    city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    avg = _top_jumps_bar(city_df, 'city', 15, 'Total Jumps by City', 800)
    avg.update(layout_coloraxis_showscale=False)

    weekly_city_df = df[weekly_mask].groupby(['city', 'iso']).sum(numeric_only=True).reset_index()
    weekly_city_df = weekly_city_df[weekly_city_df['city'] != '(not set)']
    weekly_city_df['city'] = weekly_city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    avg_weekly = _top_jumps_bar(weekly_city_df, 'city', 10, 'Top Cities This Week', 500)
    avg_weekly.update(layout_coloraxis_showscale=False)

    # --- world choropleth ---------------------------------------------------
    country_df['rank'] = country_df['jumps'].rank(ascending=False)
    total_map = px.choropleth(country_df, locations="iso",
                              color="rank",
                              hover_name="country",
                              hover_data=["jumps"],
                              color_continuous_scale="OrRd_r",
                              projection='natural earth',
                              template="plotly_dark")
    total_map.update_layout(showlegend=False)
    total_map.update(layout_coloraxis_showscale=False)

    # --- US county choropleth ----------------------------------------------
    county_df = df.copy()
    county_df['county'] = county_df['city'].map(city_county_map)
    county_df['count_name'] = county_df['city'].map(city_county_name_map)
    county_df = county_df.groupby(['county', 'count_name']).sum(numeric_only=True).reset_index()
    county_df['rank'] = county_df['jumps'].rank(ascending=False)
    # FIPS codes: 2-digit state + 3-digit county, zero-padded to 5 characters.
    county_df['county'] = county_df['county'].astype(int).astype(str).str.zfill(5)
    county_map = px.choropleth(county_df, geojson=counties, locations='county', color='rank',
                               color_continuous_scale="OrRd_r",
                               scope="usa",
                               hover_name="count_name",
                               hover_data=["jumps"],
                               template="plotly_dark")
    county_map.update_layout(showlegend=False)
    county_map.update(layout_coloraxis_showscale=False)

    # --- cumulative jumps over time ----------------------------------------
    df = df.groupby(['day', 'continent']).sum(numeric_only=True).reset_index()
    df = df.sort_values(by=['day'])
    df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
    # Fill gaps: a (day, continent) pair with no events keeps its running total
    # so the area chart doesn't dip to zero on quiet days. Collect the fill rows
    # and concat once instead of concatenating inside the loop.
    fill_rows = []
    for day in range(1, int(df['day'].max()) + 1):
        for continent in df['continent'].unique():
            if df[(df['day'] == day) & (df['continent'] == continent)].empty:
                max_jumps = df[(df['day'] < day) & (df['continent'] == continent)]['total_jumps'].max()
                fill_rows.append([day, continent, max_jumps])
    if fill_rows:
        df = pd.concat([df, pd.DataFrame(fill_rows, columns=['day', 'continent', 'total_jumps'])])
    df = df.sort_values(by=['day']).reset_index(drop=True)
    jumps_over_time = px.area(df, x='day', y='total_jumps', color='continent',
                              template="plotly_dark")
    df.fillna(0, inplace=True)

    # --- jumps per day with a 14-day rolling trend --------------------------
    daily_df = df.groupby(['day'])[['jumps']].sum().reset_index()
    per_day_plot = px.scatter(daily_df, x='day', y='jumps', trendline='rolling',
                              trendline_options=dict(window=14),
                              trendline_color_override="goldenrod",
                              trendline_scope='overall', template="plotly_dark")

    return (f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries",
            total, total_weekly, avg, avg_weekly, total_map, icicle, jumps_over_time,
            pop_events, county_map, per_day_plot)


with gr.Blocks() as demo:
    with gr.Row():
        total_jumps_label = gr.Markdown("Total Jumps: 0")
    with gr.Row():
        map_fig = gr.Plot(label="Map")
    with gr.Row():
        jumps_over_time = gr.Plot(label="Jumps Over Time")
    with gr.Row():
        total_plot = gr.Plot(label="Top Countries (All Time)")
    with gr.Row():
        total_plot_weekly = gr.Plot(label="Top Countries (This Week)")
    with gr.Row():
        avg_plot = gr.Plot(label="Top Cities (All Time)")
    with gr.Row():
        avg_plot_weekly = gr.Plot(label="Top Cities (This Week)")
    with gr.Row():
        icicle_fig = gr.Plot(label="Treemap")
    with gr.Row():
        per_day_plot = gr.Plot(label="Jumps per Day")
    with gr.Row():
        county_map = gr.Plot(label="US Map")
    with gr.Row():
        popular_events = gr.Plot(label="Popular Events")

    # Order must match full_report's return tuple.
    outputs = [total_jumps_label, total_plot, total_plot_weekly, avg_plot, avg_plot_weekly,
               map_fig, icicle_fig, jumps_over_time, popular_events, county_map, per_day_plot]
    dep = demo.load(full_report, None, outputs)

if __name__ == "__main__":
    demo.launch(share=False)