dylan-plummer's picture
color by rank
eec4d7a
raw
history blame
7.28 kB
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
DateRange,
Dimension,
Metric,
RunReportRequest,
RunRealtimeReportRequest
)
import gradio as gr
import os
import json
import time
import pandas as pd
import plotly.express as px
# Name of the analytics event whose value is counted as jumps.
FINISHED_EXERCISE = 'finished_exercise'
# Google Analytics property the report is requested for.
PROPERTY_ID = "384068977"

# When the service-account key is supplied as a JSON string in the
# environment, materialise it as a file and point
# GOOGLE_APPLICATION_CREDENTIALS at that file.
try:
    credentials_json = os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON']
except KeyError:  # running locally
    pass
else:
    # Parsing first makes malformed JSON fail here instead of producing a
    # broken key file.
    credentials_dict = json.loads(credentials_json)
    # Write the file to the exact path the environment variable will name.
    # Writing to a CWD-relative 'credentials.json' while exporting a path next
    # to this script breaks whenever the working directory is elsewhere.
    credentials_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'credentials.json')
    with open(credentials_path, 'w') as f:
        json.dump(credentials_dict, f)
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
# Country-code table: strip stray whitespace from both code columns, index the
# table by the two-letter code, and keep a plain dict from two-letter code to
# three-letter code.
iso = (
    pd.read_csv('iso.tsv', sep='\t')
    .assign(**{
        'Alpha-2 code': lambda frame: frame['Alpha-2 code'].str.strip(),
        'Alpha-3 code': lambda frame: frame['Alpha-3 code'].str.strip(),
    })
    .set_index('Alpha-2 code')
)
alpha_2_map = iso['Alpha-3 code'].to_dict()

# County boundaries, passed as `geojson` to the county choropleth.
with open('counties.json') as f:
    counties = json.loads(f.read())

# City-name lookups. The tables are keyed by city name only, and a dict holds
# one entry per key, so cities sharing a name collapse to a single county.
# NOTE(review): county_fips is parsed with pandas' default dtype inference;
# confirm the resulting values match the feature ids used in counties.json.
cities = pd.read_csv('uscities.csv').set_index('city')
city_county_map = cities['county_fips'].to_dict()
city_county_name_map = cities['county_name'].to_dict()
# Module-level cache shared by every call to full_report().
cached_report = None      # DataFrame from the most recent load, if any
cache_time = 0            # time.time() of the most recent load (0 = never)
reload_cache = False      # becomes True once full_report() has loaded a report
reload_every = 6 * 3600   # cache lifetime in seconds (6 hours)
def _fetch_finished_exercise_rows(page_size=100000):
    """Fetch every FINISHED_EXERCISE row of the analytics report.

    The report is requested for PROPERTY_ID from 2023-06-30 through "today",
    broken down by nthDay, eventName, continent, country, countryId and city,
    with eventValue as the only metric. Rows whose event name is not
    FINISHED_EXERCISE are discarded.

    When a request carries no ``limit`` the Data API returns only its default
    page of 10,000 rows, which silently truncates larger reports. The report
    is therefore read page by page with ``limit``/``offset`` until
    ``row_count`` rows have been consumed.

    Args:
        page_size: Maximum number of rows requested per API call.

    Returns:
        DataFrame with columns ``day`` (int), ``jumps`` (float),
        ``continent``, ``country``, ``iso`` (the raw countryId value) and
        ``city``.
    """
    client = BetaAnalyticsDataClient()
    res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'city': []}
    offset = 0
    while True:
        request = RunReportRequest(
            property=f"properties/{PROPERTY_ID}",
            dimensions=[Dimension(name="nthDay"),
                        Dimension(name='eventName'),
                        Dimension(name="continent"),
                        Dimension(name="country"),
                        Dimension(name="countryId"),
                        Dimension(name="city")],
            metrics=[Metric(name="eventValue")],
            date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
            limit=page_size,
            offset=offset,
        )
        response = client.run_report(request)
        for row in response.rows:
            # Dimension values arrive in the order they were requested above.
            day, event_name, continent, country, country_iso, city = (
                dimension.value for dimension in row.dimension_values)
            if event_name != FINISHED_EXERCISE:
                continue
            res['day'].append(int(day))
            res['jumps'].append(float(row.metric_values[0].value))
            res['continent'].append(continent)
            res['country'].append(country)
            res['iso'].append(country_iso)
            res['city'].append(city)
        fetched = len(response.rows)
        offset += fetched
        if fetched == 0 or offset >= response.row_count:
            break
    # Explicit dtypes keep the numeric columns numeric even when no rows match.
    return pd.DataFrame.from_dict(res).astype({'day': 'int64', 'jumps': 'float64'})


def full_report():
    """Build all dashboard outputs from the (cached) analytics report.

    The report is fetched on the first call and again once it is older than
    ``reload_every`` seconds. Otherwise a copy of the cached DataFrame is
    used. The cache bookkeeping is updated only after a fetch has succeeded,
    so a failed fetch is retried on the next call instead of leaving the cache
    marked as loaded while ``cached_report`` is still None.

    Returns:
        A 7-tuple, in this order:
        Markdown heading with the total jump count, bar chart of total jumps
        per country, bar chart of mean jumps per report row per country,
        world choropleth coloured by country rank, cumulative jumps line
        chart per continent, US county choropleth coloured by county rank,
        and per-day scatter plot with a 7-point rolling trendline.
    """
    global cached_report, cache_time, reload_cache
    if time.time() - cache_time > reload_every:
        reload_cache = False
    if not reload_cache or cached_report is None:
        print("Loading report...")
        fresh = _fetch_finished_exercise_rows()
        # Commit the cache state only now that the fetch has succeeded.
        cached_report = fresh
        cache_time = time.time()
        reload_cache = True
    else:
        print("Using cached report...")
    # Work on a copy so the transformations below never touch the cache.
    df = cached_report.copy(deep=True)

    total_jumps = int(df['jumps'].sum())
    print(f"Total jumps: {total_jumps}")

    # The country choropleth is keyed by the three-letter code.
    df['iso'] = df['iso'].map(alpha_2_map)

    # Aggregate only the 'jumps' column: the frame also holds string columns,
    # which cannot be averaged and should not be summed.
    jumps_by_country = df.groupby(['country', 'iso'])['jumps']
    country_df = jumps_by_country.sum().reset_index()
    country_df = country_df.sort_values(by=['jumps'], ascending=False)
    country_avg = jumps_by_country.mean().reset_index()
    country_avg = country_avg.sort_values(by=['jumps'], ascending=False)

    total = px.bar(country_df,
                   x='country', y='jumps',
                   title='Total Jumps by Country',
                   template="plotly_dark")
    avg = px.bar(country_avg,
                 x='country', y='jumps',
                 title='Average Jumps per Daily Session',
                 template="plotly_dark")

    country_df['rank'] = country_df['jumps'].rank(ascending=False)
    total_map = px.choropleth(country_df, locations="iso",
                              color="rank",
                              hover_name="country",  # column to add to hover information
                              color_continuous_scale="YlOrRd",
                              projection='natural earth',
                              template="plotly_dark")
    # Hide both the legend and the colour bar.
    total_map.update_layout(showlegend=False)
    total_map.update(layout_coloraxis_showscale=False)

    # NOTE(review): rows are matched to counties by city name alone, so a
    # non-US city that shares its name with a US city is counted toward that
    # US county. Consider restricting this frame to US rows.
    county_df = df.copy()
    county_df['county'] = county_df['city'].map(city_county_map)
    county_df['count_name'] = county_df['city'].map(city_county_name_map)
    county_df = county_df.groupby(['county', 'count_name'])['jumps'].sum().reset_index()
    county_df['rank'] = county_df['jumps'].rank(ascending=False)
    county_map = px.choropleth(county_df, geojson=counties, locations='county', color='rank',
                               color_continuous_scale="YlOrRd",
                               scope="usa",
                               hover_name="count_name",
                               template="plotly_dark")
    county_map.update_layout(showlegend=False)
    county_map.update(layout_coloraxis_showscale=False)

    # Daily totals per continent, then a running total within each continent.
    df = df.groupby(['day', 'continent'])['jumps'].sum().reset_index()
    df = df.sort_values(by=['day'])
    df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
    jumps_over_time = px.line(df, x='day',
                              y='total_jumps',
                              color='continent',
                              template="plotly_dark")

    daily_df = df.groupby(['day'])['jumps'].sum().reset_index()
    per_day_plot = px.scatter(daily_df, x='day', y='jumps',
                              trendline='rolling',
                              trendline_options=dict(window=7),
                              trendline_color_override="goldenrod",
                              trendline_scope='overall',
                              template="plotly_dark")

    return (f"# Total Jumps: {total_jumps:,}", total, avg, total_map,
            jumps_over_time, county_map, per_day_plot)
def _plot_row(label):
    """Create a gr.Plot with the given label inside its own gr.Row."""
    with gr.Row():
        return gr.Plot(label=label)


# Dashboard layout: a heading row followed by one plot per row, top to bottom.
with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
    with gr.Row():
        total_jumps_label = gr.Markdown("Total Jumps: 0")

    map_fig = _plot_row("Map")
    county_map = _plot_row("US Map")
    per_day_plot = _plot_row("Jumps per Day")
    jumps_over_time = _plot_row("Jumps Over Time")
    total_plot = _plot_row("Total Jumps")
    avg_plot = _plot_row("Average Jumps per Day")

    # The order here must match the order of the tuple full_report() returns,
    # which differs from the on-screen order above.
    outputs = [
        total_jumps_label,
        total_plot,
        avg_plot,
        map_fig,
        jumps_over_time,
        county_map,
        per_day_plot,
    ]
    # Populate every component when the page loads.
    dep = demo.load(full_report, None, outputs)

if __name__ == "__main__":
    demo.launch(share=False)