Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,373 +1,374 @@
|
|
| 1 |
-
from google.analytics.data_v1beta import BetaAnalyticsDataClient
|
| 2 |
-
from google.analytics.data_v1beta.types import (
|
| 3 |
-
DateRange,
|
| 4 |
-
Dimension,
|
| 5 |
-
Metric,
|
| 6 |
-
RunReportRequest,
|
| 7 |
-
RunRealtimeReportRequest
|
| 8 |
-
)
|
| 9 |
-
|
| 10 |
-
import gradio as gr
|
| 11 |
-
import os
|
| 12 |
-
import json
|
| 13 |
-
import time
|
| 14 |
-
import math
|
| 15 |
-
import numpy as np
|
| 16 |
-
import pandas as pd
|
| 17 |
-
import matplotlib.pyplot as plt
|
| 18 |
-
import plotly.express as px
|
| 19 |
-
|
| 20 |
-
FINISHED_EXERCISE = 'finished_exercise'
|
| 21 |
-
PROPERTY_ID = "384068977"
|
| 22 |
-
|
| 23 |
-
try:
|
| 24 |
-
credentials_json = os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON']
|
| 25 |
-
credentials_dict = json.loads(credentials_json)
|
| 26 |
-
# write json to file
|
| 27 |
-
with open('credentials.json', 'w') as f:
|
| 28 |
-
json.dump(credentials_dict, f)
|
| 29 |
-
# set env var to filename
|
| 30 |
-
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(os.path.dirname(__file__), 'credentials.json')
|
| 31 |
-
except KeyError: # running locally
|
| 32 |
-
pass
|
| 33 |
-
except Exception as e:
|
| 34 |
-
print(f"Error loading credentials: {e}")
|
| 35 |
-
pass
|
| 36 |
-
|
| 37 |
-
iso = pd.read_csv('iso.tsv', sep='\t')
|
| 38 |
-
iso['Alpha-2 code'] = iso['Alpha-2 code'].str.strip()
|
| 39 |
-
iso['Alpha-3 code'] = iso['Alpha-3 code'].str.strip()
|
| 40 |
-
iso.set_index('Alpha-2 code', inplace=True)
|
| 41 |
-
alpha_2_map = iso['Alpha-3 code'].to_dict()
|
| 42 |
-
|
| 43 |
-
# read counties json file
|
| 44 |
-
with open('counties.json') as f:
|
| 45 |
-
counties = json.load(f)
|
| 46 |
-
|
| 47 |
-
ga_cities = pd.read_csv('cities.csv')
|
| 48 |
-
cities = pd.read_csv('uscities.csv')
|
| 49 |
-
cities['full_city'] = cities['city'] + ', ' + cities['state_name']
|
| 50 |
-
cities.set_index('full_city', inplace=True)
|
| 51 |
-
ga_cities['Criteria ID'] = ga_cities['Criteria ID'].astype(str)
|
| 52 |
-
ga_cities.set_index('Criteria ID', inplace=True)
|
| 53 |
-
ga_city_map = ga_cities['Name'].to_dict()
|
| 54 |
-
ga_cities['state'] = ga_cities['Canonical Name'].str.split(',').str[1].str.strip()
|
| 55 |
-
ga_state_map = ga_cities['state'].to_dict()
|
| 56 |
-
city_county_map = cities['county_fips'].to_dict()
|
| 57 |
-
city_county_name_map = cities['county_name'].to_dict()
|
| 58 |
-
|
| 59 |
-
cached_report = None
|
| 60 |
-
cache_time = 0
|
| 61 |
-
reload_cache = False
|
| 62 |
-
# 6 hours
|
| 63 |
-
reload_every = 6 * 60 * 60
|
| 64 |
-
|
| 65 |
-
def mpl_to_plotly(cmap, pl_entries=11, rdigits=2):
|
| 66 |
-
# cmap - colormap
|
| 67 |
-
# pl_entries - int = number of Plotly colorscale entries
|
| 68 |
-
# rdigits - int -=number of digits for rounding scale values
|
| 69 |
-
scale = np.linspace(0, 1, pl_entries)
|
| 70 |
-
colors = (cmap(scale)[:, :3]*255).astype(np.uint8)
|
| 71 |
-
pl_colorscale = [[round(s, rdigits), f'rgb{tuple(color)}'] for s, color in zip(scale, colors)]
|
| 72 |
-
return pl_colorscale
|
| 73 |
-
|
| 74 |
-
def full_report():
|
| 75 |
-
global cached_report, cache_time, reload_cache
|
| 76 |
-
if time.time() - cache_time > reload_every:
|
| 77 |
-
reload_cache = False
|
| 78 |
-
if not reload_cache:
|
| 79 |
-
print("Loading report...")
|
| 80 |
-
reload_cache = True
|
| 81 |
-
cache_time = time.time()
|
| 82 |
-
client = BetaAnalyticsDataClient()
|
| 83 |
-
|
| 84 |
-
# first request all data where we have the exercise name
|
| 85 |
-
request = RunReportRequest(
|
| 86 |
-
property=f"properties/{PROPERTY_ID}",
|
| 87 |
-
dimensions=[Dimension(name="nthDay"),
|
| 88 |
-
Dimension(name='eventName'),
|
| 89 |
-
Dimension(name="continent"),
|
| 90 |
-
Dimension(name="country"),
|
| 91 |
-
Dimension(name="countryId"),
|
| 92 |
-
Dimension(name="cityId"),
|
| 93 |
-
Dimension(name="customEvent:exercise")],
|
| 94 |
-
metrics=[Metric(name="eventValue")],
|
| 95 |
-
#return_property_quota=True,
|
| 96 |
-
date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
|
| 97 |
-
)
|
| 98 |
-
response = client.run_report(request)
|
| 99 |
-
|
| 100 |
-
res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
|
| 101 |
-
|
| 102 |
-
for row in response.rows:
|
| 103 |
-
event_name = row.dimension_values[1].value
|
| 104 |
-
if event_name == FINISHED_EXERCISE:
|
| 105 |
-
day = int(row.dimension_values[0].value)
|
| 106 |
-
continent = row.dimension_values[2].value
|
| 107 |
-
country = row.dimension_values[3].value
|
| 108 |
-
country_iso = row.dimension_values[4].value
|
| 109 |
-
city = row.dimension_values[5].value
|
| 110 |
-
exercise = row.dimension_values[6].value
|
| 111 |
-
event_value = float(row.metric_values[0].value)
|
| 112 |
-
res['day'].append(day)
|
| 113 |
-
res['jumps'].append(event_value)
|
| 114 |
-
res['continent'].append(continent)
|
| 115 |
-
res['country'].append(country)
|
| 116 |
-
res['iso'].append(country_iso)
|
| 117 |
-
res['cityId'].append(city)
|
| 118 |
-
res['exercise'].append(exercise)
|
| 119 |
-
|
| 120 |
-
df = pd.DataFrame.from_dict(res)
|
| 121 |
-
# then find the earliest day we started getting exercise name data
|
| 122 |
-
first_day = int(df['day'].min())
|
| 123 |
-
end_date = pd.to_datetime('2023-06-30') + pd.DateOffset(days=first_day)
|
| 124 |
-
# only need YYY-MM-DD
|
| 125 |
-
end_date = str(end_date.strftime('%Y-%m-%d'))
|
| 126 |
-
# then request all data where we don't have the exercise name
|
| 127 |
-
request = RunReportRequest(
|
| 128 |
-
property=f"properties/{PROPERTY_ID}",
|
| 129 |
-
dimensions=[Dimension(name="nthDay"),
|
| 130 |
-
Dimension(name='eventName'),
|
| 131 |
-
Dimension(name="continent"),
|
| 132 |
-
Dimension(name="country"),
|
| 133 |
-
Dimension(name="countryId"),
|
| 134 |
-
Dimension(name="cityId")],
|
| 135 |
-
metrics=[Metric(name="eventValue")],
|
| 136 |
-
#return_property_quota=True,
|
| 137 |
-
date_ranges=[DateRange(start_date="2023-06-30", end_date=end_date)],
|
| 138 |
-
)
|
| 139 |
-
response = client.run_report(request)
|
| 140 |
-
res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
|
| 141 |
-
for row in response.rows:
|
| 142 |
-
event_name = row.dimension_values[1].value
|
| 143 |
-
if event_name == FINISHED_EXERCISE:
|
| 144 |
-
day = int(row.dimension_values[0].value)
|
| 145 |
-
continent = row.dimension_values[2].value
|
| 146 |
-
country = row.dimension_values[3].value
|
| 147 |
-
country_iso = row.dimension_values[4].value
|
| 148 |
-
city = row.dimension_values[5].value
|
| 149 |
-
event_value = float(row.metric_values[0].value)
|
| 150 |
-
res['day'].append(day)
|
| 151 |
-
res['jumps'].append(event_value)
|
| 152 |
-
res['continent'].append(continent)
|
| 153 |
-
res['country'].append(country)
|
| 154 |
-
res['iso'].append(country_iso)
|
| 155 |
-
res['cityId'].append(city)
|
| 156 |
-
res['exercise'].append('n/a')
|
| 157 |
-
new_df = pd.DataFrame.from_dict(res)
|
| 158 |
-
# drop any rows we already have
|
| 159 |
-
#new_df = new_df[new_df['day'] < first_day]
|
| 160 |
-
df = pd.concat([df, new_df]).reset_index(drop=True)
|
| 161 |
-
df['duration'] = df['exercise'].apply(lambda x: 0 if x in ['n/a', '(not set)'] else int(x[2:]))
|
| 162 |
-
print(df['duration'].sum())
|
| 163 |
-
cached_report = df.copy(deep=True)
|
| 164 |
-
else:
|
| 165 |
-
print("Using cached report...")
|
| 166 |
-
df = cached_report.copy(deep=True)
|
| 167 |
-
|
| 168 |
-
total_jumps = int(df['jumps'].sum())
|
| 169 |
-
unique_countries = df['country'].nunique()
|
| 170 |
-
unique_cities = df['cityId'].nunique()
|
| 171 |
-
|
| 172 |
-
print(f"Total jumps: {total_jumps}, unique countries: {unique_countries}, unique cities: {unique_cities}")
|
| 173 |
-
df['iso'] = df['iso'].map(alpha_2_map)
|
| 174 |
-
df['jumps'] = df['jumps'].astype(int)
|
| 175 |
-
df['city'] = df['cityId'].map(ga_city_map)
|
| 176 |
-
df['state'] = df['cityId'].map(ga_state_map)
|
| 177 |
-
df['city'] = df.apply(lambda row: row['city'] if row['country'] != 'Bermuda' else 'Hamilton', axis=1)
|
| 178 |
-
df['city'] = df['city'] + ', ' + df['state']
|
| 179 |
-
|
| 180 |
-
country_df = df.groupby(['country', 'iso']).sum().reset_index()
|
| 181 |
-
country_df = country_df.sort_values(by=['jumps'], ascending=False)
|
| 182 |
-
top_10_countries = country_df.iloc[:15]['country'].tolist()
|
| 183 |
-
|
| 184 |
-
country_df_to_plot = df.groupby(['country', 'iso']).sum().reset_index()
|
| 185 |
-
country_df_to_plot = country_df_to_plot[country_df_to_plot['country'].isin(top_10_countries)].reset_index(drop=True)
|
| 186 |
-
country_df_to_plot = country_df_to_plot.sort_values(by=['jumps'], ascending=True)
|
| 187 |
-
df['rank'] = df['jumps'].rank(ascending=False)
|
| 188 |
-
df['world'] = 'Earth'
|
| 189 |
-
|
| 190 |
-
exercise_df = df[~df['exercise'].isin(['n/a', '(not set)'])]
|
| 191 |
-
# plot a bar graph of the most popular exercises and their counts in the dataset
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
country_df_to_plot_weekly =
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
country_df_to_plot_weekly = country_df_to_plot_weekly
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
city_df =
|
| 227 |
-
city_df
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
#
|
| 235 |
-
#
|
| 236 |
-
#
|
| 237 |
-
#
|
| 238 |
-
#
|
| 239 |
-
#
|
| 240 |
-
#
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
city_df =
|
| 256 |
-
city_df
|
| 257 |
-
city_df = city_df[
|
| 258 |
-
city_df = city_df
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
city_df_weekly =
|
| 270 |
-
city_df_weekly
|
| 271 |
-
city_df_weekly = city_df_weekly.
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
city_df_weekly = city_df_weekly
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
avg.
|
| 285 |
-
|
| 286 |
-
avg_weekly.
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
total_map.
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
county_df
|
| 302 |
-
county_df['
|
| 303 |
-
county_df = county_df
|
| 304 |
-
county_df
|
| 305 |
-
county_df['
|
| 306 |
-
county_df['county'] = county_df['county'].astype(
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
county_map.
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
df = df.
|
| 319 |
-
df
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
|
|
|
| 373 |
demo.launch(share=False)
|
|
|
|
| 1 |
+
from google.analytics.data_v1beta import BetaAnalyticsDataClient
|
| 2 |
+
from google.analytics.data_v1beta.types import (
|
| 3 |
+
DateRange,
|
| 4 |
+
Dimension,
|
| 5 |
+
Metric,
|
| 6 |
+
RunReportRequest,
|
| 7 |
+
RunRealtimeReportRequest
|
| 8 |
+
)
|
| 9 |
+
|
| 10 |
+
import gradio as gr
|
| 11 |
+
import os
|
| 12 |
+
import json
|
| 13 |
+
import time
|
| 14 |
+
import math
|
| 15 |
+
import numpy as np
|
| 16 |
+
import pandas as pd
|
| 17 |
+
import matplotlib.pyplot as plt
|
| 18 |
+
import plotly.express as px
|
| 19 |
+
|
| 20 |
+
# --- Module-level configuration and lookup tables -------------------------
# GA4 event name that marks a completed exercise session.
FINISHED_EXERCISE = 'finished_exercise'
# GA4 property to query.
PROPERTY_ID = "384068977"

# On Hugging Face Spaces the service-account key arrives as a JSON string in
# an env var; the Google client library wants a file path, so materialize it.
try:
    credentials_json = os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON']
    credentials_dict = json.loads(credentials_json)
    # write json to file
    with open('credentials.json', 'w') as f:
        json.dump(credentials_dict, f)
    # set env var to filename so BetaAnalyticsDataClient() can find it
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(os.path.dirname(__file__), 'credentials.json')
except KeyError: # running locally
    pass
except Exception as e:
    print(f"Error loading credentials: {e}")
    pass

# ISO country-code table: map alpha-2 codes (what GA returns) to alpha-3
# (what plotly choropleths expect via `locations`).
iso = pd.read_csv('iso.tsv', sep='\t')
iso['Alpha-2 code'] = iso['Alpha-2 code'].str.strip()
iso['Alpha-3 code'] = iso['Alpha-3 code'].str.strip()
iso.set_index('Alpha-2 code', inplace=True)
alpha_2_map = iso['Alpha-3 code'].to_dict()

# read counties geojson file (used as the geometry for the US county map)
with open('counties.json') as f:
    counties = json.load(f)

# GA "Criteria ID" geo table plus a US-city table; build the lookup dicts
# used to turn GA city IDs into names/states and city names into county FIPS.
ga_cities = pd.read_csv('cities.csv')
cities = pd.read_csv('uscities.csv')
cities['full_city'] = cities['city'] + ', ' + cities['state_name']
cities.set_index('full_city', inplace=True)
ga_cities['Criteria ID'] = ga_cities['Criteria ID'].astype(str)
ga_cities.set_index('Criteria ID', inplace=True)
ga_city_map = ga_cities['Name'].to_dict()
# state is the second component of GA's "City, State, Country" canonical name
ga_cities['state'] = ga_cities['Canonical Name'].str.split(',').str[1].str.strip()
ga_state_map = ga_cities['state'].to_dict()
city_county_map = cities['county_fips'].to_dict()
city_county_name_map = cities['county_name'].to_dict()

# Report cache shared by full_report(): the raw GA frame, when it was
# fetched, and whether it is still considered fresh.
cached_report = None
cache_time = 0
reload_cache = False
# 6 hours
reload_every = 6 * 60 * 60
|
| 64 |
+
|
| 65 |
+
def mpl_to_plotly(cmap, pl_entries=11, rdigits=2):
    """Convert a matplotlib colormap into a Plotly colorscale.

    cmap: a matplotlib colormap (any callable mapping an array of values
        in [0, 1] to RGBA rows works).
    pl_entries: number of entries in the resulting colorscale.
    rdigits: number of digits used when rounding the scale positions.

    Returns a list of ``[position, 'rgb(r, g, b)']`` pairs suitable for
    Plotly's ``color_continuous_scale`` arguments.
    """
    positions = np.linspace(0, 1, pl_entries)
    # Sample the colormap, drop the alpha channel, and scale to 0-255 ints.
    rgb_rows = (cmap(positions)[:, :3] * 255).astype(np.uint8)
    colorscale = []
    for pos, rgb in zip(positions, rgb_rows):
        colorscale.append([round(pos, rdigits), f'rgb{tuple(rgb)}'])
    return colorscale
|
| 73 |
+
|
| 74 |
+
def full_report():
    """Build everything the dashboard shows from Google Analytics data.

    Fetches finished-exercise events from the GA4 Data API (the raw frame
    is cached for `reload_every` seconds), aggregates jumps by country,
    city, county, continent and day, and returns the markdown headline
    plus the ten Plotly figures in the order the Gradio `outputs` list
    expects.
    """
    global cached_report, cache_time, reload_cache
    # Invalidate the cache once it is older than `reload_every` seconds.
    if time.time() - cache_time > reload_every:
        reload_cache = False
    if not reload_cache:
        print("Loading report...")
        reload_cache = True
        cache_time = time.time()
        # Reads GOOGLE_APPLICATION_CREDENTIALS set at module import time.
        client = BetaAnalyticsDataClient()

        # first request all data where we have the exercise name
        request = RunReportRequest(
            property=f"properties/{PROPERTY_ID}",
            dimensions=[Dimension(name="nthDay"),
                        Dimension(name='eventName'),
                        Dimension(name="continent"),
                        Dimension(name="country"),
                        Dimension(name="countryId"),
                        Dimension(name="cityId"),
                        Dimension(name="customEvent:exercise")],
            metrics=[Metric(name="eventValue")],
            #return_property_quota=True,
            date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
        )
        response = client.run_report(request)

        res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}

        for row in response.rows:
            event_name = row.dimension_values[1].value
            # Only keep rows for the finished-exercise event.
            if event_name == FINISHED_EXERCISE:
                day = int(row.dimension_values[0].value)
                continent = row.dimension_values[2].value
                country = row.dimension_values[3].value
                country_iso = row.dimension_values[4].value
                city = row.dimension_values[5].value
                exercise = row.dimension_values[6].value
                event_value = float(row.metric_values[0].value)
                res['day'].append(day)
                res['jumps'].append(event_value)
                res['continent'].append(continent)
                res['country'].append(country)
                res['iso'].append(country_iso)
                res['cityId'].append(city)
                res['exercise'].append(exercise)

        df = pd.DataFrame.from_dict(res)
        # then find the earliest day we started getting exercise name data
        first_day = int(df['day'].min())
        end_date = pd.to_datetime('2023-06-30') + pd.DateOffset(days=first_day)
        # only need YYYY-MM-DD
        end_date = str(end_date.strftime('%Y-%m-%d'))
        # then request all data where we don't have the exercise name
        # (same query minus the customEvent:exercise dimension, capped at
        # the first day exercise names appeared so rows don't double-count)
        request = RunReportRequest(
            property=f"properties/{PROPERTY_ID}",
            dimensions=[Dimension(name="nthDay"),
                        Dimension(name='eventName'),
                        Dimension(name="continent"),
                        Dimension(name="country"),
                        Dimension(name="countryId"),
                        Dimension(name="cityId")],
            metrics=[Metric(name="eventValue")],
            #return_property_quota=True,
            date_ranges=[DateRange(start_date="2023-06-30", end_date=end_date)],
        )
        response = client.run_report(request)
        res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
        for row in response.rows:
            event_name = row.dimension_values[1].value
            if event_name == FINISHED_EXERCISE:
                day = int(row.dimension_values[0].value)
                continent = row.dimension_values[2].value
                country = row.dimension_values[3].value
                country_iso = row.dimension_values[4].value
                city = row.dimension_values[5].value
                event_value = float(row.metric_values[0].value)
                res['day'].append(day)
                res['jumps'].append(event_value)
                res['continent'].append(continent)
                res['country'].append(country)
                res['iso'].append(country_iso)
                res['cityId'].append(city)
                # this request has no exercise dimension, so mark it n/a
                res['exercise'].append('n/a')
        new_df = pd.DataFrame.from_dict(res)
        # drop any rows we already have
        #new_df = new_df[new_df['day'] < first_day]
        df = pd.concat([df, new_df]).reset_index(drop=True)
        # Exercise labels apparently carry a numeric duration after a
        # two-character prefix — TODO confirm the label format in the app.
        df['duration'] = df['exercise'].apply(lambda x: 0 if x in ['n/a', '(not set)'] else int(x[2:]))
        print(df['duration'].sum())
        cached_report = df.copy(deep=True)
    else:
        print("Using cached report...")
        df = cached_report.copy(deep=True)

    # Headline totals for the markdown banner.
    total_jumps = int(df['jumps'].sum())
    unique_countries = df['country'].nunique()
    unique_cities = df['cityId'].nunique()

    print(f"Total jumps: {total_jumps}, unique countries: {unique_countries}, unique cities: {unique_cities}")
    # alpha-2 -> alpha-3 codes, since plotly choropleths want alpha-3
    df['iso'] = df['iso'].map(alpha_2_map)
    df['jumps'] = df['jumps'].astype(int)
    df['city'] = df['cityId'].map(ga_city_map)
    df['state'] = df['cityId'].map(ga_state_map)
    # Bermuda special-case: force the city name to Hamilton.
    df['city'] = df.apply(lambda row: row['city'] if row['country'] != 'Bermuda' else 'Hamilton', axis=1)
    df['city'] = df['city'] + ', ' + df['state']

    # NOTE(review): groupby(...).sum() on a frame with string columns
    # relies on pre-2.0 pandas behavior (non-numeric columns silently
    # dropped) — verify with the pinned pandas version.
    country_df = df.groupby(['country', 'iso']).sum().reset_index()
    country_df = country_df.sort_values(by=['jumps'], ascending=False)
    # despite the name, this keeps the top 15 countries
    top_10_countries = country_df.iloc[:15]['country'].tolist()

    country_df_to_plot = df.groupby(['country', 'iso']).sum().reset_index()
    country_df_to_plot = country_df_to_plot[country_df_to_plot['country'].isin(top_10_countries)].reset_index(drop=True)
    country_df_to_plot = country_df_to_plot.sort_values(by=['jumps'], ascending=True)
    df['rank'] = df['jumps'].rank(ascending=False)
    # synthetic root node for the treemap hierarchy
    df['world'] = 'Earth'

    # Rows whose exercise name is known.
    exercise_df = df[~df['exercise'].isin(['n/a', '(not set)'])]
    # plot a bar graph of the most popular exercises and their counts in the dataset
    # NOTE(review): value_counts().reset_index() column naming changed in
    # pandas 2.0 ('index'/'exercise' -> 'exercise'/'count'); color='index'
    # assumes the pre-2.0 layout — confirm against the pinned version.
    top_6_events = exercise_df['exercise'].value_counts().reset_index()[:6]
    pop_events = px.bar(top_6_events,
                        y=top_6_events.index,
                        x='exercise',
                        color='index',
                        title='Most Popular Exercises',
                        template="plotly_dark")
    pop_events.update_layout(showlegend=False)

    # All-time top countries, horizontal bars.
    total = px.bar(country_df_to_plot,
                   y='country', x='jumps',
                   color='country',
                   title='Total Jumps by Country',
                   orientation='h',
                   category_orders={'country': top_10_countries},
                   height=800,
                   template="plotly_dark")
    total.update_layout(showlegend=False)

    # Same ranking restricted to the last 7 nthDay values.
    country_df_to_plot_weekly = df[df['day'] >= df['day'].max() - 7].groupby(['country', 'iso']).sum().reset_index()
    country_df_to_plot_weekly = country_df_to_plot_weekly.sort_values(by=['jumps'], ascending=False)
    # despite the name, this keeps the top 10
    top_5_weekly = country_df_to_plot_weekly.iloc[:10]['country'].tolist()
    country_df_to_plot_weekly = country_df_to_plot_weekly[country_df_to_plot_weekly['country'].isin(top_5_weekly)].reset_index(drop=True)
    country_df_to_plot_weekly = country_df_to_plot_weekly.sort_values(by=['jumps'], ascending=True)
    total_weekly = px.bar(country_df_to_plot_weekly,
                          y='country', x='jumps',
                          color='country',
                          title='Top Countries This Week',
                          orientation='h',
                          category_orders={'country': top_5_weekly},
                          height=500,
                          template="plotly_dark")
    total_weekly.update_layout(showlegend=False)

    # All-time city ranking; labels become "City, ST, ISO".
    city_df = df.groupby(['city', 'iso']).sum().reset_index()
    city_df = city_df.sort_values(by=['jumps'], ascending=False)
    city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    top_10_cities = city_df.iloc[:15]['city'].tolist()

    # Hierarchical aggregate for the treemap (Earth > continent > ... > city).
    icicle_df = df.groupby(['world', 'continent', 'country', 'state', 'city']).sum().reset_index()
    #icicle_df['log10_jumps'] = icicle_df['jumps'].apply(lambda x: math.log10(x) if x > 0 else 0)

    # icicle = px.icicle(icicle_df, path=['world', 'continent', 'country', 'city'],
    #                    values='jumps',
    #                    title='Jumps by Continent/Country',
    #                    template="plotly_dark",
    #                    color_continuous_scale='OrRd',
    #                    maxdepth=7,
    #                    branchvalues='remainder',
    #                    color='jumps')

    print(df.columns)
    nipy_spec = mpl_to_plotly(plt.cm.nipy_spectral, pl_entries=15)
    icicle = px.treemap(icicle_df, path=['world', 'continent', 'country', 'state', 'city'],
                        values='jumps',
                        title='Jumps by Continent/Country/City (click anywhere!)',
                        template="plotly_dark",
                        color_continuous_scale=nipy_spec,
                        branchvalues='total',
                        height=800,
                        maxdepth=4,
                        color='jumps')

    # Rebuild city_df for the bar chart, dropping GA's '(not set)' rows.
    city_df = df.groupby(['city', 'iso']).sum().reset_index()
    city_df = city_df[city_df['city'] != '(not set)']
    city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    city_df = city_df[city_df['city'].isin(top_10_cities)].reset_index(drop=True)
    city_df = city_df.sort_values(by=['jumps'], ascending=True)

    avg = px.bar(city_df,
                 y='city', x='jumps', color='city',
                 title='Total Jumps by City',
                 orientation='h',
                 category_orders={'city': top_10_cities},
                 height=800,
                 template="plotly_dark")

    # Weekly city ranking, same shape as the weekly country chart.
    city_df_weekly = df[df['day'] >= df['day'].max() - 7].groupby(['city', 'iso']).sum().reset_index()
    city_df_weekly = city_df_weekly[city_df_weekly['city'] != '(not set)']
    city_df_weekly['city'] = city_df_weekly.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    city_df_weekly = city_df_weekly.sort_values(by=['jumps'], ascending=False)
    top_5_weekly = city_df_weekly.iloc[:10]['city'].tolist()
    city_df_weekly = city_df_weekly[city_df_weekly['city'].isin(top_5_weekly)].reset_index(drop=True)
    city_df_weekly = city_df_weekly.sort_values(by=['jumps'], ascending=True)
    avg_weekly = px.bar(city_df_weekly,
                        y='city', x='jumps', color='city',
                        title='Top Cities This Week',
                        orientation='h',
                        category_orders={'city': top_5_weekly},
                        height=500,
                        template="plotly_dark")

    avg.update_layout(showlegend=False)
    avg.update(layout_coloraxis_showscale=False)
    avg_weekly.update_layout(showlegend=False)
    avg_weekly.update(layout_coloraxis_showscale=False)

    # World choropleth colored by rank (reversed OrRd so #1 is darkest).
    country_df['rank'] = country_df['jumps'].rank(ascending=False)
    total_map = px.choropleth(country_df, locations="iso",
                              color="rank",
                              hover_name="country", # column to add to hover information
                              hover_data=["jumps"],
                              color_continuous_scale ="OrRd_r",
                              projection='natural earth',
                              template="plotly_dark")
    # remove the legend
    total_map.update_layout(showlegend=False)
    total_map.update(layout_coloraxis_showscale=False)

    # US county choropleth: map city names to county FIPS codes.
    # NOTE(review): city_county_map is keyed by "City, StateName" from
    # uscities.csv while df['city'] uses the GA state abbreviation —
    # verify the keys actually line up.
    county_df = df.copy()
    county_df['county'] = county_df['city'].map(city_county_map)
    county_df['count_name'] = county_df['city'].map(city_county_name_map)
    county_df = county_df.groupby(['county', 'count_name']).sum().reset_index()
    county_df['rank'] = county_df['jumps'].rank(ascending=False)
    county_df['county'] = county_df['county'].astype(int)
    county_df['county'] = county_df['county'].astype(str).str.zfill(5) # county codes are two digits for state, three for county
    county_map = px.choropleth(county_df, geojson=counties, locations='county', color='rank',
                               color_continuous_scale="OrRd_r",
                               scope="usa",
                               hover_name="count_name",
                               hover_data=["jumps"],
                               template="plotly_dark"
                               )
    county_map.update_layout(showlegend=False)
    county_map.update(layout_coloraxis_showscale=False)

    # Cumulative jumps per continent over time (df is re-aggregated here,
    # so the per-city columns are gone from this point on).
    df = df.groupby(['day', 'continent']).sum().reset_index()
    df = df.sort_values(by=['day'])
    df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
    # fill in any missing days with current max value so the area chart
    # doesn't dip to zero on days with no events
    for day in range(1, int(df['day'].max()) + 1):
        for continent in df['continent'].unique():
            if not df[(df['day'] == day) & (df['continent'] == continent)].any().any():
                max_jumps = df[(df['day'] < day) & (df['continent'] == continent)]['total_jumps'].max()
                df = pd.concat([df, pd.DataFrame([[day, continent, max_jumps]], columns=['day', 'continent', 'total_jumps'])])
                #df = df.append({'day': day, 'continent': continent, 'total_jumps': max_jumps}, ignore_index=True)
    df = df.sort_values(by=['day']).reset_index(drop=True)
    jumps_over_time = px.area(df, x='day',
                              y='total_jumps',
                              color='continent',
                              template="plotly_dark")
    # filler rows above have NaN 'jumps'; zero them before the daily sum
    df.fillna(0, inplace=True)
    daily_df = df.groupby(['day'])[['jumps']].sum().reset_index()
    # Daily scatter with a 14-day rolling trendline.
    per_day_plot = px.scatter(daily_df, x='day', y='jumps',
                              trendline='rolling',
                              trendline_options=dict(window=14),
                              trendline_color_override="goldenrod",
                              trendline_scope='overall',
                              template="plotly_dark")

    # Order must match the `outputs` list handed to demo.load().
    return f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries", \
        total, total_weekly, avg, avg_weekly, total_map, icicle, jumps_over_time, pop_events, county_map, per_day_plot
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
# Dashboard layout: one stacked row per figure, headline markdown on top.
with gr.Blocks() as demo:
    with gr.Row():
        total_jumps_label = gr.Markdown("Total Jumps: 0")
    with gr.Row():
        map_fig = gr.Plot(label="Map")
    with gr.Row():
        jumps_over_time = gr.Plot(label="Jumps Over Time")
    with gr.Row():
        total_plot = gr.Plot(label="Top Countries (All Time)")
    with gr.Row():
        total_plot_weekly = gr.Plot(label="Top Countries (This Week)")
    with gr.Row():
        avg_plot = gr.Plot(label="Top Cities (All Time)")
    with gr.Row():
        avg_plot_weekly = gr.Plot(label="Top Cities (This Week)")
    with gr.Row():
        icicle_fig = gr.Plot(label="Treemap")
    with gr.Row():
        per_day_plot = gr.Plot(label="Jumps per Day")
    with gr.Row():
        county_map = gr.Plot(label="US Map")
    with gr.Row():
        popular_events = gr.Plot(label="Popular Events")

    # Order must match the tuple returned by full_report().
    outputs = [total_jumps_label, total_plot, total_plot_weekly, avg_plot, avg_plot_weekly, map_fig, icicle_fig, jumps_over_time, popular_events, county_map, per_day_plot]
    # Populate every component when a browser session loads the page.
    dep = demo.load(full_report, None, outputs)

if __name__ == "__main__":
    demo.launch(share=False)
|