dylanplummer committed on
Commit
d955465
·
verified ·
1 Parent(s): ee4701d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +373 -372
app.py CHANGED
@@ -1,373 +1,374 @@
1
- from google.analytics.data_v1beta import BetaAnalyticsDataClient
2
- from google.analytics.data_v1beta.types import (
3
- DateRange,
4
- Dimension,
5
- Metric,
6
- RunReportRequest,
7
- RunRealtimeReportRequest
8
- )
9
-
10
- import gradio as gr
11
- import os
12
- import json
13
- import time
14
- import math
15
- import numpy as np
16
- import pandas as pd
17
- import matplotlib.pyplot as plt
18
- import plotly.express as px
19
-
20
- FINISHED_EXERCISE = 'finished_exercise'
21
- PROPERTY_ID = "384068977"
22
-
23
- try:
24
- credentials_json = os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON']
25
- credentials_dict = json.loads(credentials_json)
26
- # write json to file
27
- with open('credentials.json', 'w') as f:
28
- json.dump(credentials_dict, f)
29
- # set env var to filename
30
- os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(os.path.dirname(__file__), 'credentials.json')
31
- except KeyError: # running locally
32
- pass
33
- except Exception as e:
34
- print(f"Error loading credentials: {e}")
35
- pass
36
-
37
- iso = pd.read_csv('iso.tsv', sep='\t')
38
- iso['Alpha-2 code'] = iso['Alpha-2 code'].str.strip()
39
- iso['Alpha-3 code'] = iso['Alpha-3 code'].str.strip()
40
- iso.set_index('Alpha-2 code', inplace=True)
41
- alpha_2_map = iso['Alpha-3 code'].to_dict()
42
-
43
- # read counties json file
44
- with open('counties.json') as f:
45
- counties = json.load(f)
46
-
47
- ga_cities = pd.read_csv('cities.csv')
48
- cities = pd.read_csv('uscities.csv')
49
- cities['full_city'] = cities['city'] + ', ' + cities['state_name']
50
- cities.set_index('full_city', inplace=True)
51
- ga_cities['Criteria ID'] = ga_cities['Criteria ID'].astype(str)
52
- ga_cities.set_index('Criteria ID', inplace=True)
53
- ga_city_map = ga_cities['Name'].to_dict()
54
- ga_cities['state'] = ga_cities['Canonical Name'].str.split(',').str[1].str.strip()
55
- ga_state_map = ga_cities['state'].to_dict()
56
- city_county_map = cities['county_fips'].to_dict()
57
- city_county_name_map = cities['county_name'].to_dict()
58
-
59
- cached_report = None
60
- cache_time = 0
61
- reload_cache = False
62
- # 6 hours
63
- reload_every = 6 * 60 * 60
64
-
65
- def mpl_to_plotly(cmap, pl_entries=11, rdigits=2):
66
- # cmap - colormap
67
- # pl_entries - int = number of Plotly colorscale entries
68
- # rdigits - int -=number of digits for rounding scale values
69
- scale = np.linspace(0, 1, pl_entries)
70
- colors = (cmap(scale)[:, :3]*255).astype(np.uint8)
71
- pl_colorscale = [[round(s, rdigits), f'rgb{tuple(color)}'] for s, color in zip(scale, colors)]
72
- return pl_colorscale
73
-
74
- def full_report():
75
- global cached_report, cache_time, reload_cache
76
- if time.time() - cache_time > reload_every:
77
- reload_cache = False
78
- if not reload_cache:
79
- print("Loading report...")
80
- reload_cache = True
81
- cache_time = time.time()
82
- client = BetaAnalyticsDataClient()
83
-
84
- # first request all data where we have the exercise name
85
- request = RunReportRequest(
86
- property=f"properties/{PROPERTY_ID}",
87
- dimensions=[Dimension(name="nthDay"),
88
- Dimension(name='eventName'),
89
- Dimension(name="continent"),
90
- Dimension(name="country"),
91
- Dimension(name="countryId"),
92
- Dimension(name="cityId"),
93
- Dimension(name="customEvent:exercise")],
94
- metrics=[Metric(name="eventValue")],
95
- #return_property_quota=True,
96
- date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
97
- )
98
- response = client.run_report(request)
99
-
100
- res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
101
-
102
- for row in response.rows:
103
- event_name = row.dimension_values[1].value
104
- if event_name == FINISHED_EXERCISE:
105
- day = int(row.dimension_values[0].value)
106
- continent = row.dimension_values[2].value
107
- country = row.dimension_values[3].value
108
- country_iso = row.dimension_values[4].value
109
- city = row.dimension_values[5].value
110
- exercise = row.dimension_values[6].value
111
- event_value = float(row.metric_values[0].value)
112
- res['day'].append(day)
113
- res['jumps'].append(event_value)
114
- res['continent'].append(continent)
115
- res['country'].append(country)
116
- res['iso'].append(country_iso)
117
- res['cityId'].append(city)
118
- res['exercise'].append(exercise)
119
-
120
- df = pd.DataFrame.from_dict(res)
121
- # then find the earliest day we started getting exercise name data
122
- first_day = int(df['day'].min())
123
- end_date = pd.to_datetime('2023-06-30') + pd.DateOffset(days=first_day)
124
- # only need YYY-MM-DD
125
- end_date = str(end_date.strftime('%Y-%m-%d'))
126
- # then request all data where we don't have the exercise name
127
- request = RunReportRequest(
128
- property=f"properties/{PROPERTY_ID}",
129
- dimensions=[Dimension(name="nthDay"),
130
- Dimension(name='eventName'),
131
- Dimension(name="continent"),
132
- Dimension(name="country"),
133
- Dimension(name="countryId"),
134
- Dimension(name="cityId")],
135
- metrics=[Metric(name="eventValue")],
136
- #return_property_quota=True,
137
- date_ranges=[DateRange(start_date="2023-06-30", end_date=end_date)],
138
- )
139
- response = client.run_report(request)
140
- res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
141
- for row in response.rows:
142
- event_name = row.dimension_values[1].value
143
- if event_name == FINISHED_EXERCISE:
144
- day = int(row.dimension_values[0].value)
145
- continent = row.dimension_values[2].value
146
- country = row.dimension_values[3].value
147
- country_iso = row.dimension_values[4].value
148
- city = row.dimension_values[5].value
149
- event_value = float(row.metric_values[0].value)
150
- res['day'].append(day)
151
- res['jumps'].append(event_value)
152
- res['continent'].append(continent)
153
- res['country'].append(country)
154
- res['iso'].append(country_iso)
155
- res['cityId'].append(city)
156
- res['exercise'].append('n/a')
157
- new_df = pd.DataFrame.from_dict(res)
158
- # drop any rows we already have
159
- #new_df = new_df[new_df['day'] < first_day]
160
- df = pd.concat([df, new_df]).reset_index(drop=True)
161
- df['duration'] = df['exercise'].apply(lambda x: 0 if x in ['n/a', '(not set)'] else int(x[2:]))
162
- print(df['duration'].sum())
163
- cached_report = df.copy(deep=True)
164
- else:
165
- print("Using cached report...")
166
- df = cached_report.copy(deep=True)
167
-
168
- total_jumps = int(df['jumps'].sum())
169
- unique_countries = df['country'].nunique()
170
- unique_cities = df['cityId'].nunique()
171
-
172
- print(f"Total jumps: {total_jumps}, unique countries: {unique_countries}, unique cities: {unique_cities}")
173
- df['iso'] = df['iso'].map(alpha_2_map)
174
- df['jumps'] = df['jumps'].astype(int)
175
- df['city'] = df['cityId'].map(ga_city_map)
176
- df['state'] = df['cityId'].map(ga_state_map)
177
- df['city'] = df.apply(lambda row: row['city'] if row['country'] != 'Bermuda' else 'Hamilton', axis=1)
178
- df['city'] = df['city'] + ', ' + df['state']
179
-
180
- country_df = df.groupby(['country', 'iso']).sum().reset_index()
181
- country_df = country_df.sort_values(by=['jumps'], ascending=False)
182
- top_10_countries = country_df.iloc[:15]['country'].tolist()
183
-
184
- country_df_to_plot = df.groupby(['country', 'iso']).sum().reset_index()
185
- country_df_to_plot = country_df_to_plot[country_df_to_plot['country'].isin(top_10_countries)].reset_index(drop=True)
186
- country_df_to_plot = country_df_to_plot.sort_values(by=['jumps'], ascending=True)
187
- df['rank'] = df['jumps'].rank(ascending=False)
188
- df['world'] = 'Earth'
189
-
190
- exercise_df = df[~df['exercise'].isin(['n/a', '(not set)'])]
191
- # plot a bar graph of the most popular exercises and their counts in the dataset
192
- pop_events = px.bar(exercise_df['exercise'].value_counts().reset_index()[:6],
193
- y='index',
194
- x='exercise',
195
- color='index',
196
- title='Most Popular Exercises',
197
- template="plotly_dark")
198
- pop_events.update_layout(showlegend=False)
199
-
200
- total = px.bar(country_df_to_plot,
201
- y='country', x='jumps',
202
- color='country',
203
- title='Total Jumps by Country',
204
- orientation='h',
205
- category_orders={'country': top_10_countries},
206
- height=800,
207
- template="plotly_dark")
208
- total.update_layout(showlegend=False)
209
-
210
- country_df_to_plot_weekly = df[df['day'] >= df['day'].max() - 7].groupby(['country', 'iso']).sum().reset_index()
211
- country_df_to_plot_weekly = country_df_to_plot_weekly.sort_values(by=['jumps'], ascending=False)
212
- top_5_weekly = country_df_to_plot_weekly.iloc[:10]['country'].tolist()
213
- country_df_to_plot_weekly = country_df_to_plot_weekly[country_df_to_plot_weekly['country'].isin(top_5_weekly)].reset_index(drop=True)
214
- country_df_to_plot_weekly = country_df_to_plot_weekly.sort_values(by=['jumps'], ascending=True)
215
- total_weekly = px.bar(country_df_to_plot_weekly,
216
- y='country', x='jumps',
217
- color='country',
218
- title='Top Countries This Week',
219
- orientation='h',
220
- category_orders={'country': top_5_weekly},
221
- height=500,
222
- template="plotly_dark")
223
- total_weekly.update_layout(showlegend=False)
224
-
225
- city_df = df.groupby(['city', 'iso']).sum().reset_index()
226
- city_df = city_df.sort_values(by=['jumps'], ascending=False)
227
- city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
228
- top_10_cities = city_df.iloc[:15]['city'].tolist()
229
-
230
- icicle_df = df.groupby(['world', 'continent', 'country', 'state', 'city']).sum().reset_index()
231
- #icicle_df['log10_jumps'] = icicle_df['jumps'].apply(lambda x: math.log10(x) if x > 0 else 0)
232
-
233
- # icicle = px.icicle(icicle_df, path=['world', 'continent', 'country', 'city'],
234
- # values='jumps',
235
- # title='Jumps by Continent/Country',
236
- # template="plotly_dark",
237
- # color_continuous_scale='OrRd',
238
- # maxdepth=7,
239
- # branchvalues='remainder',
240
- # color='jumps')
241
-
242
- print(df.columns)
243
- nipy_spec = mpl_to_plotly(plt.cm.nipy_spectral, pl_entries=15)
244
- icicle = px.treemap(icicle_df, path=['world', 'continent', 'country', 'state', 'city'],
245
- values='jumps',
246
- title='Jumps by Continent/Country/City (click anywhere!)',
247
- template="plotly_dark",
248
- color_continuous_scale=nipy_spec,
249
- branchvalues='total',
250
- height=800,
251
- maxdepth=4,
252
- color='jumps')
253
-
254
- city_df = df.groupby(['city', 'iso']).sum().reset_index()
255
- city_df = city_df[city_df['city'] != '(not set)']
256
- city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
257
- city_df = city_df[city_df['city'].isin(top_10_cities)].reset_index(drop=True)
258
- city_df = city_df.sort_values(by=['jumps'], ascending=True)
259
-
260
- avg = px.bar(city_df,
261
- y='city', x='jumps', color='city',
262
- title='Total Jumps by City',
263
- orientation='h',
264
- category_orders={'city': top_10_cities},
265
- height=800,
266
- template="plotly_dark")
267
-
268
- city_df_weekly = df[df['day'] >= df['day'].max() - 7].groupby(['city', 'iso']).sum().reset_index()
269
- city_df_weekly = city_df_weekly[city_df_weekly['city'] != '(not set)']
270
- city_df_weekly['city'] = city_df_weekly.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
271
- city_df_weekly = city_df_weekly.sort_values(by=['jumps'], ascending=False)
272
- top_5_weekly = city_df_weekly.iloc[:10]['city'].tolist()
273
- city_df_weekly = city_df_weekly[city_df_weekly['city'].isin(top_5_weekly)].reset_index(drop=True)
274
- city_df_weekly = city_df_weekly.sort_values(by=['jumps'], ascending=True)
275
- avg_weekly = px.bar(city_df_weekly,
276
- y='city', x='jumps', color='city',
277
- title='Top Cities This Week',
278
- orientation='h',
279
- category_orders={'city': top_5_weekly},
280
- height=500,
281
- template="plotly_dark")
282
-
283
- avg.update_layout(showlegend=False)
284
- avg.update(layout_coloraxis_showscale=False)
285
- avg_weekly.update_layout(showlegend=False)
286
- avg_weekly.update(layout_coloraxis_showscale=False)
287
-
288
- country_df['rank'] = country_df['jumps'].rank(ascending=False)
289
- total_map = px.choropleth(country_df, locations="iso",
290
- color="rank",
291
- hover_name="country", # column to add to hover information
292
- hover_data=["jumps"],
293
- color_continuous_scale ="OrRd_r",
294
- projection='natural earth',
295
- template="plotly_dark")
296
- # remove the legend
297
- total_map.update_layout(showlegend=False)
298
- total_map.update(layout_coloraxis_showscale=False)
299
-
300
- county_df = df.copy()
301
- county_df['county'] = county_df['city'].map(city_county_map)
302
- county_df['count_name'] = county_df['city'].map(city_county_name_map)
303
- county_df = county_df.groupby(['county', 'count_name']).sum().reset_index()
304
- county_df['rank'] = county_df['jumps'].rank(ascending=False)
305
- county_df['county'] = county_df['county'].astype(int)
306
- county_df['county'] = county_df['county'].astype(str).str.zfill(5) # county codes are two digits for state, three for county
307
- county_map = px.choropleth(county_df, geojson=counties, locations='county', color='rank',
308
- color_continuous_scale="OrRd_r",
309
- scope="usa",
310
- hover_name="count_name",
311
- hover_data=["jumps"],
312
- template="plotly_dark"
313
- )
314
- county_map.update_layout(showlegend=False)
315
- county_map.update(layout_coloraxis_showscale=False)
316
-
317
- df = df.groupby(['day', 'continent']).sum().reset_index()
318
- df = df.sort_values(by=['day'])
319
- df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
320
- # fill in any missing days with current max value
321
- for day in range(1, int(df['day'].max()) + 1):
322
- for continent in df['continent'].unique():
323
- if not df[(df['day'] == day) & (df['continent'] == continent)].any().any():
324
- max_jumps = df[(df['day'] < day) & (df['continent'] == continent)]['total_jumps'].max()
325
- df = pd.concat([df, pd.DataFrame([[day, continent, max_jumps]], columns=['day', 'continent', 'total_jumps'])])
326
- #df = df.append({'day': day, 'continent': continent, 'total_jumps': max_jumps}, ignore_index=True)
327
- df = df.sort_values(by=['day']).reset_index(drop=True)
328
- jumps_over_time = px.area(df, x='day',
329
- y='total_jumps',
330
- color='continent',
331
- template="plotly_dark")
332
- df.fillna(0, inplace=True)
333
- daily_df = df.groupby(['day'])[['jumps']].sum().reset_index()
334
- per_day_plot = px.scatter(daily_df, x='day', y='jumps',
335
- trendline='rolling',
336
- trendline_options=dict(window=14),
337
- trendline_color_override="goldenrod",
338
- trendline_scope='overall',
339
- template="plotly_dark")
340
-
341
- return f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries", \
342
- total, total_weekly, avg, avg_weekly, total_map, icicle, jumps_over_time, pop_events, county_map, per_day_plot
343
-
344
-
345
- with gr.Blocks() as demo:
346
- with gr.Row():
347
- total_jumps_label = gr.Markdown("Total Jumps: 0")
348
- with gr.Row():
349
- map_fig = gr.Plot(label="Map")
350
- with gr.Row():
351
- jumps_over_time = gr.Plot(label="Jumps Over Time")
352
- with gr.Row():
353
- total_plot = gr.Plot(label="Top Countries (All Time)")
354
- with gr.Row():
355
- total_plot_weekly = gr.Plot(label="Top Countries (This Week)")
356
- with gr.Row():
357
- avg_plot = gr.Plot(label="Top Cities (All Time)")
358
- with gr.Row():
359
- avg_plot_weekly = gr.Plot(label="Top Cities (This Week)")
360
- with gr.Row():
361
- icicle_fig = gr.Plot(label="Treemap")
362
- with gr.Row():
363
- per_day_plot = gr.Plot(label="Jumps per Day")
364
- with gr.Row():
365
- county_map = gr.Plot(label="US Map")
366
- with gr.Row():
367
- popular_events = gr.Plot(label="Popular Events")
368
-
369
- outputs = [total_jumps_label, total_plot, total_plot_weekly, avg_plot, avg_plot_weekly, map_fig, icicle_fig, jumps_over_time, popular_events, county_map, per_day_plot]
370
- dep = demo.load(full_report, None, outputs)
371
-
372
- if __name__ == "__main__":
 
373
  demo.launch(share=False)
 
1
+ from google.analytics.data_v1beta import BetaAnalyticsDataClient
2
+ from google.analytics.data_v1beta.types import (
3
+ DateRange,
4
+ Dimension,
5
+ Metric,
6
+ RunReportRequest,
7
+ RunRealtimeReportRequest
8
+ )
9
+
10
+ import gradio as gr
11
+ import os
12
+ import json
13
+ import time
14
+ import math
15
+ import numpy as np
16
+ import pandas as pd
17
+ import matplotlib.pyplot as plt
18
+ import plotly.express as px
19
+
20
+ FINISHED_EXERCISE = 'finished_exercise'
21
+ PROPERTY_ID = "384068977"
22
+
23
+ try:
24
+ credentials_json = os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON']
25
+ credentials_dict = json.loads(credentials_json)
26
+ # write json to file
27
+ with open('credentials.json', 'w') as f:
28
+ json.dump(credentials_dict, f)
29
+ # set env var to filename
30
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(os.path.dirname(__file__), 'credentials.json')
31
+ except KeyError: # running locally
32
+ pass
33
+ except Exception as e:
34
+ print(f"Error loading credentials: {e}")
35
+ pass
36
+
37
+ iso = pd.read_csv('iso.tsv', sep='\t')
38
+ iso['Alpha-2 code'] = iso['Alpha-2 code'].str.strip()
39
+ iso['Alpha-3 code'] = iso['Alpha-3 code'].str.strip()
40
+ iso.set_index('Alpha-2 code', inplace=True)
41
+ alpha_2_map = iso['Alpha-3 code'].to_dict()
42
+
43
+ # read counties json file
44
+ with open('counties.json') as f:
45
+ counties = json.load(f)
46
+
47
+ ga_cities = pd.read_csv('cities.csv')
48
+ cities = pd.read_csv('uscities.csv')
49
+ cities['full_city'] = cities['city'] + ', ' + cities['state_name']
50
+ cities.set_index('full_city', inplace=True)
51
+ ga_cities['Criteria ID'] = ga_cities['Criteria ID'].astype(str)
52
+ ga_cities.set_index('Criteria ID', inplace=True)
53
+ ga_city_map = ga_cities['Name'].to_dict()
54
+ ga_cities['state'] = ga_cities['Canonical Name'].str.split(',').str[1].str.strip()
55
+ ga_state_map = ga_cities['state'].to_dict()
56
+ city_county_map = cities['county_fips'].to_dict()
57
+ city_county_name_map = cities['county_name'].to_dict()
58
+
59
+ cached_report = None
60
+ cache_time = 0
61
+ reload_cache = False
62
+ # 6 hours
63
+ reload_every = 6 * 60 * 60
64
+
65
def mpl_to_plotly(cmap, pl_entries=11, rdigits=2):
    """Convert a matplotlib colormap into a Plotly colorscale.

    Args:
        cmap: a matplotlib colormap — any callable mapping an array of values
            in [0, 1] to RGBA rows.
        pl_entries: number of Plotly colorscale entries to sample.
        rdigits: number of digits for rounding the scale positions.

    Returns:
        A list of ``[position, 'rgb(r, g, b)']`` pairs usable as a Plotly
        ``color_continuous_scale``.
    """
    scale = np.linspace(0, 1, pl_entries)
    # sample the colormap, drop the alpha channel, scale to 0-255
    colors = (cmap(scale)[:, :3]*255).astype(np.uint8)
    # Cast each channel to a plain int: formatting a tuple of np.uint8 scalars
    # yields 'rgb(np.uint8(12), ...)' under NumPy >= 2.0 (NEP 51 repr change),
    # which Plotly cannot parse. Plain ints render 'rgb(12, 34, 56)' everywhere.
    pl_colorscale = [[round(s, rdigits), f'rgb{tuple(int(c) for c in color)}'] for s, color in zip(scale, colors)]
    return pl_colorscale
73
+
74
def full_report():
    """Fetch jump-event data from Google Analytics (with a 6-hour in-process
    cache) and build every figure for the dashboard.

    Returns a tuple whose order must match the ``outputs`` list registered
    with ``demo.load``: (summary markdown, country bar, weekly country bar,
    city bar, weekly city bar, world choropleth, treemap, cumulative area
    chart, popular-exercises bar, US county choropleth, per-day scatter).
    """
    global cached_report, cache_time, reload_cache
    # invalidate the cache once it is older than reload_every seconds
    if time.time() - cache_time > reload_every:
        reload_cache = False
    if not reload_cache:
        print("Loading report...")
        reload_cache = True
        cache_time = time.time()
        client = BetaAnalyticsDataClient()

        # first request all data where we have the exercise name
        request = RunReportRequest(
            property=f"properties/{PROPERTY_ID}",
            dimensions=[Dimension(name="nthDay"),
                        Dimension(name='eventName'),
                        Dimension(name="continent"),
                        Dimension(name="country"),
                        Dimension(name="countryId"),
                        Dimension(name="cityId"),
                        Dimension(name="customEvent:exercise")],
            metrics=[Metric(name="eventValue")],
            #return_property_quota=True,
            date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
        )
        response = client.run_report(request)

        res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}

        # keep only finished_exercise events; dimension order matches the
        # dimensions list in the request above
        for row in response.rows:
            event_name = row.dimension_values[1].value
            if event_name == FINISHED_EXERCISE:
                day = int(row.dimension_values[0].value)
                continent = row.dimension_values[2].value
                country = row.dimension_values[3].value
                country_iso = row.dimension_values[4].value
                city = row.dimension_values[5].value
                exercise = row.dimension_values[6].value
                event_value = float(row.metric_values[0].value)
                res['day'].append(day)
                res['jumps'].append(event_value)
                res['continent'].append(continent)
                res['country'].append(country)
                res['iso'].append(country_iso)
                res['cityId'].append(city)
                res['exercise'].append(exercise)

        df = pd.DataFrame.from_dict(res)
        # then find the earliest day we started getting exercise name data
        first_day = int(df['day'].min())
        end_date = pd.to_datetime('2023-06-30') + pd.DateOffset(days=first_day)
        # only need YYYY-MM-DD
        end_date = str(end_date.strftime('%Y-%m-%d'))
        # then request all data where we don't have the exercise name
        # (same query minus the customEvent:exercise dimension, capped at the
        # date the exercise dimension started appearing)
        request = RunReportRequest(
            property=f"properties/{PROPERTY_ID}",
            dimensions=[Dimension(name="nthDay"),
                        Dimension(name='eventName'),
                        Dimension(name="continent"),
                        Dimension(name="country"),
                        Dimension(name="countryId"),
                        Dimension(name="cityId")],
            metrics=[Metric(name="eventValue")],
            #return_property_quota=True,
            date_ranges=[DateRange(start_date="2023-06-30", end_date=end_date)],
        )
        response = client.run_report(request)
        res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
        for row in response.rows:
            event_name = row.dimension_values[1].value
            if event_name == FINISHED_EXERCISE:
                day = int(row.dimension_values[0].value)
                continent = row.dimension_values[2].value
                country = row.dimension_values[3].value
                country_iso = row.dimension_values[4].value
                city = row.dimension_values[5].value
                event_value = float(row.metric_values[0].value)
                res['day'].append(day)
                res['jumps'].append(event_value)
                res['continent'].append(continent)
                res['country'].append(country)
                res['iso'].append(country_iso)
                res['cityId'].append(city)
                # no exercise dimension available for these older rows
                res['exercise'].append('n/a')
        new_df = pd.DataFrame.from_dict(res)
        # drop any rows we already have
        #new_df = new_df[new_df['day'] < first_day]
        df = pd.concat([df, new_df]).reset_index(drop=True)
        # duration parsed from the exercise name by stripping a 2-char prefix
        # — assumes names look like 'xxNN'; TODO confirm against event schema
        df['duration'] = df['exercise'].apply(lambda x: 0 if x in ['n/a', '(not set)'] else int(x[2:]))
        print(df['duration'].sum())
        cached_report = df.copy(deep=True)
    else:
        print("Using cached report...")
        df = cached_report.copy(deep=True)

    # headline stats for the markdown summary
    total_jumps = int(df['jumps'].sum())
    unique_countries = df['country'].nunique()
    unique_cities = df['cityId'].nunique()

    print(f"Total jumps: {total_jumps}, unique countries: {unique_countries}, unique cities: {unique_cities}")
    # GA returns alpha-2 country codes; choropleth locations use alpha-3
    df['iso'] = df['iso'].map(alpha_2_map)
    df['jumps'] = df['jumps'].astype(int)
    df['city'] = df['cityId'].map(ga_city_map)
    df['state'] = df['cityId'].map(ga_state_map)
    # special-case Bermuda, whose cityId does not resolve via the GA table
    df['city'] = df.apply(lambda row: row['city'] if row['country'] != 'Bermuda' else 'Hamilton', axis=1)
    df['city'] = df['city'] + ', ' + df['state']

    # all-time country totals (top 15 despite the variable name)
    country_df = df.groupby(['country', 'iso']).sum().reset_index()
    country_df = country_df.sort_values(by=['jumps'], ascending=False)
    top_10_countries = country_df.iloc[:15]['country'].tolist()

    country_df_to_plot = df.groupby(['country', 'iso']).sum().reset_index()
    country_df_to_plot = country_df_to_plot[country_df_to_plot['country'].isin(top_10_countries)].reset_index(drop=True)
    country_df_to_plot = country_df_to_plot.sort_values(by=['jumps'], ascending=True)
    df['rank'] = df['jumps'].rank(ascending=False)
    # synthetic root node for the treemap hierarchy
    df['world'] = 'Earth'

    exercise_df = df[~df['exercise'].isin(['n/a', '(not set)'])]
    # plot a bar graph of the most popular exercises and their counts in the dataset
    # NOTE(review): the column names produced by value_counts().reset_index()
    # changed in pandas 2.0 ('index'/'exercise' -> 'exercise'/'count');
    # color='index' only resolves on pandas < 2.0 — verify installed version
    top_6_events = exercise_df['exercise'].value_counts().reset_index()[:6]
    pop_events = px.bar(top_6_events,
                        y=top_6_events.index,
                        x='exercise',
                        color='index',
                        title='Most Popular Exercises',
                        template="plotly_dark")
    pop_events.update_layout(showlegend=False)

    # all-time jumps per country, horizontal bars
    total = px.bar(country_df_to_plot,
                   y='country', x='jumps',
                   color='country',
                   title='Total Jumps by Country',
                   orientation='h',
                   category_orders={'country': top_10_countries},
                   height=800,
                   template="plotly_dark")
    total.update_layout(showlegend=False)

    # last-7-days country totals (top 10 despite the variable name)
    country_df_to_plot_weekly = df[df['day'] >= df['day'].max() - 7].groupby(['country', 'iso']).sum().reset_index()
    country_df_to_plot_weekly = country_df_to_plot_weekly.sort_values(by=['jumps'], ascending=False)
    top_5_weekly = country_df_to_plot_weekly.iloc[:10]['country'].tolist()
    country_df_to_plot_weekly = country_df_to_plot_weekly[country_df_to_plot_weekly['country'].isin(top_5_weekly)].reset_index(drop=True)
    country_df_to_plot_weekly = country_df_to_plot_weekly.sort_values(by=['jumps'], ascending=True)
    total_weekly = px.bar(country_df_to_plot_weekly,
                          y='country', x='jumps',
                          color='country',
                          title='Top Countries This Week',
                          orientation='h',
                          category_orders={'country': top_5_weekly},
                          height=500,
                          template="plotly_dark")
    total_weekly.update_layout(showlegend=False)

    # all-time city totals; label as "City, ISO3" to disambiguate
    city_df = df.groupby(['city', 'iso']).sum().reset_index()
    city_df = city_df.sort_values(by=['jumps'], ascending=False)
    city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    top_10_cities = city_df.iloc[:15]['city'].tolist()

    # hierarchy for the treemap: Earth -> continent -> country -> state -> city
    icicle_df = df.groupby(['world', 'continent', 'country', 'state', 'city']).sum().reset_index()
    #icicle_df['log10_jumps'] = icicle_df['jumps'].apply(lambda x: math.log10(x) if x > 0 else 0)

    # icicle = px.icicle(icicle_df, path=['world', 'continent', 'country', 'city'],
    #                    values='jumps',
    #                    title='Jumps by Continent/Country',
    #                    template="plotly_dark",
    #                    color_continuous_scale='OrRd',
    #                    maxdepth=7,
    #                    branchvalues='remainder',
    #                    color='jumps')

    print(df.columns)
    nipy_spec = mpl_to_plotly(plt.cm.nipy_spectral, pl_entries=15)
    icicle = px.treemap(icicle_df, path=['world', 'continent', 'country', 'state', 'city'],
                        values='jumps',
                        title='Jumps by Continent/Country/City (click anywhere!)',
                        template="plotly_dark",
                        color_continuous_scale=nipy_spec,
                        branchvalues='total',
                        height=800,
                        maxdepth=4,
                        color='jumps')

    # rebuild city_df filtered to the all-time top cities for the bar chart
    city_df = df.groupby(['city', 'iso']).sum().reset_index()
    city_df = city_df[city_df['city'] != '(not set)']
    city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    city_df = city_df[city_df['city'].isin(top_10_cities)].reset_index(drop=True)
    city_df = city_df.sort_values(by=['jumps'], ascending=True)

    avg = px.bar(city_df,
                 y='city', x='jumps', color='city',
                 title='Total Jumps by City',
                 orientation='h',
                 category_orders={'city': top_10_cities},
                 height=800,
                 template="plotly_dark")

    # last-7-days city totals (top 10 despite the variable name)
    city_df_weekly = df[df['day'] >= df['day'].max() - 7].groupby(['city', 'iso']).sum().reset_index()
    city_df_weekly = city_df_weekly[city_df_weekly['city'] != '(not set)']
    city_df_weekly['city'] = city_df_weekly.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    city_df_weekly = city_df_weekly.sort_values(by=['jumps'], ascending=False)
    top_5_weekly = city_df_weekly.iloc[:10]['city'].tolist()
    city_df_weekly = city_df_weekly[city_df_weekly['city'].isin(top_5_weekly)].reset_index(drop=True)
    city_df_weekly = city_df_weekly.sort_values(by=['jumps'], ascending=True)
    avg_weekly = px.bar(city_df_weekly,
                        y='city', x='jumps', color='city',
                        title='Top Cities This Week',
                        orientation='h',
                        category_orders={'city': top_5_weekly},
                        height=500,
                        template="plotly_dark")

    avg.update_layout(showlegend=False)
    avg.update(layout_coloraxis_showscale=False)
    avg_weekly.update_layout(showlegend=False)
    avg_weekly.update(layout_coloraxis_showscale=False)

    # world map colored by country rank (rank, not raw jumps, flattens outliers)
    country_df['rank'] = country_df['jumps'].rank(ascending=False)
    total_map = px.choropleth(country_df, locations="iso",
                              color="rank",
                              hover_name="country", # column to add to hover information
                              hover_data=["jumps"],
                              color_continuous_scale ="OrRd_r",
                              projection='natural earth',
                              template="plotly_dark")
    # remove the legend
    total_map.update_layout(showlegend=False)
    total_map.update(layout_coloraxis_showscale=False)

    # US county choropleth: resolve city -> county FIPS via uscities.csv
    county_df = df.copy()
    county_df['county'] = county_df['city'].map(city_county_map)
    county_df['count_name'] = county_df['city'].map(city_county_name_map)
    county_df = county_df.groupby(['county', 'count_name']).sum().reset_index()
    county_df['rank'] = county_df['jumps'].rank(ascending=False)
    county_df['county'] = county_df['county'].astype(int)
    county_df['county'] = county_df['county'].astype(str).str.zfill(5) # county codes are two digits for state, three for county
    county_map = px.choropleth(county_df, geojson=counties, locations='county', color='rank',
                               color_continuous_scale="OrRd_r",
                               scope="usa",
                               hover_name="count_name",
                               hover_data=["jumps"],
                               template="plotly_dark"
                               )
    county_map.update_layout(showlegend=False)
    county_map.update(layout_coloraxis_showscale=False)

    # cumulative jumps per continent over time (NOTE: df is re-aggregated here,
    # so earlier per-row columns are gone from this point on)
    df = df.groupby(['day', 'continent']).sum().reset_index()
    df = df.sort_values(by=['day'])
    df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
    # fill in any missing days with current max value
    for day in range(1, int(df['day'].max()) + 1):
        for continent in df['continent'].unique():
            if not df[(df['day'] == day) & (df['continent'] == continent)].any().any():
                max_jumps = df[(df['day'] < day) & (df['continent'] == continent)]['total_jumps'].max()
                df = pd.concat([df, pd.DataFrame([[day, continent, max_jumps]], columns=['day', 'continent', 'total_jumps'])])
                #df = df.append({'day': day, 'continent': continent, 'total_jumps': max_jumps}, ignore_index=True)
    df = df.sort_values(by=['day']).reset_index(drop=True)
    jumps_over_time = px.area(df, x='day',
                              y='total_jumps',
                              color='continent',
                              template="plotly_dark")
    # the gap-filling rows above have NaN 'jumps'; zero them before summing
    df.fillna(0, inplace=True)
    daily_df = df.groupby(['day'])[['jumps']].sum().reset_index()
    per_day_plot = px.scatter(daily_df, x='day', y='jumps',
                              trendline='rolling',
                              trendline_options=dict(window=14),
                              trendline_color_override="goldenrod",
                              trendline_scope='overall',
                              template="plotly_dark")

    # order must match the `outputs` list wired up in the gr.Blocks below
    return f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries", \
        total, total_weekly, avg, avg_weekly, total_map, icicle, jumps_over_time, pop_events, county_map, per_day_plot
344
+
345
+
346
# Dashboard layout: one figure per row; every component is populated by
# full_report() when the page first loads.
with gr.Blocks() as demo:
    with gr.Row():
        total_jumps_label = gr.Markdown("Total Jumps: 0")  # placeholder until full_report() runs
    with gr.Row():
        map_fig = gr.Plot(label="Map")
    with gr.Row():
        jumps_over_time = gr.Plot(label="Jumps Over Time")
    with gr.Row():
        total_plot = gr.Plot(label="Top Countries (All Time)")
    with gr.Row():
        total_plot_weekly = gr.Plot(label="Top Countries (This Week)")
    with gr.Row():
        avg_plot = gr.Plot(label="Top Cities (All Time)")
    with gr.Row():
        avg_plot_weekly = gr.Plot(label="Top Cities (This Week)")
    with gr.Row():
        icicle_fig = gr.Plot(label="Treemap")
    with gr.Row():
        per_day_plot = gr.Plot(label="Jumps per Day")
    with gr.Row():
        county_map = gr.Plot(label="US Map")
    with gr.Row():
        popular_events = gr.Plot(label="Popular Events")

    # order must match the tuple returned by full_report()
    outputs = [total_jumps_label, total_plot, total_plot_weekly, avg_plot, avg_plot_weekly, map_fig, icicle_fig, jumps_over_time, popular_events, county_map, per_day_plot]
    # run the report once on page load (no inputs) and fill every component
    dep = demo.load(full_report, None, outputs)

if __name__ == "__main__":
    demo.launch(share=False)