dylan-plummer committed on
Commit
ad698fe
·
1 Parent(s): ae86a17

add exercise info and update treemap

Browse files
Files changed (1) hide show
  1. app.py +78 -8
app.py CHANGED
@@ -12,7 +12,9 @@ import os
12
  import json
13
  import time
14
  import math
 
15
  import pandas as pd
 
16
  import plotly.express as px
17
 
18
  FINISHED_EXERCISE = 'finished_exercise'
@@ -60,6 +62,15 @@ reload_cache = False
60
  # 6 hours
61
  reload_every = 6 * 60 * 60
62
 
 
 
 
 
 
 
 
 
 
63
  def full_report():
64
  global cached_report, cache_time, reload_cache
65
  if time.time() - cache_time > reload_every:
@@ -70,6 +81,7 @@ def full_report():
70
  cache_time = time.time()
71
  client = BetaAnalyticsDataClient()
72
 
 
73
  request = RunReportRequest(
74
  property=f"properties/{PROPERTY_ID}",
75
  dimensions=[Dimension(name="nthDay"),
@@ -77,14 +89,15 @@ def full_report():
77
  Dimension(name="continent"),
78
  Dimension(name="country"),
79
  Dimension(name="countryId"),
80
- Dimension(name="cityId")],
 
81
  metrics=[Metric(name="eventValue")],
82
  #return_property_quota=True,
83
  date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
84
  )
85
  response = client.run_report(request)
86
 
87
- res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': []}
88
 
89
  for row in response.rows:
90
  event_name = row.dimension_values[1].value
@@ -94,6 +107,7 @@ def full_report():
94
  country = row.dimension_values[3].value
95
  country_iso = row.dimension_values[4].value
96
  city = row.dimension_values[5].value
 
97
  event_value = float(row.metric_values[0].value)
98
  res['day'].append(day)
99
  res['jumps'].append(event_value)
@@ -101,8 +115,51 @@ def full_report():
101
  res['country'].append(country)
102
  res['iso'].append(country_iso)
103
  res['cityId'].append(city)
 
104
 
105
  df = pd.DataFrame.from_dict(res)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  cached_report = df.copy(deep=True)
107
  else:
108
  print("Using cached report...")
@@ -130,6 +187,16 @@ def full_report():
130
  df['rank'] = df['jumps'].rank(ascending=False)
131
  df['world'] = 'Earth'
132
 
 
 
 
 
 
 
 
 
 
 
133
  total = px.bar(country_df_to_plot,
134
  y='country', x='jumps',
135
  color='country',
@@ -173,14 +240,15 @@ def full_report():
173
  # color='jumps')
174
 
175
  print(df.columns)
176
-
177
  icicle = px.treemap(icicle_df, path=['world', 'continent', 'country', 'state', 'city'],
178
  values='jumps',
179
  title='Jumps by Continent/Country/City (click anywhere!)',
180
  template="plotly_dark",
181
- color_continuous_scale='jet',
182
  branchvalues='total',
183
  height=800,
 
184
  color='jumps')
185
 
186
  city_df = df.groupby(['city', 'iso']).sum().reset_index()
@@ -250,7 +318,7 @@ def full_report():
250
  df = df.sort_values(by=['day'])
251
  df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
252
  # fill in any missing days with current max value
253
- for day in range(1, df['day'].max() + 1):
254
  for continent in df['continent'].unique():
255
  if not df[(df['day'] == day) & (df['continent'] == continent)].any().any():
256
  max_jumps = df[(df['day'] < day) & (df['continent'] == continent)]['total_jumps'].max()
@@ -271,10 +339,10 @@ def full_report():
271
  template="plotly_dark")
272
 
273
  return f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries", \
274
- total, total_weekly, avg, avg_weekly, total_map, icicle, jumps_over_time, county_map, per_day_plot
275
 
276
 
277
- with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
278
  with gr.Row():
279
  total_jumps_label = gr.Markdown("Total Jumps: 0")
280
  with gr.Row():
@@ -295,8 +363,10 @@ with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
295
  per_day_plot = gr.Plot(label="Jumps per Day")
296
  with gr.Row():
297
  county_map = gr.Plot(label="US Map")
 
 
298
 
299
- outputs = [total_jumps_label, total_plot, total_plot_weekly, avg_plot, avg_plot_weekly, map_fig, icicle_fig, jumps_over_time, county_map, per_day_plot]
300
  dep = demo.load(full_report, None, outputs)
301
 
302
  if __name__ == "__main__":
 
12
  import json
13
  import time
14
  import math
15
+ import numpy as np
16
  import pandas as pd
17
+ import matplotlib.pyplot as plt
18
  import plotly.express as px
19
 
20
  FINISHED_EXERCISE = 'finished_exercise'
 
62
  # 6 hours
63
  reload_every = 6 * 60 * 60
64
 
65
def mpl_to_plotly(cmap, pl_entries=11, rdigits=2):
    """Sample a colormap callable into a Plotly-style colorscale list.

    Args:
        cmap: Callable that takes a 1-D array of sample positions and returns
            a 2-D array whose first three columns are the R, G, B channels
            (e.g. a Matplotlib colormap such as ``plt.cm.nipy_spectral``).
            Channels are multiplied by 255 here, so they are presumably
            floats in [0, 1] -- confirm for custom callables.
        pl_entries: Number of evenly spaced entries in the resulting
            colorscale.
        rdigits: Number of decimal digits the scale positions are rounded to.

    Returns:
        A list of ``[position, 'rgb(r, g, b)']`` pairs, with positions evenly
        spaced from 0 to 1 and channels as integers in 0..255.
    """
    scale = np.linspace(0, 1, pl_entries)
    # .tolist() yields plain Python ints. Formatting np.uint8 scalars directly
    # would embed their repr ("np.uint8(0)") in the colour string on
    # NumPy >= 2, which is not a valid 'rgb(...)' value.
    colors = (cmap(scale)[:, :3] * 255).astype(np.uint8).tolist()
    # Round with NumPy first (same values as before), then convert to a
    # builtin float so the colorscale holds only plain Python types.
    return [
        [float(round(s, rdigits)), f'rgb{tuple(color)}']
        for s, color in zip(scale, colors)
    ]
73
+
74
  def full_report():
75
  global cached_report, cache_time, reload_cache
76
  if time.time() - cache_time > reload_every:
 
81
  cache_time = time.time()
82
  client = BetaAnalyticsDataClient()
83
 
84
+ # first request all data where we have the exercise name
85
  request = RunReportRequest(
86
  property=f"properties/{PROPERTY_ID}",
87
  dimensions=[Dimension(name="nthDay"),
 
89
  Dimension(name="continent"),
90
  Dimension(name="country"),
91
  Dimension(name="countryId"),
92
+ Dimension(name="cityId"),
93
+ Dimension(name="customEvent:exercise")],
94
  metrics=[Metric(name="eventValue")],
95
  #return_property_quota=True,
96
  date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
97
  )
98
  response = client.run_report(request)
99
 
100
+ res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
101
 
102
  for row in response.rows:
103
  event_name = row.dimension_values[1].value
 
107
  country = row.dimension_values[3].value
108
  country_iso = row.dimension_values[4].value
109
  city = row.dimension_values[5].value
110
+ exercise = row.dimension_values[6].value
111
  event_value = float(row.metric_values[0].value)
112
  res['day'].append(day)
113
  res['jumps'].append(event_value)
 
115
  res['country'].append(country)
116
  res['iso'].append(country_iso)
117
  res['cityId'].append(city)
118
+ res['exercise'].append(exercise)
119
 
120
  df = pd.DataFrame.from_dict(res)
121
+ # then find the earliest day we started getting exercise name data
122
+ first_day = int(df['day'].min())
123
+ end_date = pd.to_datetime('2023-06-30') + pd.DateOffset(days=first_day)
124
+ # only need YYYY-MM-DD
125
+ end_date = str(end_date.strftime('%Y-%m-%d'))
126
+ # then request all data where we don't have the exercise name
127
+ request = RunReportRequest(
128
+ property=f"properties/{PROPERTY_ID}",
129
+ dimensions=[Dimension(name="nthDay"),
130
+ Dimension(name='eventName'),
131
+ Dimension(name="continent"),
132
+ Dimension(name="country"),
133
+ Dimension(name="countryId"),
134
+ Dimension(name="cityId")],
135
+ metrics=[Metric(name="eventValue")],
136
+ #return_property_quota=True,
137
+ date_ranges=[DateRange(start_date="2023-06-30", end_date=end_date)],
138
+ )
139
+ response = client.run_report(request)
140
+ res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
141
+ for row in response.rows:
142
+ event_name = row.dimension_values[1].value
143
+ if event_name == FINISHED_EXERCISE:
144
+ day = int(row.dimension_values[0].value)
145
+ continent = row.dimension_values[2].value
146
+ country = row.dimension_values[3].value
147
+ country_iso = row.dimension_values[4].value
148
+ city = row.dimension_values[5].value
149
+ event_value = float(row.metric_values[0].value)
150
+ res['day'].append(day)
151
+ res['jumps'].append(event_value)
152
+ res['continent'].append(continent)
153
+ res['country'].append(country)
154
+ res['iso'].append(country_iso)
155
+ res['cityId'].append(city)
156
+ res['exercise'].append('n/a')
157
+ new_df = pd.DataFrame.from_dict(res)
158
+ # drop any rows we already have
159
+ #new_df = new_df[new_df['day'] < first_day]
160
+ df = pd.concat([df, new_df]).reset_index(drop=True)
161
+ df['duration'] = df['exercise'].apply(lambda x: 0 if x in ['n/a', '(not set)'] else int(x[2:]))
162
+ print(df['duration'].sum())
163
  cached_report = df.copy(deep=True)
164
  else:
165
  print("Using cached report...")
 
187
  df['rank'] = df['jumps'].rank(ascending=False)
188
  df['world'] = 'Earth'
189
 
190
+ exercise_df = df[~df['exercise'].isin(['n/a', '(not set)'])]
191
+ # plot a bar graph of the most popular exercises and their counts in the dataset
192
+ pop_events = px.bar(exercise_df['exercise'].value_counts().reset_index()[:6],
193
+ y='index',
194
+ x='exercise',
195
+ color='index',
196
+ title='Most Popular Exercises',
197
+ template="plotly_dark")
198
+ pop_events.update_layout(showlegend=False)
199
+
200
  total = px.bar(country_df_to_plot,
201
  y='country', x='jumps',
202
  color='country',
 
240
  # color='jumps')
241
 
242
  print(df.columns)
243
+ nipy_spec = mpl_to_plotly(plt.cm.nipy_spectral, pl_entries=15)
244
  icicle = px.treemap(icicle_df, path=['world', 'continent', 'country', 'state', 'city'],
245
  values='jumps',
246
  title='Jumps by Continent/Country/City (click anywhere!)',
247
  template="plotly_dark",
248
+ color_continuous_scale=nipy_spec,
249
  branchvalues='total',
250
  height=800,
251
+ maxdepth=4,
252
  color='jumps')
253
 
254
  city_df = df.groupby(['city', 'iso']).sum().reset_index()
 
318
  df = df.sort_values(by=['day'])
319
  df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
320
  # fill in any missing days with current max value
321
+ for day in range(1, int(df['day'].max()) + 1):
322
  for continent in df['continent'].unique():
323
  if not df[(df['day'] == day) & (df['continent'] == continent)].any().any():
324
  max_jumps = df[(df['day'] < day) & (df['continent'] == continent)]['total_jumps'].max()
 
339
  template="plotly_dark")
340
 
341
  return f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries", \
342
+ total, total_weekly, avg, avg_weekly, total_map, icicle, jumps_over_time, pop_events, county_map, per_day_plot
343
 
344
 
345
+ with gr.Blocks() as demo:
346
  with gr.Row():
347
  total_jumps_label = gr.Markdown("Total Jumps: 0")
348
  with gr.Row():
 
363
  per_day_plot = gr.Plot(label="Jumps per Day")
364
  with gr.Row():
365
  county_map = gr.Plot(label="US Map")
366
+ with gr.Row():
367
+ popular_events = gr.Plot(label="Popular Events")
368
 
369
+ outputs = [total_jumps_label, total_plot, total_plot_weekly, avg_plot, avg_plot_weekly, map_fig, icicle_fig, jumps_over_time, popular_events, county_map, per_day_plot]
370
  dep = demo.load(full_report, None, outputs)
371
 
372
  if __name__ == "__main__":