File size: 10,404 Bytes
f8fbbae
 
 
 
 
 
 
 
 
 
 
 
64e560c
f8fbbae
a638a7e
f8fbbae
 
 
 
a638a7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8fbbae
64e560c
 
 
 
4520d60
64e560c
4520d60
 
 
 
 
 
 
64e560c
 
 
 
 
5aeaeae
d30b80e
 
64e560c
f8fbbae
5aeaeae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4520d60
5aeaeae
 
 
 
 
 
4520d60
5aeaeae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4520d60
5aeaeae
 
 
 
 
 
f8fbbae
4896170
661eb89
4520d60
661eb89
 
a638a7e
5754989
4520d60
 
661eb89
4520d60
 
a638a7e
f8fbbae
25e57be
 
 
 
 
 
 
 
 
 
 
a638a7e
25e57be
 
 
 
 
 
 
 
 
 
 
661eb89
25e57be
 
 
 
 
 
a638a7e
25e57be
 
b3d7da6
a638a7e
6f7a661
a638a7e
eec4d7a
a638a7e
fd1a344
4520d60
882717c
a638a7e
882717c
 
 
a638a7e
64e560c
 
 
 
6f7a661
4520d60
 
eec4d7a
4520d60
64e560c
 
fd1a344
64e560c
 
4896170
 
64e560c
90f7110
882717c
90f7110
5754989
 
 
 
 
e585bcf
 
5754989
 
882717c
 
 
0ea7c56
 
 
 
5754989
0ea7c56
 
 
882717c
661eb89
f8fbbae
 
 
 
 
a638a7e
 
882717c
 
a638a7e
b3d7da6
882717c
b3d7da6
661eb89
 
 
 
f8fbbae
0ea7c56
a638a7e
f8fbbae
 
a638a7e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
    DateRange,
    Dimension,
    Metric,
    RunReportRequest,
    RunRealtimeReportRequest
)

import gradio as gr
import os
import json
import time
import pandas as pd
import plotly.express as px

FINISHED_EXERCISE = 'finished_exercise'
PROPERTY_ID = "384068977"

# Bootstrap Google credentials for hosted deployments: the service-account
# JSON arrives as a string in GOOGLE_APPLICATION_CREDENTIALS_JSON, so
# materialize it to a file and point the client library at that file via
# GOOGLE_APPLICATION_CREDENTIALS.
try:
    credentials_dict = json.loads(os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON'])
    # Write next to this script and reuse the SAME path for the env var.
    # (Previously the file was written to the CWD while the env var pointed
    # into the script's directory — they disagree whenever the script is
    # launched from elsewhere, and the client then fails to find the file.)
    credentials_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'credentials.json')
    with open(credentials_path, 'w') as f:
        json.dump(credentials_dict, f)
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
except KeyError:  # env var absent -> running locally with default credentials
    pass

# ISO country-code lookup: GA reports alpha-2 country codes, but Plotly's
# choropleth wants alpha-3, so build an alpha-2 -> alpha-3 dict.
iso = pd.read_csv('iso.tsv', sep='\t')
iso['Alpha-2 code'] = iso['Alpha-2 code'].str.strip()  # source file pads codes with whitespace
iso['Alpha-3 code'] = iso['Alpha-3 code'].str.strip()
iso.set_index('Alpha-2 code', inplace=True)
alpha_2_map = iso['Alpha-3 code'].to_dict()

# US county boundary GeoJSON used by the county-level choropleth.
with open('counties.json') as f:
    counties = json.load(f)

# GA geo-target table: maps GA's numeric "Criteria ID" (the cityId dimension)
# to a city name and state.
ga_cities = pd.read_csv('cities.csv')
# US cities table keyed by "City, State Name" -> county FIPS code and name.
cities = pd.read_csv('uscities.csv')
cities['full_city'] = cities['city'] + ', ' + cities['state_name']
cities.set_index('full_city', inplace=True)
ga_cities['Criteria ID'] = ga_cities['Criteria ID'].astype(str)  # GA returns cityId as a string
ga_cities.set_index('Criteria ID', inplace=True)
ga_city_map = ga_cities['Name'].to_dict()
# NOTE(review): assumes 'Canonical Name' is "City,State,Country" so the second
# comma-separated field is the state — confirm against the cities.csv export.
ga_cities['state'] = ga_cities['Canonical Name'].str.split(',').str[1].str.strip()
ga_state_map = ga_cities['state'].to_dict()
city_county_map = cities['county_fips'].to_dict()
city_county_name_map = cities['county_name'].to_dict()

# Module-level cache for the GA report (mutated by full_report()).
cached_report = None  # last fetched DataFrame, or None before the first fetch
cache_time = 0  # epoch seconds of the last fetch
reload_cache = False  # despite the name: True means the cache is fresh/usable
# 6 hours
reload_every = 6 * 60 * 60  # cache TTL in seconds

def full_report():
    """Fetch GA4 'finished_exercise' events, aggregate them, and build every dashboard figure.

    The GA4 API response is cached in module globals for up to ``reload_every``
    seconds (6 h); within that window the cached DataFrame is reused.

    Returns:
        A 7-tuple in the order the Gradio ``outputs`` list expects:
        (headline markdown string, country/city bar chart, city/day bar chart,
        world choropleth, cumulative-jumps area chart, US county choropleth,
        jumps-per-day scatter with a 14-day rolling trendline).
    """
    global cached_report, cache_time, reload_cache
    # NOTE(review): `reload_cache` really acts as a "cache is valid" flag —
    # False forces a refetch below, True reuses `cached_report`.
    if time.time() - cache_time > reload_every:
        reload_cache = False
    if not reload_cache:
        print("Loading report...")
        reload_cache = True
        cache_time = time.time()
        client = BetaAnalyticsDataClient()

        # Single report covering every day since launch; "nthDay" is the day
        # index relative to the start of the date range.
        request = RunReportRequest(
            property=f"properties/{PROPERTY_ID}",
            dimensions=[Dimension(name="nthDay"),
                        Dimension(name='eventName'),
                        Dimension(name="continent"),
                        Dimension(name="country"),
                        Dimension(name="countryId"),
                        Dimension(name="cityId")],
            metrics=[Metric(name="eventValue")],
            #return_property_quota=True,
            date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
        )
        response = client.run_report(request)

        res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': []}

        # Keep only finished_exercise rows. Dimension positions match the
        # `dimensions` list in the request above.
        for row in response.rows:
            event_name = row.dimension_values[1].value
            if event_name == FINISHED_EXERCISE:
                day = int(row.dimension_values[0].value)
                continent = row.dimension_values[2].value
                country = row.dimension_values[3].value
                country_iso = row.dimension_values[4].value  # ISO alpha-2 (countryId)
                city = row.dimension_values[5].value  # GA geo Criteria ID, as a string
                event_value = float(row.metric_values[0].value)
                res['day'].append(day)
                res['jumps'].append(event_value)
                res['continent'].append(continent)
                res['country'].append(country)
                res['iso'].append(country_iso)
                res['cityId'].append(city)

        df = pd.DataFrame.from_dict(res)
        # Deep-copy so later in-place transforms never mutate the cache.
        cached_report = df.copy(deep=True)
    else:
        print("Using cached report...")
        df = cached_report.copy(deep=True)

    # Headline totals for the markdown banner.
    total_jumps = int(df['jumps'].sum())
    unique_countries = df['country'].nunique()
    unique_cities = df['cityId'].nunique()

    print(f"Total jumps: {total_jumps}, unique countries: {unique_countries}, unique cities: {unique_cities}")
    df['iso'] = df['iso'].map(alpha_2_map)  # alpha-2 -> alpha-3 for Plotly choropleths
    df['jumps'] = df['jumps'].astype(int)
    df['city'] = df['cityId'].map(ga_city_map)
    df['state'] = df['cityId'].map(ga_state_map)
    # NOTE(review): special-cases Bermuda to 'Hamilton' — presumably the GA
    # city mapping is missing/wrong for Bermuda; confirm.
    df['city'] = df.apply(lambda row: row['city'] if row['country'] != 'Bermuda' else 'Hamilton', axis=1)
    df['city'] = df['city'] + ', ' + df['state']

    # NOTE(review): the bare .sum() calls below aggregate mixed-dtype groups;
    # pandas' handling of non-numeric columns here differs across versions
    # (silent drop vs. string concatenation vs. error) — verify the pinned
    # pandas version, or consider sum(numeric_only=True).
    country_df = df.groupby(['country', 'iso']).sum().reset_index()
    country_df = country_df.sort_values(by=['jumps'], ascending=False)
    top_10_countries = country_df.iloc[:10]['country'].tolist()

    # Horizontal stacked bar: top-10 countries, colored by city.
    country_df_to_plot = df.groupby(['country', 'iso', 'city']).sum().reset_index()
    country_df_to_plot = country_df_to_plot[country_df_to_plot['country'].isin(top_10_countries)].reset_index(drop=True)
    country_df_to_plot = country_df_to_plot.sort_values(by=['day', 'jumps'], ascending=True)
    total = px.bar(country_df_to_plot, 
                   y='country', x='jumps', 
                   color='city',
                   title='Total Jumps by Country/City', 
                   orientation='h',
                   category_orders={'country': top_10_countries},
                   template="plotly_dark")
    total.update_layout(showlegend=False)

    # Rank cities by total jumps to pick the top 10 (labels include ISO code
    # to disambiguate same-named cities across countries).
    city_df = df.groupby(['city', 'iso']).sum().reset_index()
    city_df = city_df.sort_values(by=['jumps'], ascending=False)
    city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    top_10_cities = city_df.iloc[:10]['city'].tolist()

    # Re-aggregate per day so each city bar is segmented by day.
    city_df = df.groupby(['city', 'iso', 'day']).sum().reset_index()
    city_df = city_df[city_df['city'] != '(not set)']  # GA's placeholder for unknown city
    city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    city_df = city_df[city_df['city'].isin(top_10_cities)].reset_index(drop=True)
    city_df = city_df.sort_values(by=['day', 'jumps'], ascending=True)

    avg = px.bar(city_df, 
                 y='city', x='jumps', color='day',
                 title='Total Jumps by City/Day', 
                 orientation='h',
                 category_orders={'city': top_10_cities},
                 template="plotly_dark")
    
    avg.update_layout(showlegend=False)
    avg.update(layout_coloraxis_showscale=False)

    # World choropleth colored by rank (rank 1 = most jumps; _r scale makes
    # the top countries darkest).
    country_df['rank'] = country_df['jumps'].rank(ascending=False)
    total_map = px.choropleth(country_df, locations="iso",
                                color="rank", 
                                hover_name="country", # column to add to hover information
                                hover_data=["jumps"],
                                color_continuous_scale ="OrRd_r",
                                projection='natural earth',
                                template="plotly_dark")
    # remove the legend
    total_map.update_layout(showlegend=False)
    total_map.update(layout_coloraxis_showscale=False)

    # US county choropleth: map "City, State" labels to county FIPS codes.
    # Non-US cities map to NaN and are dropped by the groupby.
    county_df = df.copy()
    county_df['county'] = county_df['city'].map(city_county_map)
    county_df['count_name'] = county_df['city'].map(city_county_name_map)
    county_df = county_df.groupby(['county', 'count_name']).sum().reset_index()
    county_df['rank'] = county_df['jumps'].rank(ascending=False)
    county_df['county'] = county_df['county'].astype(int)
    county_df['county'] = county_df['county'].astype(str).str.zfill(5)  # county codes are two digits for state, three for county
    county_map = px.choropleth(county_df, geojson=counties, locations='county', color='rank',
                           color_continuous_scale="OrRd_r",
                           scope="usa",
                           hover_name="count_name",
                            hover_data=["jumps"],
                           template="plotly_dark"
                          )
    county_map.update_layout(showlegend=False)
    county_map.update(layout_coloraxis_showscale=False)

    # Cumulative jumps per continent over time.
    df = df.groupby(['day', 'continent']).sum().reset_index()
    df = df.sort_values(by=['day'])
    df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
    # fill in any missing days with current max value
    # (otherwise px.area drops the series to zero on gap days)
    for day in range(1, df['day'].max() + 1):
        for continent in df['continent'].unique():
            if not df[(df['day'] == day) & (df['continent'] == continent)].any().any():
                max_jumps = df[(df['day'] < day) & (df['continent'] == continent)]['total_jumps'].max()
                # Filler rows carry only day/continent/total_jumps; the other
                # columns become NaN, which the area chart ignores.
                df = pd.concat([df, pd.DataFrame([[day, continent, max_jumps]], columns=['day', 'continent', 'total_jumps'])])
                #df = df.append({'day': day, 'continent': continent, 'total_jumps': max_jumps}, ignore_index=True)
    df = df.sort_values(by=['day']).reset_index(drop=True)
    jumps_over_time = px.area(df, x='day', 
                              y='total_jumps', 
                              color='continent', 
                              template="plotly_dark")
    
    # Daily totals with a 14-day rolling trendline over all points.
    daily_df = df.groupby(['day']).sum().reset_index()
    per_day_plot = px.scatter(daily_df, x='day', y='jumps', 
                              trendline='rolling',
                              trendline_options=dict(window=14),
                              trendline_color_override="goldenrod",
                              trendline_scope='overall',
                              template="plotly_dark")

    return f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries", total, avg, total_map, jumps_over_time, county_map, per_day_plot


# Dashboard layout: one row per widget, each full-width. Component creation
# order is the top-to-bottom page order; the `outputs` list below must stay
# aligned with the tuple full_report() returns.
with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
    with gr.Row():
        headline = gr.Markdown("Total Jumps: 0")

    # Build the six plot rows from their labels (page order).
    plots = {}
    for plot_label in ("Map", "Jumps Over Time", "Top 10 Countries",
                       "Top 10 Cities", "Jumps per Day", "US Map"):
        with gr.Row():
            plots[plot_label] = gr.Plot(label=plot_label)

    # Order matches full_report()'s return tuple, NOT the page order.
    outputs = [
        headline,
        plots["Top 10 Countries"],
        plots["Top 10 Cities"],
        plots["Map"],
        plots["Jumps Over Time"],
        plots["US Map"],
        plots["Jumps per Day"],
    ]
    # Populate every widget once when the page loads.
    demo.load(full_report, None, outputs)

if __name__ == "__main__":
    demo.launch(share=False)