dylanplummer committed on
Commit
d955465
·
verified ·
1 Parent(s): ee4701d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +373 -372
app.py CHANGED
@@ -1,373 +1,374 @@
1
- from google.analytics.data_v1beta import BetaAnalyticsDataClient
2
- from google.analytics.data_v1beta.types import (
3
- DateRange,
4
- Dimension,
5
- Metric,
6
- RunReportRequest,
7
- RunRealtimeReportRequest
8
- )
9
-
10
- import gradio as gr
11
- import os
12
- import json
13
- import time
14
- import math
15
- import numpy as np
16
- import pandas as pd
17
- import matplotlib.pyplot as plt
18
- import plotly.express as px
19
-
20
- FINISHED_EXERCISE = 'finished_exercise'
21
- PROPERTY_ID = "384068977"
22
-
23
- try:
24
- credentials_json = os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON']
25
- credentials_dict = json.loads(credentials_json)
26
- # write json to file
27
- with open('credentials.json', 'w') as f:
28
- json.dump(credentials_dict, f)
29
- # set env var to filename
30
- os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(os.path.dirname(__file__), 'credentials.json')
31
- except KeyError: # running locally
32
- pass
33
- except Exception as e:
34
- print(f"Error loading credentials: {e}")
35
- pass
36
-
37
- iso = pd.read_csv('iso.tsv', sep='\t')
38
- iso['Alpha-2 code'] = iso['Alpha-2 code'].str.strip()
39
- iso['Alpha-3 code'] = iso['Alpha-3 code'].str.strip()
40
- iso.set_index('Alpha-2 code', inplace=True)
41
- alpha_2_map = iso['Alpha-3 code'].to_dict()
42
-
43
- # read counties json file
44
- with open('counties.json') as f:
45
- counties = json.load(f)
46
-
47
- ga_cities = pd.read_csv('cities.csv')
48
- cities = pd.read_csv('uscities.csv')
49
- cities['full_city'] = cities['city'] + ', ' + cities['state_name']
50
- cities.set_index('full_city', inplace=True)
51
- ga_cities['Criteria ID'] = ga_cities['Criteria ID'].astype(str)
52
- ga_cities.set_index('Criteria ID', inplace=True)
53
- ga_city_map = ga_cities['Name'].to_dict()
54
- ga_cities['state'] = ga_cities['Canonical Name'].str.split(',').str[1].str.strip()
55
- ga_state_map = ga_cities['state'].to_dict()
56
- city_county_map = cities['county_fips'].to_dict()
57
- city_county_name_map = cities['county_name'].to_dict()
58
-
59
- cached_report = None
60
- cache_time = 0
61
- reload_cache = False
62
- # 6 hours
63
- reload_every = 6 * 60 * 60
64
-
65
- def mpl_to_plotly(cmap, pl_entries=11, rdigits=2):
66
- # cmap - colormap
67
- # pl_entries - int = number of Plotly colorscale entries
68
- # rdigits - int -=number of digits for rounding scale values
69
- scale = np.linspace(0, 1, pl_entries)
70
- colors = (cmap(scale)[:, :3]*255).astype(np.uint8)
71
- pl_colorscale = [[round(s, rdigits), f'rgb{tuple(color)}'] for s, color in zip(scale, colors)]
72
- return pl_colorscale
73
-
74
- def full_report():
75
- global cached_report, cache_time, reload_cache
76
- if time.time() - cache_time > reload_every:
77
- reload_cache = False
78
- if not reload_cache:
79
- print("Loading report...")
80
- reload_cache = True
81
- cache_time = time.time()
82
- client = BetaAnalyticsDataClient()
83
-
84
- # first request all data where we have the exercise name
85
- request = RunReportRequest(
86
- property=f"properties/{PROPERTY_ID}",
87
- dimensions=[Dimension(name="nthDay"),
88
- Dimension(name='eventName'),
89
- Dimension(name="continent"),
90
- Dimension(name="country"),
91
- Dimension(name="countryId"),
92
- Dimension(name="cityId"),
93
- Dimension(name="customEvent:exercise")],
94
- metrics=[Metric(name="eventValue")],
95
- #return_property_quota=True,
96
- date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
97
- )
98
- response = client.run_report(request)
99
-
100
- res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
101
-
102
- for row in response.rows:
103
- event_name = row.dimension_values[1].value
104
- if event_name == FINISHED_EXERCISE:
105
- day = int(row.dimension_values[0].value)
106
- continent = row.dimension_values[2].value
107
- country = row.dimension_values[3].value
108
- country_iso = row.dimension_values[4].value
109
- city = row.dimension_values[5].value
110
- exercise = row.dimension_values[6].value
111
- event_value = float(row.metric_values[0].value)
112
- res['day'].append(day)
113
- res['jumps'].append(event_value)
114
- res['continent'].append(continent)
115
- res['country'].append(country)
116
- res['iso'].append(country_iso)
117
- res['cityId'].append(city)
118
- res['exercise'].append(exercise)
119
-
120
- df = pd.DataFrame.from_dict(res)
121
- # then find the earliest day we started getting exercise name data
122
- first_day = int(df['day'].min())
123
- end_date = pd.to_datetime('2023-06-30') + pd.DateOffset(days=first_day)
124
- # only need YYY-MM-DD
125
- end_date = str(end_date.strftime('%Y-%m-%d'))
126
- # then request all data where we don't have the exercise name
127
- request = RunReportRequest(
128
- property=f"properties/{PROPERTY_ID}",
129
- dimensions=[Dimension(name="nthDay"),
130
- Dimension(name='eventName'),
131
- Dimension(name="continent"),
132
- Dimension(name="country"),
133
- Dimension(name="countryId"),
134
- Dimension(name="cityId")],
135
- metrics=[Metric(name="eventValue")],
136
- #return_property_quota=True,
137
- date_ranges=[DateRange(start_date="2023-06-30", end_date=end_date)],
138
- )
139
- response = client.run_report(request)
140
- res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
141
- for row in response.rows:
142
- event_name = row.dimension_values[1].value
143
- if event_name == FINISHED_EXERCISE:
144
- day = int(row.dimension_values[0].value)
145
- continent = row.dimension_values[2].value
146
- country = row.dimension_values[3].value
147
- country_iso = row.dimension_values[4].value
148
- city = row.dimension_values[5].value
149
- event_value = float(row.metric_values[0].value)
150
- res['day'].append(day)
151
- res['jumps'].append(event_value)
152
- res['continent'].append(continent)
153
- res['country'].append(country)
154
- res['iso'].append(country_iso)
155
- res['cityId'].append(city)
156
- res['exercise'].append('n/a')
157
- new_df = pd.DataFrame.from_dict(res)
158
- # drop any rows we already have
159
- #new_df = new_df[new_df['day'] < first_day]
160
- df = pd.concat([df, new_df]).reset_index(drop=True)
161
- df['duration'] = df['exercise'].apply(lambda x: 0 if x in ['n/a', '(not set)'] else int(x[2:]))
162
- print(df['duration'].sum())
163
- cached_report = df.copy(deep=True)
164
- else:
165
- print("Using cached report...")
166
- df = cached_report.copy(deep=True)
167
-
168
- total_jumps = int(df['jumps'].sum())
169
- unique_countries = df['country'].nunique()
170
- unique_cities = df['cityId'].nunique()
171
-
172
- print(f"Total jumps: {total_jumps}, unique countries: {unique_countries}, unique cities: {unique_cities}")
173
- df['iso'] = df['iso'].map(alpha_2_map)
174
- df['jumps'] = df['jumps'].astype(int)
175
- df['city'] = df['cityId'].map(ga_city_map)
176
- df['state'] = df['cityId'].map(ga_state_map)
177
- df['city'] = df.apply(lambda row: row['city'] if row['country'] != 'Bermuda' else 'Hamilton', axis=1)
178
- df['city'] = df['city'] + ', ' + df['state']
179
-
180
- country_df = df.groupby(['country', 'iso']).sum().reset_index()
181
- country_df = country_df.sort_values(by=['jumps'], ascending=False)
182
- top_10_countries = country_df.iloc[:15]['country'].tolist()
183
-
184
- country_df_to_plot = df.groupby(['country', 'iso']).sum().reset_index()
185
- country_df_to_plot = country_df_to_plot[country_df_to_plot['country'].isin(top_10_countries)].reset_index(drop=True)
186
- country_df_to_plot = country_df_to_plot.sort_values(by=['jumps'], ascending=True)
187
- df['rank'] = df['jumps'].rank(ascending=False)
188
- df['world'] = 'Earth'
189
-
190
- exercise_df = df[~df['exercise'].isin(['n/a', '(not set)'])]
191
- # plot a bar graph of the most popular exercises and their counts in the dataset
192
- pop_events = px.bar(exercise_df['exercise'].value_counts().reset_index()[:6],
193
- y='index',
194
- x='exercise',
195
- color='index',
196
- title='Most Popular Exercises',
197
- template="plotly_dark")
198
- pop_events.update_layout(showlegend=False)
199
-
200
- total = px.bar(country_df_to_plot,
201
- y='country', x='jumps',
202
- color='country',
203
- title='Total Jumps by Country',
204
- orientation='h',
205
- category_orders={'country': top_10_countries},
206
- height=800,
207
- template="plotly_dark")
208
- total.update_layout(showlegend=False)
209
-
210
- country_df_to_plot_weekly = df[df['day'] >= df['day'].max() - 7].groupby(['country', 'iso']).sum().reset_index()
211
- country_df_to_plot_weekly = country_df_to_plot_weekly.sort_values(by=['jumps'], ascending=False)
212
- top_5_weekly = country_df_to_plot_weekly.iloc[:10]['country'].tolist()
213
- country_df_to_plot_weekly = country_df_to_plot_weekly[country_df_to_plot_weekly['country'].isin(top_5_weekly)].reset_index(drop=True)
214
- country_df_to_plot_weekly = country_df_to_plot_weekly.sort_values(by=['jumps'], ascending=True)
215
- total_weekly = px.bar(country_df_to_plot_weekly,
216
- y='country', x='jumps',
217
- color='country',
218
- title='Top Countries This Week',
219
- orientation='h',
220
- category_orders={'country': top_5_weekly},
221
- height=500,
222
- template="plotly_dark")
223
- total_weekly.update_layout(showlegend=False)
224
-
225
- city_df = df.groupby(['city', 'iso']).sum().reset_index()
226
- city_df = city_df.sort_values(by=['jumps'], ascending=False)
227
- city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
228
- top_10_cities = city_df.iloc[:15]['city'].tolist()
229
-
230
- icicle_df = df.groupby(['world', 'continent', 'country', 'state', 'city']).sum().reset_index()
231
- #icicle_df['log10_jumps'] = icicle_df['jumps'].apply(lambda x: math.log10(x) if x > 0 else 0)
232
-
233
- # icicle = px.icicle(icicle_df, path=['world', 'continent', 'country', 'city'],
234
- # values='jumps',
235
- # title='Jumps by Continent/Country',
236
- # template="plotly_dark",
237
- # color_continuous_scale='OrRd',
238
- # maxdepth=7,
239
- # branchvalues='remainder',
240
- # color='jumps')
241
-
242
- print(df.columns)
243
- nipy_spec = mpl_to_plotly(plt.cm.nipy_spectral, pl_entries=15)
244
- icicle = px.treemap(icicle_df, path=['world', 'continent', 'country', 'state', 'city'],
245
- values='jumps',
246
- title='Jumps by Continent/Country/City (click anywhere!)',
247
- template="plotly_dark",
248
- color_continuous_scale=nipy_spec,
249
- branchvalues='total',
250
- height=800,
251
- maxdepth=4,
252
- color='jumps')
253
-
254
- city_df = df.groupby(['city', 'iso']).sum().reset_index()
255
- city_df = city_df[city_df['city'] != '(not set)']
256
- city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
257
- city_df = city_df[city_df['city'].isin(top_10_cities)].reset_index(drop=True)
258
- city_df = city_df.sort_values(by=['jumps'], ascending=True)
259
-
260
- avg = px.bar(city_df,
261
- y='city', x='jumps', color='city',
262
- title='Total Jumps by City',
263
- orientation='h',
264
- category_orders={'city': top_10_cities},
265
- height=800,
266
- template="plotly_dark")
267
-
268
- city_df_weekly = df[df['day'] >= df['day'].max() - 7].groupby(['city', 'iso']).sum().reset_index()
269
- city_df_weekly = city_df_weekly[city_df_weekly['city'] != '(not set)']
270
- city_df_weekly['city'] = city_df_weekly.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
271
- city_df_weekly = city_df_weekly.sort_values(by=['jumps'], ascending=False)
272
- top_5_weekly = city_df_weekly.iloc[:10]['city'].tolist()
273
- city_df_weekly = city_df_weekly[city_df_weekly['city'].isin(top_5_weekly)].reset_index(drop=True)
274
- city_df_weekly = city_df_weekly.sort_values(by=['jumps'], ascending=True)
275
- avg_weekly = px.bar(city_df_weekly,
276
- y='city', x='jumps', color='city',
277
- title='Top Cities This Week',
278
- orientation='h',
279
- category_orders={'city': top_5_weekly},
280
- height=500,
281
- template="plotly_dark")
282
-
283
- avg.update_layout(showlegend=False)
284
- avg.update(layout_coloraxis_showscale=False)
285
- avg_weekly.update_layout(showlegend=False)
286
- avg_weekly.update(layout_coloraxis_showscale=False)
287
-
288
- country_df['rank'] = country_df['jumps'].rank(ascending=False)
289
- total_map = px.choropleth(country_df, locations="iso",
290
- color="rank",
291
- hover_name="country", # column to add to hover information
292
- hover_data=["jumps"],
293
- color_continuous_scale ="OrRd_r",
294
- projection='natural earth',
295
- template="plotly_dark")
296
- # remove the legend
297
- total_map.update_layout(showlegend=False)
298
- total_map.update(layout_coloraxis_showscale=False)
299
-
300
- county_df = df.copy()
301
- county_df['county'] = county_df['city'].map(city_county_map)
302
- county_df['count_name'] = county_df['city'].map(city_county_name_map)
303
- county_df = county_df.groupby(['county', 'count_name']).sum().reset_index()
304
- county_df['rank'] = county_df['jumps'].rank(ascending=False)
305
- county_df['county'] = county_df['county'].astype(int)
306
- county_df['county'] = county_df['county'].astype(str).str.zfill(5) # county codes are two digits for state, three for county
307
- county_map = px.choropleth(county_df, geojson=counties, locations='county', color='rank',
308
- color_continuous_scale="OrRd_r",
309
- scope="usa",
310
- hover_name="count_name",
311
- hover_data=["jumps"],
312
- template="plotly_dark"
313
- )
314
- county_map.update_layout(showlegend=False)
315
- county_map.update(layout_coloraxis_showscale=False)
316
-
317
- df = df.groupby(['day', 'continent']).sum().reset_index()
318
- df = df.sort_values(by=['day'])
319
- df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
320
- # fill in any missing days with current max value
321
- for day in range(1, int(df['day'].max()) + 1):
322
- for continent in df['continent'].unique():
323
- if not df[(df['day'] == day) & (df['continent'] == continent)].any().any():
324
- max_jumps = df[(df['day'] < day) & (df['continent'] == continent)]['total_jumps'].max()
325
- df = pd.concat([df, pd.DataFrame([[day, continent, max_jumps]], columns=['day', 'continent', 'total_jumps'])])
326
- #df = df.append({'day': day, 'continent': continent, 'total_jumps': max_jumps}, ignore_index=True)
327
- df = df.sort_values(by=['day']).reset_index(drop=True)
328
- jumps_over_time = px.area(df, x='day',
329
- y='total_jumps',
330
- color='continent',
331
- template="plotly_dark")
332
- df.fillna(0, inplace=True)
333
- daily_df = df.groupby(['day'])[['jumps']].sum().reset_index()
334
- per_day_plot = px.scatter(daily_df, x='day', y='jumps',
335
- trendline='rolling',
336
- trendline_options=dict(window=14),
337
- trendline_color_override="goldenrod",
338
- trendline_scope='overall',
339
- template="plotly_dark")
340
-
341
- return f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries", \
342
- total, total_weekly, avg, avg_weekly, total_map, icicle, jumps_over_time, pop_events, county_map, per_day_plot
343
-
344
-
345
- with gr.Blocks() as demo:
346
- with gr.Row():
347
- total_jumps_label = gr.Markdown("Total Jumps: 0")
348
- with gr.Row():
349
- map_fig = gr.Plot(label="Map")
350
- with gr.Row():
351
- jumps_over_time = gr.Plot(label="Jumps Over Time")
352
- with gr.Row():
353
- total_plot = gr.Plot(label="Top Countries (All Time)")
354
- with gr.Row():
355
- total_plot_weekly = gr.Plot(label="Top Countries (This Week)")
356
- with gr.Row():
357
- avg_plot = gr.Plot(label="Top Cities (All Time)")
358
- with gr.Row():
359
- avg_plot_weekly = gr.Plot(label="Top Cities (This Week)")
360
- with gr.Row():
361
- icicle_fig = gr.Plot(label="Treemap")
362
- with gr.Row():
363
- per_day_plot = gr.Plot(label="Jumps per Day")
364
- with gr.Row():
365
- county_map = gr.Plot(label="US Map")
366
- with gr.Row():
367
- popular_events = gr.Plot(label="Popular Events")
368
-
369
- outputs = [total_jumps_label, total_plot, total_plot_weekly, avg_plot, avg_plot_weekly, map_fig, icicle_fig, jumps_over_time, popular_events, county_map, per_day_plot]
370
- dep = demo.load(full_report, None, outputs)
371
-
372
- if __name__ == "__main__":
 
373
  demo.launch(share=False)
 
1
+ from google.analytics.data_v1beta import BetaAnalyticsDataClient
2
+ from google.analytics.data_v1beta.types import (
3
+ DateRange,
4
+ Dimension,
5
+ Metric,
6
+ RunReportRequest,
7
+ RunRealtimeReportRequest
8
+ )
9
+
10
+ import gradio as gr
11
+ import os
12
+ import json
13
+ import time
14
+ import math
15
+ import numpy as np
16
+ import pandas as pd
17
+ import matplotlib.pyplot as plt
18
+ import plotly.express as px
19
+
20
+ FINISHED_EXERCISE = 'finished_exercise'
21
+ PROPERTY_ID = "384068977"
22
+
23
+ try:
24
+ credentials_json = os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON']
25
+ credentials_dict = json.loads(credentials_json)
26
+ # write json to file
27
+ with open('credentials.json', 'w') as f:
28
+ json.dump(credentials_dict, f)
29
+ # set env var to filename
30
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(os.path.dirname(__file__), 'credentials.json')
31
+ except KeyError: # running locally
32
+ pass
33
+ except Exception as e:
34
+ print(f"Error loading credentials: {e}")
35
+ pass
36
+
37
+ iso = pd.read_csv('iso.tsv', sep='\t')
38
+ iso['Alpha-2 code'] = iso['Alpha-2 code'].str.strip()
39
+ iso['Alpha-3 code'] = iso['Alpha-3 code'].str.strip()
40
+ iso.set_index('Alpha-2 code', inplace=True)
41
+ alpha_2_map = iso['Alpha-3 code'].to_dict()
42
+
43
+ # read counties json file
44
+ with open('counties.json') as f:
45
+ counties = json.load(f)
46
+
47
+ ga_cities = pd.read_csv('cities.csv')
48
+ cities = pd.read_csv('uscities.csv')
49
+ cities['full_city'] = cities['city'] + ', ' + cities['state_name']
50
+ cities.set_index('full_city', inplace=True)
51
+ ga_cities['Criteria ID'] = ga_cities['Criteria ID'].astype(str)
52
+ ga_cities.set_index('Criteria ID', inplace=True)
53
+ ga_city_map = ga_cities['Name'].to_dict()
54
+ ga_cities['state'] = ga_cities['Canonical Name'].str.split(',').str[1].str.strip()
55
+ ga_state_map = ga_cities['state'].to_dict()
56
+ city_county_map = cities['county_fips'].to_dict()
57
+ city_county_name_map = cities['county_name'].to_dict()
58
+
59
+ cached_report = None
60
+ cache_time = 0
61
+ reload_cache = False
62
+ # 6 hours
63
+ reload_every = 6 * 60 * 60
64
+
65
def mpl_to_plotly(cmap, pl_entries=11, rdigits=2):
    """Convert a matplotlib colormap into a Plotly colorscale.

    Args:
        cmap: a matplotlib colormap — any callable mapping an array of values
            in [0, 1] to RGBA rows.
        pl_entries: number of Plotly colorscale entries to sample.
        rdigits: number of digits for rounding the scale positions.

    Returns:
        A list of ``[position, 'rgb(r, g, b)']`` pairs usable as a Plotly
        ``color_continuous_scale``.
    """
    scale = np.linspace(0, 1, pl_entries)
    # sample the colormap, drop the alpha channel, scale to 0-255
    colors = (cmap(scale)[:, :3]*255).astype(np.uint8)
    # Cast each channel to a plain int: formatting a tuple of np.uint8 scalars
    # yields 'rgb(np.uint8(12), ...)' under NumPy >= 2.0 (NEP 51 repr change),
    # which Plotly cannot parse. Plain ints render 'rgb(12, 34, 56)' everywhere.
    pl_colorscale = [[round(s, rdigits), f'rgb{tuple(int(c) for c in color)}'] for s, color in zip(scale, colors)]
    return pl_colorscale
73
+
74
def full_report():
    """Fetch jump-event data from Google Analytics (with a 6-hour in-process
    cache) and build every figure for the dashboard.

    Returns a tuple whose order must match the ``outputs`` list registered
    with ``demo.load``: (summary markdown, country bar, weekly country bar,
    city bar, weekly city bar, world choropleth, treemap, cumulative area
    chart, popular-exercises bar, US county choropleth, per-day scatter).
    """
    global cached_report, cache_time, reload_cache
    # invalidate the cache once it is older than reload_every seconds
    if time.time() - cache_time > reload_every:
        reload_cache = False
    if not reload_cache:
        print("Loading report...")
        reload_cache = True
        cache_time = time.time()
        client = BetaAnalyticsDataClient()

        # first request all data where we have the exercise name
        request = RunReportRequest(
            property=f"properties/{PROPERTY_ID}",
            dimensions=[Dimension(name="nthDay"),
                        Dimension(name='eventName'),
                        Dimension(name="continent"),
                        Dimension(name="country"),
                        Dimension(name="countryId"),
                        Dimension(name="cityId"),
                        Dimension(name="customEvent:exercise")],
            metrics=[Metric(name="eventValue")],
            #return_property_quota=True,
            date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
        )
        response = client.run_report(request)

        res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}

        # keep only finished_exercise events; dimension order matches the
        # dimensions list in the request above
        for row in response.rows:
            event_name = row.dimension_values[1].value
            if event_name == FINISHED_EXERCISE:
                day = int(row.dimension_values[0].value)
                continent = row.dimension_values[2].value
                country = row.dimension_values[3].value
                country_iso = row.dimension_values[4].value
                city = row.dimension_values[5].value
                exercise = row.dimension_values[6].value
                event_value = float(row.metric_values[0].value)
                res['day'].append(day)
                res['jumps'].append(event_value)
                res['continent'].append(continent)
                res['country'].append(country)
                res['iso'].append(country_iso)
                res['cityId'].append(city)
                res['exercise'].append(exercise)

        df = pd.DataFrame.from_dict(res)
        # then find the earliest day we started getting exercise name data
        first_day = int(df['day'].min())
        end_date = pd.to_datetime('2023-06-30') + pd.DateOffset(days=first_day)
        # only need YYYY-MM-DD
        end_date = str(end_date.strftime('%Y-%m-%d'))
        # then request all data where we don't have the exercise name
        # (same query minus the customEvent:exercise dimension, capped at the
        # date the exercise dimension started appearing)
        request = RunReportRequest(
            property=f"properties/{PROPERTY_ID}",
            dimensions=[Dimension(name="nthDay"),
                        Dimension(name='eventName'),
                        Dimension(name="continent"),
                        Dimension(name="country"),
                        Dimension(name="countryId"),
                        Dimension(name="cityId")],
            metrics=[Metric(name="eventValue")],
            #return_property_quota=True,
            date_ranges=[DateRange(start_date="2023-06-30", end_date=end_date)],
        )
        response = client.run_report(request)
        res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': [], 'exercise': []}
        for row in response.rows:
            event_name = row.dimension_values[1].value
            if event_name == FINISHED_EXERCISE:
                day = int(row.dimension_values[0].value)
                continent = row.dimension_values[2].value
                country = row.dimension_values[3].value
                country_iso = row.dimension_values[4].value
                city = row.dimension_values[5].value
                event_value = float(row.metric_values[0].value)
                res['day'].append(day)
                res['jumps'].append(event_value)
                res['continent'].append(continent)
                res['country'].append(country)
                res['iso'].append(country_iso)
                res['cityId'].append(city)
                # no exercise dimension available for these older rows
                res['exercise'].append('n/a')
        new_df = pd.DataFrame.from_dict(res)
        # drop any rows we already have
        #new_df = new_df[new_df['day'] < first_day]
        df = pd.concat([df, new_df]).reset_index(drop=True)
        # duration parsed from the exercise name by stripping a 2-char prefix
        # — assumes names look like 'xxNN'; TODO confirm against event schema
        df['duration'] = df['exercise'].apply(lambda x: 0 if x in ['n/a', '(not set)'] else int(x[2:]))
        print(df['duration'].sum())
        cached_report = df.copy(deep=True)
    else:
        print("Using cached report...")
        df = cached_report.copy(deep=True)

    # headline stats for the markdown summary
    total_jumps = int(df['jumps'].sum())
    unique_countries = df['country'].nunique()
    unique_cities = df['cityId'].nunique()

    print(f"Total jumps: {total_jumps}, unique countries: {unique_countries}, unique cities: {unique_cities}")
    # GA returns alpha-2 country codes; choropleth locations use alpha-3
    df['iso'] = df['iso'].map(alpha_2_map)
    df['jumps'] = df['jumps'].astype(int)
    df['city'] = df['cityId'].map(ga_city_map)
    df['state'] = df['cityId'].map(ga_state_map)
    # special-case Bermuda, whose cityId does not resolve via the GA table
    df['city'] = df.apply(lambda row: row['city'] if row['country'] != 'Bermuda' else 'Hamilton', axis=1)
    df['city'] = df['city'] + ', ' + df['state']

    # all-time country totals (top 15 despite the variable name)
    country_df = df.groupby(['country', 'iso']).sum().reset_index()
    country_df = country_df.sort_values(by=['jumps'], ascending=False)
    top_10_countries = country_df.iloc[:15]['country'].tolist()

    country_df_to_plot = df.groupby(['country', 'iso']).sum().reset_index()
    country_df_to_plot = country_df_to_plot[country_df_to_plot['country'].isin(top_10_countries)].reset_index(drop=True)
    country_df_to_plot = country_df_to_plot.sort_values(by=['jumps'], ascending=True)
    df['rank'] = df['jumps'].rank(ascending=False)
    # synthetic root node for the treemap hierarchy
    df['world'] = 'Earth'

    exercise_df = df[~df['exercise'].isin(['n/a', '(not set)'])]
    # plot a bar graph of the most popular exercises and their counts in the dataset
    # NOTE(review): the column names produced by value_counts().reset_index()
    # changed in pandas 2.0 ('index'/'exercise' -> 'exercise'/'count');
    # color='index' only resolves on pandas < 2.0 — verify installed version
    top_6_events = exercise_df['exercise'].value_counts().reset_index()[:6]
    pop_events = px.bar(top_6_events,
                        y=top_6_events.index,
                        x='exercise',
                        color='index',
                        title='Most Popular Exercises',
                        template="plotly_dark")
    pop_events.update_layout(showlegend=False)

    # all-time jumps per country, horizontal bars
    total = px.bar(country_df_to_plot,
                   y='country', x='jumps',
                   color='country',
                   title='Total Jumps by Country',
                   orientation='h',
                   category_orders={'country': top_10_countries},
                   height=800,
                   template="plotly_dark")
    total.update_layout(showlegend=False)

    # last-7-days country totals (top 10 despite the variable name)
    country_df_to_plot_weekly = df[df['day'] >= df['day'].max() - 7].groupby(['country', 'iso']).sum().reset_index()
    country_df_to_plot_weekly = country_df_to_plot_weekly.sort_values(by=['jumps'], ascending=False)
    top_5_weekly = country_df_to_plot_weekly.iloc[:10]['country'].tolist()
    country_df_to_plot_weekly = country_df_to_plot_weekly[country_df_to_plot_weekly['country'].isin(top_5_weekly)].reset_index(drop=True)
    country_df_to_plot_weekly = country_df_to_plot_weekly.sort_values(by=['jumps'], ascending=True)
    total_weekly = px.bar(country_df_to_plot_weekly,
                          y='country', x='jumps',
                          color='country',
                          title='Top Countries This Week',
                          orientation='h',
                          category_orders={'country': top_5_weekly},
                          height=500,
                          template="plotly_dark")
    total_weekly.update_layout(showlegend=False)

    # all-time city totals; label as "City, ISO3" to disambiguate
    city_df = df.groupby(['city', 'iso']).sum().reset_index()
    city_df = city_df.sort_values(by=['jumps'], ascending=False)
    city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    top_10_cities = city_df.iloc[:15]['city'].tolist()

    # hierarchy for the treemap: Earth -> continent -> country -> state -> city
    icicle_df = df.groupby(['world', 'continent', 'country', 'state', 'city']).sum().reset_index()
    #icicle_df['log10_jumps'] = icicle_df['jumps'].apply(lambda x: math.log10(x) if x > 0 else 0)

    # icicle = px.icicle(icicle_df, path=['world', 'continent', 'country', 'city'],
    #                    values='jumps',
    #                    title='Jumps by Continent/Country',
    #                    template="plotly_dark",
    #                    color_continuous_scale='OrRd',
    #                    maxdepth=7,
    #                    branchvalues='remainder',
    #                    color='jumps')

    print(df.columns)
    nipy_spec = mpl_to_plotly(plt.cm.nipy_spectral, pl_entries=15)
    icicle = px.treemap(icicle_df, path=['world', 'continent', 'country', 'state', 'city'],
                        values='jumps',
                        title='Jumps by Continent/Country/City (click anywhere!)',
                        template="plotly_dark",
                        color_continuous_scale=nipy_spec,
                        branchvalues='total',
                        height=800,
                        maxdepth=4,
                        color='jumps')

    # rebuild city_df filtered to the all-time top cities for the bar chart
    city_df = df.groupby(['city', 'iso']).sum().reset_index()
    city_df = city_df[city_df['city'] != '(not set)']
    city_df['city'] = city_df.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    city_df = city_df[city_df['city'].isin(top_10_cities)].reset_index(drop=True)
    city_df = city_df.sort_values(by=['jumps'], ascending=True)

    avg = px.bar(city_df,
                 y='city', x='jumps', color='city',
                 title='Total Jumps by City',
                 orientation='h',
                 category_orders={'city': top_10_cities},
                 height=800,
                 template="plotly_dark")

    # last-7-days city totals (top 10 despite the variable name)
    city_df_weekly = df[df['day'] >= df['day'].max() - 7].groupby(['city', 'iso']).sum().reset_index()
    city_df_weekly = city_df_weekly[city_df_weekly['city'] != '(not set)']
    city_df_weekly['city'] = city_df_weekly.apply(lambda row: row['city'] + ', ' + row['iso'], axis=1)
    city_df_weekly = city_df_weekly.sort_values(by=['jumps'], ascending=False)
    top_5_weekly = city_df_weekly.iloc[:10]['city'].tolist()
    city_df_weekly = city_df_weekly[city_df_weekly['city'].isin(top_5_weekly)].reset_index(drop=True)
    city_df_weekly = city_df_weekly.sort_values(by=['jumps'], ascending=True)
    avg_weekly = px.bar(city_df_weekly,
                        y='city', x='jumps', color='city',
                        title='Top Cities This Week',
                        orientation='h',
                        category_orders={'city': top_5_weekly},
                        height=500,
                        template="plotly_dark")

    avg.update_layout(showlegend=False)
    avg.update(layout_coloraxis_showscale=False)
    avg_weekly.update_layout(showlegend=False)
    avg_weekly.update(layout_coloraxis_showscale=False)

    # world map colored by country rank (rank, not raw jumps, flattens outliers)
    country_df['rank'] = country_df['jumps'].rank(ascending=False)
    total_map = px.choropleth(country_df, locations="iso",
                              color="rank",
                              hover_name="country", # column to add to hover information
                              hover_data=["jumps"],
                              color_continuous_scale ="OrRd_r",
                              projection='natural earth',
                              template="plotly_dark")
    # remove the legend
    total_map.update_layout(showlegend=False)
    total_map.update(layout_coloraxis_showscale=False)

    # US county choropleth: resolve city -> county FIPS via uscities.csv
    county_df = df.copy()
    county_df['county'] = county_df['city'].map(city_county_map)
    county_df['count_name'] = county_df['city'].map(city_county_name_map)
    county_df = county_df.groupby(['county', 'count_name']).sum().reset_index()
    county_df['rank'] = county_df['jumps'].rank(ascending=False)
    county_df['county'] = county_df['county'].astype(int)
    county_df['county'] = county_df['county'].astype(str).str.zfill(5) # county codes are two digits for state, three for county
    county_map = px.choropleth(county_df, geojson=counties, locations='county', color='rank',
                               color_continuous_scale="OrRd_r",
                               scope="usa",
                               hover_name="count_name",
                               hover_data=["jumps"],
                               template="plotly_dark"
                               )
    county_map.update_layout(showlegend=False)
    county_map.update(layout_coloraxis_showscale=False)

    # cumulative jumps per continent over time (NOTE: df is re-aggregated here,
    # so earlier per-row columns are gone from this point on)
    df = df.groupby(['day', 'continent']).sum().reset_index()
    df = df.sort_values(by=['day'])
    df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
    # fill in any missing days with current max value
    for day in range(1, int(df['day'].max()) + 1):
        for continent in df['continent'].unique():
            if not df[(df['day'] == day) & (df['continent'] == continent)].any().any():
                max_jumps = df[(df['day'] < day) & (df['continent'] == continent)]['total_jumps'].max()
                df = pd.concat([df, pd.DataFrame([[day, continent, max_jumps]], columns=['day', 'continent', 'total_jumps'])])
                #df = df.append({'day': day, 'continent': continent, 'total_jumps': max_jumps}, ignore_index=True)
    df = df.sort_values(by=['day']).reset_index(drop=True)
    jumps_over_time = px.area(df, x='day',
                              y='total_jumps',
                              color='continent',
                              template="plotly_dark")
    # the gap-filling rows above have NaN 'jumps'; zero them before summing
    df.fillna(0, inplace=True)
    daily_df = df.groupby(['day'])[['jumps']].sum().reset_index()
    per_day_plot = px.scatter(daily_df, x='day', y='jumps',
                              trendline='rolling',
                              trendline_options=dict(window=14),
                              trendline_color_override="goldenrod",
                              trendline_scope='overall',
                              template="plotly_dark")

    # order must match the `outputs` list wired up in the gr.Blocks below
    return f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries", \
        total, total_weekly, avg, avg_weekly, total_map, icicle, jumps_over_time, pop_events, county_map, per_day_plot
344
+
345
+
346
# Dashboard layout: one figure per row; every component is populated by
# full_report() when the page first loads.
with gr.Blocks() as demo:
    with gr.Row():
        total_jumps_label = gr.Markdown("Total Jumps: 0")  # placeholder until full_report() runs
    with gr.Row():
        map_fig = gr.Plot(label="Map")
    with gr.Row():
        jumps_over_time = gr.Plot(label="Jumps Over Time")
    with gr.Row():
        total_plot = gr.Plot(label="Top Countries (All Time)")
    with gr.Row():
        total_plot_weekly = gr.Plot(label="Top Countries (This Week)")
    with gr.Row():
        avg_plot = gr.Plot(label="Top Cities (All Time)")
    with gr.Row():
        avg_plot_weekly = gr.Plot(label="Top Cities (This Week)")
    with gr.Row():
        icicle_fig = gr.Plot(label="Treemap")
    with gr.Row():
        per_day_plot = gr.Plot(label="Jumps per Day")
    with gr.Row():
        county_map = gr.Plot(label="US Map")
    with gr.Row():
        popular_events = gr.Plot(label="Popular Events")

    # order must match the tuple returned by full_report()
    outputs = [total_jumps_label, total_plot, total_plot_weekly, avg_plot, avg_plot_weekly, map_fig, icicle_fig, jumps_over_time, popular_events, county_map, per_day_plot]
    # run the report once on page load (no inputs) and fill every component
    dep = demo.load(full_report, None, outputs)

if __name__ == "__main__":
    demo.launch(share=False)