Spaces:

dylanplummer
/

NextJump-analytics

Running

File size: 10,404 Bytes

f8fbbae
 
 
 
 
 
 
 
 
 
 
 
64e560c
f8fbbae
a638a7e
f8fbbae
 
 
 
a638a7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8fbbae
64e560c
 
 
 
4520d60
64e560c
4520d60
 
 
 
 
 
 
64e560c
 
 
 
 
5aeaeae
d30b80e
 
64e560c
f8fbbae
5aeaeae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4520d60
5aeaeae
 
 
 
 
 
4520d60
5aeaeae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4520d60
5aeaeae
 
 
 
 
 
f8fbbae
4896170
661eb89
4520d60
661eb89
 
a638a7e
5754989
4520d60
 
661eb89
4520d60
 
a638a7e
f8fbbae
25e57be
 
 
 
 
 
 
 
 
 
 
a638a7e
25e57be
 
 
 
 
 
 
 
 
 
 
661eb89
25e57be
 
 
 
 
 
a638a7e
25e57be
 
b3d7da6
a638a7e
6f7a661
a638a7e
eec4d7a
a638a7e
fd1a344
4520d60
882717c
a638a7e
882717c
 
 
a638a7e
64e560c
 
 
 
6f7a661
4520d60
 
eec4d7a
4520d60
64e560c
 
fd1a344
64e560c
 
4896170
 
64e560c
90f7110
882717c
90f7110
5754989
 
 
 
 
e585bcf
 
5754989
 
882717c
 
 
0ea7c56
 
 
 
5754989
0ea7c56
 
 
882717c
661eb89
f8fbbae
 
 
 
 
a638a7e
 
882717c
 
a638a7e
b3d7da6
882717c
b3d7da6
661eb89
 
 
 
f8fbbae
0ea7c56
a638a7e
f8fbbae
 
a638a7e

from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
    DateRange,
    Dimension,
    Metric,
    RunReportRequest,
    RunRealtimeReportRequest
)

import gradio as gr
import os
import json
import time
import pandas as pd
import plotly.express as px

# Google Analytics event name that full_report keeps; other events are skipped.
FINISHED_EXERCISE = 'finished_exercise'
# Google Analytics property that the report is run against.
PROPERTY_ID = "384068977"

# When the service-account key is supplied through the environment (as a JSON
# string), write it to a file and point GOOGLE_APPLICATION_CREDENTIALS at it.
try:
    credentials_json = os.environ['GOOGLE_APPLICATION_CREDENTIALS_JSON']
except KeyError:  # running locally
    pass
else:
    # Round-trip through json so a malformed secret fails here, at startup.
    credentials_dict = json.loads(credentials_json)
    # Use one absolute path for both the write and the env var so they cannot
    # disagree when the working directory is not the script directory.
    credentials_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'credentials.json')
    with open(credentials_path, 'w') as f:
        json.dump(credentials_dict, f)
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path

# Country codes: lookup from the 'Alpha-2 code' column to the 'Alpha-3 code' column.
iso = pd.read_csv('iso.tsv', sep='\t')
iso['Alpha-2 code'] = iso['Alpha-2 code'].str.strip()
iso['Alpha-3 code'] = iso['Alpha-3 code'].str.strip()
iso.set_index('Alpha-2 code', inplace=True)
alpha_2_map = iso['Alpha-3 code'].to_dict()

# Loaded once here; full_report passes it as the geojson of the county choropleth.
with open('counties.json') as f:
    counties = json.load(f)

# uscities.csv: keyed by "<city>, <state_name>" to look up county FIPS and county name.
cities = pd.read_csv('uscities.csv')
cities['full_city'] = cities['city'] + ', ' + cities['state_name']
cities.set_index('full_city', inplace=True)
city_county_map = cities['county_fips'].to_dict()
city_county_name_map = cities['county_name'].to_dict()

# cities.csv: keyed by 'Criteria ID' (as a string) to look up the city name and
# the second comma-separated component of 'Canonical Name'.
ga_cities = pd.read_csv('cities.csv')
ga_cities['Criteria ID'] = ga_cities['Criteria ID'].astype(str)
ga_cities.set_index('Criteria ID', inplace=True)
ga_city_map = ga_cities['Name'].to_dict()
ga_cities['state'] = ga_cities['Canonical Name'].str.split(',').str[1].str.strip()
ga_state_map = ga_cities['state'].to_dict()

# Cache state shared with full_report.
cached_report = None   # last DataFrame fetched from Google Analytics
cache_time = 0         # time.time() of the last load
reload_cache = False   # True while cached_report may be served; full_report
                       # resets it once the cache is older than reload_every
# 6 hours
reload_every = 6 * 60 * 60

def _fetch_report_df():
    """Query Google Analytics and return the finished-exercise rows.

    Runs a report on ``PROPERTY_ID`` from 2023-06-30 through today and keeps
    only rows whose event name equals ``FINISHED_EXERCISE``.

    Returns:
        A DataFrame with columns ``day`` (int, from the ``nthDay`` dimension),
        ``jumps`` (float, from the ``eventValue`` metric), ``continent``,
        ``country``, ``iso`` (from ``countryId``) and ``cityId``.
    """
    client = BetaAnalyticsDataClient()

    request = RunReportRequest(
        property=f"properties/{PROPERTY_ID}",
        dimensions=[Dimension(name="nthDay"),
                    Dimension(name='eventName'),
                    Dimension(name="continent"),
                    Dimension(name="country"),
                    Dimension(name="countryId"),
                    Dimension(name="cityId")],
        metrics=[Metric(name="eventValue")],
        date_ranges=[DateRange(start_date="2023-06-30", end_date="today")],
        # Without an explicit limit the API returns only its default number of
        # rows, which would silently truncate the totals as the data grows.
        limit=250_000,
    )
    response = client.run_report(request)

    res = {'day': [], 'jumps': [], 'continent': [], 'country': [], 'iso': [], 'cityId': []}
    for row in response.rows:
        # Dimension values come back in the order requested above.
        day, event_name, continent, country, country_iso, city = (
            dim.value for dim in row.dimension_values
        )
        if event_name != FINISHED_EXERCISE:
            continue
        res['day'].append(int(day))
        res['jumps'].append(float(row.metric_values[0].value))
        res['continent'].append(continent)
        res['country'].append(country)
        res['iso'].append(country_iso)
        res['cityId'].append(city)

    return pd.DataFrame.from_dict(res)


def _pad_missing_days(df):
    """Add a row for every (day, continent) pair from day 1 on that has no data.

    Each added row carries the continent's largest ``total_jumps`` from earlier
    days, or 0 when the continent has no earlier data, so the cumulative series
    has no holes. Added rows have no ``jumps`` value (NaN after the concat).

    Args:
        df: frame with ``day``, ``continent`` and ``total_jumps`` columns, one
            row per (day, continent).

    Returns:
        ``df`` with the filler rows appended (unsorted; index not reset).
    """
    if df.empty:
        return df

    continents = list(df['continent'].unique())
    known = {
        (int(day), continent): total
        for day, continent, total in zip(df['day'], df['continent'], df['total_jumps'])
    }

    # Running maximum of total_jumps per continent, seeded with any rows that
    # precede day 1 (those days are not padded but still count as history).
    running = {}
    for (day, continent), total in known.items():
        if day < 1:
            running[continent] = max(running.get(continent, total), total)

    filler = []
    for day in range(1, int(df['day'].max()) + 1):
        for continent in continents:
            total = known.get((day, continent))
            if total is None:
                filler.append((day, continent, running.get(continent, 0)))
            else:
                running[continent] = max(running.get(continent, total), total)

    if not filler:
        return df
    # Single concat instead of one per missing cell.
    return pd.concat([df, pd.DataFrame(filler, columns=['day', 'continent', 'total_jumps'])])


def full_report():
    """Build the dashboard headline and figures from the Google Analytics report.

    The raw report is fetched at most once every ``reload_every`` seconds and
    kept in the module-level cache (``cached_report``, ``cache_time``,
    ``reload_cache``). The cache is only marked valid after a successful fetch,
    so a failed request is retried on the next call.

    Returns:
        A 7-tuple: the Markdown headline string, then the plotly figures
        ``total`` (jumps by country/city), ``avg`` (jumps by city/day),
        ``total_map`` (world choropleth), ``jumps_over_time`` (cumulative area
        chart by continent), ``county_map`` (US county choropleth) and
        ``per_day_plot`` (jumps per day with a rolling trendline).
    """
    global cached_report, cache_time, reload_cache
    if time.time() - cache_time > reload_every:
        reload_cache = False
    if not reload_cache or cached_report is None:
        print("Loading report...")
        df = _fetch_report_df()
        # Update cache state only after the fetch succeeded.
        cached_report = df.copy(deep=True)
        cache_time = time.time()
        reload_cache = True
    else:
        print("Using cached report...")
        df = cached_report.copy(deep=True)

    total_jumps = int(df['jumps'].sum())
    unique_countries = df['country'].nunique()
    unique_cities = df['cityId'].nunique()

    print(f"Total jumps: {total_jumps}, unique countries: {unique_countries}, unique cities: {unique_cities}")
    df['iso'] = df['iso'].map(alpha_2_map)
    df['jumps'] = df['jumps'].astype(int)
    df['city'] = df['cityId'].map(ga_city_map)
    df['state'] = df['cityId'].map(ga_state_map)
    # Every Bermuda row is labelled as Hamilton regardless of the looked-up city.
    df['city'] = df['city'].where(df['country'] != 'Bermuda', 'Hamilton')
    df['city'] = df['city'] + ', ' + df['state']

    # --- Top 10 countries, bars split by city -------------------------------
    # numeric_only keeps the aggregation to the day/jumps columns instead of
    # also "summing" the string columns.
    country_df = df.groupby(['country', 'iso']).sum(numeric_only=True).reset_index()
    country_df = country_df.sort_values(by=['jumps'], ascending=False)
    top_10_countries = country_df.iloc[:10]['country'].tolist()

    country_df_to_plot = df.groupby(['country', 'iso', 'city']).sum(numeric_only=True).reset_index()
    country_df_to_plot = country_df_to_plot[country_df_to_plot['country'].isin(top_10_countries)].reset_index(drop=True)
    country_df_to_plot = country_df_to_plot.sort_values(by=['day', 'jumps'], ascending=True)
    total = px.bar(country_df_to_plot,
                   y='country', x='jumps',
                   color='city',
                   title='Total Jumps by Country/City',
                   orientation='h',
                   category_orders={'country': top_10_countries},
                   template="plotly_dark")
    total.update_layout(showlegend=False)

    # --- Top 10 cities, bars split by day -----------------------------------
    city_df = df.groupby(['city', 'iso']).sum(numeric_only=True).reset_index()
    city_df = city_df.sort_values(by=['jumps'], ascending=False)
    city_df['city'] = city_df['city'] + ', ' + city_df['iso']
    top_10_cities = city_df.iloc[:10]['city'].tolist()

    city_df = df.groupby(['city', 'iso', 'day']).sum(numeric_only=True).reset_index()
    # NOTE(review): 'city' is already "<name>, <state>" at this point, so this
    # filter probably never matches - confirm what it was meant to exclude.
    city_df = city_df[city_df['city'] != '(not set)']
    city_df['city'] = city_df['city'] + ', ' + city_df['iso']
    city_df = city_df[city_df['city'].isin(top_10_cities)].reset_index(drop=True)
    city_df = city_df.sort_values(by=['day', 'jumps'], ascending=True)

    avg = px.bar(city_df,
                 y='city', x='jumps', color='day',
                 title='Total Jumps by City/Day',
                 orientation='h',
                 category_orders={'city': top_10_cities},
                 template="plotly_dark")

    avg.update_layout(showlegend=False)
    avg.update(layout_coloraxis_showscale=False)

    # --- World map coloured by country rank ---------------------------------
    country_df['rank'] = country_df['jumps'].rank(ascending=False)
    total_map = px.choropleth(country_df, locations="iso",
                              color="rank",
                              hover_name="country",  # column to add to hover information
                              hover_data=["jumps"],
                              color_continuous_scale="OrRd_r",
                              projection='natural earth',
                              template="plotly_dark")
    # remove the legend
    total_map.update_layout(showlegend=False)
    total_map.update(layout_coloraxis_showscale=False)

    # --- US county map coloured by county rank ------------------------------
    county_df = df.copy()
    county_df['county'] = county_df['city'].map(city_county_map)
    county_df['count_name'] = county_df['city'].map(city_county_name_map)
    county_df = county_df.groupby(['county', 'count_name']).sum(numeric_only=True).reset_index()
    county_df['rank'] = county_df['jumps'].rank(ascending=False)
    county_df['county'] = county_df['county'].astype(int)
    county_df['county'] = county_df['county'].astype(str).str.zfill(5)  # county codes are two digits for state, three for county
    county_map = px.choropleth(county_df, geojson=counties, locations='county', color='rank',
                               color_continuous_scale="OrRd_r",
                               scope="usa",
                               hover_name="count_name",
                               hover_data=["jumps"],
                               template="plotly_dark")
    county_map.update_layout(showlegend=False)
    county_map.update(layout_coloraxis_showscale=False)

    # --- Cumulative jumps per continent over time ---------------------------
    df = df.groupby(['day', 'continent']).sum(numeric_only=True).reset_index()
    df = df.sort_values(by=['day'])
    df['total_jumps'] = df.groupby('continent')['jumps'].cumsum()
    # fill in any missing days with current max value
    df = _pad_missing_days(df)
    df = df.sort_values(by=['day']).reset_index(drop=True)
    jumps_over_time = px.area(df, x='day',
                              y='total_jumps',
                              color='continent',
                              template="plotly_dark")

    # --- Jumps per day with a 14-day rolling trendline ----------------------
    # Padded rows have no 'jumps' value, so days without data sum to 0 here.
    daily_df = df.groupby(['day']).sum(numeric_only=True).reset_index()
    per_day_plot = px.scatter(daily_df, x='day', y='jumps',
                              trendline='rolling',
                              trendline_options=dict(window=14),
                              trendline_color_override="goldenrod",
                              trendline_scope='overall',
                              template="plotly_dark")

    return f"# {total_jumps:,} total jumps in {unique_cities:,} cities across {unique_countries:,} countries", total, avg, total_map, jumps_over_time, county_map, per_day_plot


# Dashboard layout: one row per widget, in display order.
with gr.Blocks(theme='WeixuanYuan/Soft_dark') as demo:
    with gr.Row():
        total_jumps_label = gr.Markdown(value="Total Jumps: 0")
    with gr.Row():
        map_fig = gr.Plot(label="Map")
    with gr.Row():
        jumps_over_time = gr.Plot(label="Jumps Over Time")
    with gr.Row():
        total_plot = gr.Plot(label="Top 10 Countries")
    with gr.Row():
        avg_plot = gr.Plot(label="Top 10 Cities")
    with gr.Row():
        per_day_plot = gr.Plot(label="Jumps per Day")
    with gr.Row():
        county_map = gr.Plot(label="US Map")

    # Listed in the order of the values returned by full_report, which is
    # not the order the widgets appear on the page.
    outputs = [
        total_jumps_label,
        total_plot,
        avg_plot,
        map_fig,
        jumps_over_time,
        county_map,
        per_day_plot,
    ]
    # Register full_report (no inputs) as the handler for the load event.
    dep = demo.load(fn=full_report, inputs=None, outputs=outputs)

if __name__ == "__main__":
    demo.launch(share=False)