Commit ·
e4b03ab
1
Parent(s): 8252fc5
feat - ip map
Browse files- .gitignore +1 -0
- src/leaderboard_analytics/ui.py +117 -26
.gitignore
CHANGED
|
@@ -49,6 +49,7 @@ coverage.xml
|
|
| 49 |
*.py.cover
|
| 50 |
.hypothesis/
|
| 51 |
.pytest_cache/
|
|
|
|
| 52 |
cover/
|
| 53 |
|
| 54 |
# Translations
|
|
|
|
| 49 |
*.py.cover
|
| 50 |
.hypothesis/
|
| 51 |
.pytest_cache/
|
| 52 |
+
.pytest_tmp/
|
| 53 |
cover/
|
| 54 |
|
| 55 |
# Translations
|
src/leaderboard_analytics/ui.py
CHANGED
|
@@ -8,6 +8,7 @@ from typing import Any
|
|
| 8 |
import gradio as gr
|
| 9 |
import pandas as pd
|
| 10 |
import plotly.express as px
|
|
|
|
| 11 |
|
| 12 |
from leaderboard_analytics.schemas import Granularity, QueryFilters
|
| 13 |
from leaderboard_analytics.services import AnalyticsService
|
|
@@ -40,7 +41,9 @@ def _empty_plot(title: str):
|
|
| 40 |
|
| 41 |
|
| 42 |
def _empty_map(title: str):
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
def _query_range_text(filters: QueryFilters) -> str:
|
|
@@ -74,7 +77,9 @@ def _visitor_location_top_table(visitor_locations: pd.DataFrame) -> pd.DataFrame
|
|
| 74 |
|
| 75 |
|
| 76 |
def _visitor_location_debug_text(
|
| 77 |
-
visitor_locations: pd.DataFrame,
|
|
|
|
|
|
|
| 78 |
) -> str:
|
| 79 |
if visitor_locations.empty:
|
| 80 |
total_pv = 0
|
|
@@ -96,6 +101,16 @@ def _visitor_location_debug_text(
|
|
| 96 |
loaded = "yes" if geoip_debug.get("reader_loaded") else "no"
|
| 97 |
attempted = "yes" if geoip_debug.get("load_attempted") else "no"
|
| 98 |
path = geoip_debug.get("database_path") or "(not configured)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
return (
|
| 101 |
f"GeoIP DB: configured={configured}, exists={exists}, loaded={loaded}, "
|
|
@@ -103,10 +118,106 @@ def _visitor_location_debug_text(
|
|
| 103 |
f"GeoIP path: `{path}` \n"
|
| 104 |
f"Total location PV: {total_pv} | Users/IPs: {total_users} | "
|
| 105 |
f"Mapped regions: {mapped_regions} \n"
|
| 106 |
-
f"Unknown PV: {unknown_pv} | Unknown users/IPs: {unknown_users}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
)
|
| 108 |
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def build_dashboard(service: AnalyticsService) -> gr.Blocks:
|
| 111 |
default_end = datetime.now(tz=UTC)
|
| 112 |
default_start = (default_end - timedelta(days=7)).replace(microsecond=0)
|
|
@@ -153,11 +264,12 @@ def build_dashboard(service: AnalyticsService) -> gr.Blocks:
|
|
| 153 |
filter_df = service.get_filter_distribution(filters)
|
| 154 |
funnel_df = service.get_funnel(filters)
|
| 155 |
visitors_df = service.get_new_vs_returning(filters)
|
| 156 |
-
visitor_locations_df = service.
|
| 157 |
visitor_locations_top_df = _visitor_location_top_table(visitor_locations_df)
|
| 158 |
visitor_locations_debug = _visitor_location_debug_text(
|
| 159 |
visitor_locations_df,
|
| 160 |
service.get_geoip_debug_info(),
|
|
|
|
| 161 |
)
|
| 162 |
|
| 163 |
range_text = _query_range_text(filters)
|
|
@@ -210,28 +322,7 @@ def build_dashboard(service: AnalyticsService) -> gr.Blocks:
|
|
| 210 |
if not visitors_df.empty
|
| 211 |
else px.bar(title=f"New vs returning visitors (no data for {range_text})")
|
| 212 |
)
|
| 213 |
-
|
| 214 |
-
visitor_locations_df[visitor_locations_df["country_code"] != "Unknown"]
|
| 215 |
-
if not visitor_locations_df.empty
|
| 216 |
-
else visitor_locations_df
|
| 217 |
-
)
|
| 218 |
-
visitor_locations_plot = (
|
| 219 |
-
px.choropleth(
|
| 220 |
-
map_df,
|
| 221 |
-
locations="country_code",
|
| 222 |
-
color="pv",
|
| 223 |
-
hover_name="country_name",
|
| 224 |
-
hover_data={"country_code": True, "pv": True, "ip_count": True},
|
| 225 |
-
color_continuous_scale="Blues",
|
| 226 |
-
title="Visitor locations by country",
|
| 227 |
-
)
|
| 228 |
-
if not map_df.empty
|
| 229 |
-
else _empty_map(f"Visitor locations by country (no data for {range_text})")
|
| 230 |
-
)
|
| 231 |
-
visitor_locations_plot.update_layout(
|
| 232 |
-
height=520,
|
| 233 |
-
margin={"l": 0, "r": 0, "t": 48, "b": 0},
|
| 234 |
-
)
|
| 235 |
csv_archive = _write_csv_archive(
|
| 236 |
{
|
| 237 |
"overview": overview_df,
|
|
|
|
| 8 |
import gradio as gr
|
| 9 |
import pandas as pd
|
| 10 |
import plotly.express as px
|
| 11 |
+
import plotly.graph_objects as go
|
| 12 |
|
| 13 |
from leaderboard_analytics.schemas import Granularity, QueryFilters
|
| 14 |
from leaderboard_analytics.services import AnalyticsService
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
def _empty_map(title: str):
    """Return a data-free map figure that carries only the shared map styling.

    Args:
        title: Text forwarded to ``_style_visitor_location_map`` as the title.

    Returns:
        A new ``go.Figure`` with no traces, styled like the visitor-location map.
    """
    blank = go.Figure()
    # Reuse the same styling helper as the populated map so both states match.
    _style_visitor_location_map(blank, title)
    return blank
|
| 47 |
|
| 48 |
|
| 49 |
def _query_range_text(filters: QueryFilters) -> str:
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
def _visitor_location_debug_text(
|
| 80 |
+
visitor_locations: pd.DataFrame,
|
| 81 |
+
geoip_debug: dict[str, object],
|
| 82 |
+
ip_debug: dict[str, object] | None = None,
|
| 83 |
) -> str:
|
| 84 |
if visitor_locations.empty:
|
| 85 |
total_pv = 0
|
|
|
|
| 101 |
loaded = "yes" if geoip_debug.get("reader_loaded") else "no"
|
| 102 |
attempted = "yes" if geoip_debug.get("load_attempted") else "no"
|
| 103 |
path = geoip_debug.get("database_path") or "(not configured)"
|
| 104 |
+
ip_debug = ip_debug or {}
|
| 105 |
+
global_ips = int(ip_debug.get("global_ips", 0))
|
| 106 |
+
global_pv = int(ip_debug.get("global_ip_pv", 0))
|
| 107 |
+
private_ips = int(ip_debug.get("private_ips", 0))
|
| 108 |
+
private_pv = int(ip_debug.get("private_ip_pv", 0))
|
| 109 |
+
loopback_ips = int(ip_debug.get("loopback_ips", 0))
|
| 110 |
+
loopback_pv = int(ip_debug.get("loopback_ip_pv", 0))
|
| 111 |
+
invalid_ips = int(ip_debug.get("invalid_ips", 0))
|
| 112 |
+
invalid_pv = int(ip_debug.get("invalid_ip_pv", 0))
|
| 113 |
+
buckets = ip_debug.get("top_ip_pv_buckets", {})
|
| 114 |
|
| 115 |
return (
|
| 116 |
f"GeoIP DB: configured={configured}, exists={exists}, loaded={loaded}, "
|
|
|
|
| 118 |
f"GeoIP path: `{path}` \n"
|
| 119 |
f"Total location PV: {total_pv} | Users/IPs: {total_users} | "
|
| 120 |
f"Mapped regions: {mapped_regions} \n"
|
| 121 |
+
f"Unknown PV: {unknown_pv} | Unknown users/IPs: {unknown_users} \n"
|
| 122 |
+
f"Public IPs: {global_ips} ({global_pv} PV) | Private IPs: {private_ips} "
|
| 123 |
+
f"({private_pv} PV) \n"
|
| 124 |
+
f"Loopback IPs: {loopback_ips} ({loopback_pv} PV) | Invalid IPs: {invalid_ips} "
|
| 125 |
+
f"({invalid_pv} PV) \n"
|
| 126 |
+
f"PV/IP buckets: {buckets}"
|
| 127 |
)
|
| 128 |
|
| 129 |
|
| 130 |
+
def _style_visitor_location_map(figure: go.Figure, title: str) -> None:
    """Apply the visitor-location map styling to ``figure`` in place.

    Configures the geo subplot (Mercator projection, coastline/country
    borders, land/ocean/lake fills, latitude limited to -55..75 and the full
    longitude range) and then the surrounding layout (left-anchored title,
    fixed height, white backgrounds, hidden legend, hover label colours).

    Args:
        figure: Plotly figure to restyle; it is mutated and nothing is returned.
        title: Text used for the figure title.
    """
    border_color = "#cfd6df"
    water_color = "#f8fafc"
    white = "#ffffff"

    geo_options = {
        "projection_type": "mercator",
        "showframe": False,
        "showcoastlines": True,
        "coastlinecolor": border_color,
        "coastlinewidth": 0.6,
        "showcountries": True,
        "countrycolor": border_color,
        "countrywidth": 0.7,
        "showland": True,
        "landcolor": "#eef2f7",
        "showocean": True,
        "oceancolor": water_color,
        "showlakes": True,
        "lakecolor": water_color,
        "bgcolor": white,
        "lataxis_range": [-55, 75],
        "lonaxis_range": [-180, 180],
    }
    figure.update_geos(**geo_options)

    hover_style = {
        "bgcolor": white,
        "bordercolor": "#3b82f6",
        "font_color": "#111827",
    }
    layout_options = {
        "title": {"text": title, "x": 0.02, "xanchor": "left"},
        "height": 560,
        "paper_bgcolor": white,
        "plot_bgcolor": white,
        "font": {"color": "#1f2937"},
        "margin": {"l": 0, "r": 0, "t": 52, "b": 0},
        "showlegend": False,
        "hoverlabel": hover_style,
    }
    figure.update_layout(**layout_options)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def _visitor_location_map(visitor_locations: pd.DataFrame, range_text: str) -> go.Figure:
    """Build the bubble map of page views per country.

    Rows whose ``country_code`` equals ``"Unknown"`` are left off the map. When
    nothing remains, an empty styled map is returned whose title mentions
    ``range_text``. Otherwise each remaining row becomes one marker located by
    ``country_name`` and sized by ``pv`` (area mode), and a summary annotation
    with region, PV and user/IP totals is added.

    Args:
        visitor_locations: Frame read via the ``country_code``,
            ``country_name``, ``pv`` and ``ip_count`` columns.
        range_text: Human-readable query range; only used in the empty-state
            title.

    Returns:
        The styled ``go.Figure``.
    """
    if visitor_locations.empty:
        # Skip the column lookup entirely for an empty frame.
        mapped = visitor_locations.copy()
    else:
        is_known = visitor_locations["country_code"] != "Unknown"
        mapped = visitor_locations[is_known].copy()

    if mapped.empty:
        return _empty_map(f"Visitor locations by country (no mapped data for {range_text})")

    # Floor at 1 so the size reference below can never become zero.
    peak_pv = max(int(mapped["pv"].max()), 1)
    # NOTE(review): looks like Plotly's bubble-size formula with a target
    # maximum marker size of 52 — confirm against the Plotly docs.
    size_ref = 2.0 * peak_pv / (52**2)

    hover_template = (
        "<b>%{text}</b><br>"
        "Country code: %{customdata[0]}<br>"
        "PV: %{customdata[1]:,}<br>"
        "Users/IPs: %{customdata[2]:,}<extra></extra>"
    )
    bubble_marker = {
        "size": mapped["pv"],
        "sizemode": "area",
        "sizeref": size_ref,
        "sizemin": 8,
        "color": "rgba(59, 130, 246, 0.55)",
        "line": {"color": "rgba(37, 99, 235, 0.92)", "width": 1.2},
    }
    bubbles = go.Scattergeo(
        locationmode="country names",
        locations=mapped["country_name"],
        mode="markers",
        text=mapped["country_name"],
        customdata=mapped[["country_code", "pv", "ip_count"]],
        hovertemplate=hover_template,
        marker=bubble_marker,
    )
    figure = go.Figure(bubbles)
    _style_visitor_location_map(figure, "Visitor locations by country")

    # Totals cover only the rows that made it onto the map.
    region_count = len(mapped)
    pv_total = int(mapped["pv"].sum())
    user_total = int(mapped["ip_count"].sum())
    summary = (
        f"Mapped regions: {region_count}<br>"
        f"Mapped PV: {pv_total:,}<br>"
        f"Users/IPs: {user_total:,}"
    )
    figure.add_annotation(
        x=0.02,
        y=0.08,
        xref="paper",
        yref="paper",
        text=summary,
        showarrow=False,
        align="left",
        bgcolor="rgba(255, 255, 255, 0.88)",
        bordercolor="rgba(148, 163, 184, 0.55)",
        borderwidth=1,
        font={"color": "#1f2937", "size": 12},
    )
    return figure
|
| 219 |
+
|
| 220 |
+
|
| 221 |
def build_dashboard(service: AnalyticsService) -> gr.Blocks:
|
| 222 |
default_end = datetime.now(tz=UTC)
|
| 223 |
default_start = (default_end - timedelta(days=7)).replace(microsecond=0)
|
|
|
|
| 264 |
filter_df = service.get_filter_distribution(filters)
|
| 265 |
funnel_df = service.get_funnel(filters)
|
| 266 |
visitors_df = service.get_new_vs_returning(filters)
|
| 267 |
+
visitor_locations_df, ip_debug = service.get_visitor_location_details(filters)
|
| 268 |
visitor_locations_top_df = _visitor_location_top_table(visitor_locations_df)
|
| 269 |
visitor_locations_debug = _visitor_location_debug_text(
|
| 270 |
visitor_locations_df,
|
| 271 |
service.get_geoip_debug_info(),
|
| 272 |
+
ip_debug,
|
| 273 |
)
|
| 274 |
|
| 275 |
range_text = _query_range_text(filters)
|
|
|
|
| 322 |
if not visitors_df.empty
|
| 323 |
else px.bar(title=f"New vs returning visitors (no data for {range_text})")
|
| 324 |
)
|
| 325 |
+
visitor_locations_plot = _visitor_location_map(visitor_locations_df, range_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
csv_archive = _write_csv_archive(
|
| 327 |
{
|
| 328 |
"overview": overview_df,
|