q275343119 committed on
Commit
e4b03ab
·
1 Parent(s): 8252fc5

feat - ip map

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. src/leaderboard_analytics/ui.py +117 -26
.gitignore CHANGED
@@ -49,6 +49,7 @@ coverage.xml
49
  *.py.cover
50
  .hypothesis/
51
  .pytest_cache/
 
52
  cover/
53
 
54
  # Translations
 
49
  *.py.cover
50
  .hypothesis/
51
  .pytest_cache/
52
+ .pytest_tmp/
53
  cover/
54
 
55
  # Translations
src/leaderboard_analytics/ui.py CHANGED
@@ -8,6 +8,7 @@ from typing import Any
8
  import gradio as gr
9
  import pandas as pd
10
  import plotly.express as px
 
11
 
12
  from leaderboard_analytics.schemas import Granularity, QueryFilters
13
  from leaderboard_analytics.services import AnalyticsService
@@ -40,7 +41,9 @@ def _empty_plot(title: str):
40
 
41
 
42
  def _empty_map(title: str):
43
- return px.choropleth(title=title)
 
 
44
 
45
 
46
  def _query_range_text(filters: QueryFilters) -> str:
@@ -74,7 +77,9 @@ def _visitor_location_top_table(visitor_locations: pd.DataFrame) -> pd.DataFrame
74
 
75
 
76
  def _visitor_location_debug_text(
77
- visitor_locations: pd.DataFrame, geoip_debug: dict[str, object]
 
 
78
  ) -> str:
79
  if visitor_locations.empty:
80
  total_pv = 0
@@ -96,6 +101,16 @@ def _visitor_location_debug_text(
96
  loaded = "yes" if geoip_debug.get("reader_loaded") else "no"
97
  attempted = "yes" if geoip_debug.get("load_attempted") else "no"
98
  path = geoip_debug.get("database_path") or "(not configured)"
 
 
 
 
 
 
 
 
 
 
99
 
100
  return (
101
  f"GeoIP DB: configured={configured}, exists={exists}, loaded={loaded}, "
@@ -103,10 +118,106 @@ def _visitor_location_debug_text(
103
  f"GeoIP path: `{path}` \n"
104
  f"Total location PV: {total_pv} | Users/IPs: {total_users} | "
105
  f"Mapped regions: {mapped_regions} \n"
106
- f"Unknown PV: {unknown_pv} | Unknown users/IPs: {unknown_users}"
 
 
 
 
 
107
  )
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def build_dashboard(service: AnalyticsService) -> gr.Blocks:
111
  default_end = datetime.now(tz=UTC)
112
  default_start = (default_end - timedelta(days=7)).replace(microsecond=0)
@@ -153,11 +264,12 @@ def build_dashboard(service: AnalyticsService) -> gr.Blocks:
153
  filter_df = service.get_filter_distribution(filters)
154
  funnel_df = service.get_funnel(filters)
155
  visitors_df = service.get_new_vs_returning(filters)
156
- visitor_locations_df = service.get_visitor_locations(filters)
157
  visitor_locations_top_df = _visitor_location_top_table(visitor_locations_df)
158
  visitor_locations_debug = _visitor_location_debug_text(
159
  visitor_locations_df,
160
  service.get_geoip_debug_info(),
 
161
  )
162
 
163
  range_text = _query_range_text(filters)
@@ -210,28 +322,7 @@ def build_dashboard(service: AnalyticsService) -> gr.Blocks:
210
  if not visitors_df.empty
211
  else px.bar(title=f"New vs returning visitors (no data for {range_text})")
212
  )
213
- map_df = (
214
- visitor_locations_df[visitor_locations_df["country_code"] != "Unknown"]
215
- if not visitor_locations_df.empty
216
- else visitor_locations_df
217
- )
218
- visitor_locations_plot = (
219
- px.choropleth(
220
- map_df,
221
- locations="country_code",
222
- color="pv",
223
- hover_name="country_name",
224
- hover_data={"country_code": True, "pv": True, "ip_count": True},
225
- color_continuous_scale="Blues",
226
- title="Visitor locations by country",
227
- )
228
- if not map_df.empty
229
- else _empty_map(f"Visitor locations by country (no data for {range_text})")
230
- )
231
- visitor_locations_plot.update_layout(
232
- height=520,
233
- margin={"l": 0, "r": 0, "t": 48, "b": 0},
234
- )
235
  csv_archive = _write_csv_archive(
236
  {
237
  "overview": overview_df,
 
8
  import gradio as gr
9
  import pandas as pd
10
  import plotly.express as px
11
+ import plotly.graph_objects as go
12
 
13
  from leaderboard_analytics.schemas import Granularity, QueryFilters
14
  from leaderboard_analytics.services import AnalyticsService
 
41
 
42
 
43
  def _empty_map(title: str):
44
+ figure = go.Figure()
45
+ _style_visitor_location_map(figure, title)
46
+ return figure
47
 
48
 
49
  def _query_range_text(filters: QueryFilters) -> str:
 
77
 
78
 
79
  def _visitor_location_debug_text(
80
+ visitor_locations: pd.DataFrame,
81
+ geoip_debug: dict[str, object],
82
+ ip_debug: dict[str, object] | None = None,
83
  ) -> str:
84
  if visitor_locations.empty:
85
  total_pv = 0
 
101
  loaded = "yes" if geoip_debug.get("reader_loaded") else "no"
102
  attempted = "yes" if geoip_debug.get("load_attempted") else "no"
103
  path = geoip_debug.get("database_path") or "(not configured)"
104
+ ip_debug = ip_debug or {}
105
+ global_ips = int(ip_debug.get("global_ips", 0))
106
+ global_pv = int(ip_debug.get("global_ip_pv", 0))
107
+ private_ips = int(ip_debug.get("private_ips", 0))
108
+ private_pv = int(ip_debug.get("private_ip_pv", 0))
109
+ loopback_ips = int(ip_debug.get("loopback_ips", 0))
110
+ loopback_pv = int(ip_debug.get("loopback_ip_pv", 0))
111
+ invalid_ips = int(ip_debug.get("invalid_ips", 0))
112
+ invalid_pv = int(ip_debug.get("invalid_ip_pv", 0))
113
+ buckets = ip_debug.get("top_ip_pv_buckets", {})
114
 
115
  return (
116
  f"GeoIP DB: configured={configured}, exists={exists}, loaded={loaded}, "
 
118
  f"GeoIP path: `{path}` \n"
119
  f"Total location PV: {total_pv} | Users/IPs: {total_users} | "
120
  f"Mapped regions: {mapped_regions} \n"
121
+ f"Unknown PV: {unknown_pv} | Unknown users/IPs: {unknown_users} \n"
122
+ f"Public IPs: {global_ips} ({global_pv} PV) | Private IPs: {private_ips} "
123
+ f"({private_pv} PV) \n"
124
+ f"Loopback IPs: {loopback_ips} ({loopback_pv} PV) | Invalid IPs: {invalid_ips} "
125
+ f"({invalid_pv} PV) \n"
126
+ f"PV/IP buckets: {buckets}"
127
  )
128
 
129
 
130
def _style_visitor_location_map(figure: go.Figure, title: str) -> None:
    """Apply the shared look of the visitor-location map to ``figure``.

    The figure is modified in place through ``update_geos`` and
    ``update_layout``; nothing is returned.

    Args:
        figure: Plotly figure whose geo subplot and layout are restyled.
        title: Text shown as the left-aligned figure title.
    """
    outline_color = "#cfd6df"
    water_color = "#f8fafc"
    canvas_color = "#ffffff"

    geo_options = {
        "projection_type": "mercator",
        "showframe": False,
        "showcoastlines": True,
        "coastlinecolor": outline_color,
        "coastlinewidth": 0.6,
        "showcountries": True,
        "countrycolor": outline_color,
        "countrywidth": 0.7,
        "showland": True,
        "landcolor": "#eef2f7",
        "showocean": True,
        "oceancolor": water_color,
        "showlakes": True,
        "lakecolor": water_color,
        "bgcolor": canvas_color,
        # Restrict the visible latitude band to -55..75 degrees.
        "lataxis_range": [-55, 75],
        "lonaxis_range": [-180, 180],
    }
    figure.update_geos(**geo_options)

    layout_options = {
        "title": {"text": title, "x": 0.02, "xanchor": "left"},
        "height": 560,
        "paper_bgcolor": canvas_color,
        "plot_bgcolor": canvas_color,
        "font": {"color": "#1f2937"},
        "margin": {"l": 0, "r": 0, "t": 52, "b": 0},
        "showlegend": False,
        "hoverlabel": {
            "bgcolor": canvas_color,
            "bordercolor": "#3b82f6",
            "font_color": "#111827",
        },
    }
    figure.update_layout(**layout_options)
164
+
165
+
166
def _visitor_location_map(visitor_locations: pd.DataFrame, range_text: str) -> go.Figure:
    """Build the bubble map of page views per country.

    Rows whose ``country_code`` is ``"Unknown"`` are left off the map. When no
    rows remain (or the input is empty) a styled placeholder figure is
    returned instead.

    Args:
        visitor_locations: Frame read for the ``country_code``,
            ``country_name``, ``pv`` and ``ip_count`` columns.
        range_text: Human-readable query range used in the "no data" title.

    Returns:
        A Plotly figure with one ``Scattergeo`` marker trace and a summary
        annotation, or the empty placeholder map.
    """
    no_data_title = f"Visitor locations by country (no mapped data for {range_text})"

    # An empty frame may not be filterable by column, so bail out first.
    if visitor_locations.empty:
        return _empty_map(no_data_title)

    is_mapped = visitor_locations["country_code"] != "Unknown"
    mapped = visitor_locations[is_mapped].copy()
    if mapped.empty:
        return _empty_map(no_data_title)

    # Area-based marker scaling: sizeref is derived from the largest PV value
    # and a reference marker size of 52; the floor of 1 avoids a zero sizeref.
    reference_size = 52
    peak_pv = max(int(mapped["pv"].max()), 1)
    size_ref = 2.0 * peak_pv / (reference_size**2)

    hover_lines = [
        "<b>%{text}</b>",
        "Country code: %{customdata[0]}",
        "PV: %{customdata[1]:,}",
        "Users/IPs: %{customdata[2]:,}",
    ]
    # The trailing <extra></extra> is appended verbatim after the hover lines.
    hover_template = "<br>".join(hover_lines) + "<extra></extra>"

    bubble_style = {
        "size": mapped["pv"],
        "sizemode": "area",
        "sizeref": size_ref,
        "sizemin": 8,
        "color": "rgba(59, 130, 246, 0.55)",
        "line": {"color": "rgba(37, 99, 235, 0.92)", "width": 1.2},
    }
    bubbles = go.Scattergeo(
        locationmode="country names",
        locations=mapped["country_name"],
        mode="markers",
        text=mapped["country_name"],
        customdata=mapped[["country_code", "pv", "ip_count"]],
        hovertemplate=hover_template,
        marker=bubble_style,
    )
    figure = go.Figure(bubbles)
    _style_visitor_location_map(figure, "Visitor locations by country")

    # Summary box anchored to the lower-left corner in paper coordinates.
    region_count = len(mapped)
    pv_total = int(mapped["pv"].sum())
    ip_total = int(mapped["ip_count"].sum())
    summary_text = "<br>".join(
        [
            f"Mapped regions: {region_count}",
            f"Mapped PV: {pv_total:,}",
            f"Users/IPs: {ip_total:,}",
        ]
    )
    figure.add_annotation(
        x=0.02,
        y=0.08,
        xref="paper",
        yref="paper",
        text=summary_text,
        showarrow=False,
        align="left",
        bgcolor="rgba(255, 255, 255, 0.88)",
        bordercolor="rgba(148, 163, 184, 0.55)",
        borderwidth=1,
        font={"color": "#1f2937", "size": 12},
    )
    return figure
219
+
220
+
221
  def build_dashboard(service: AnalyticsService) -> gr.Blocks:
222
  default_end = datetime.now(tz=UTC)
223
  default_start = (default_end - timedelta(days=7)).replace(microsecond=0)
 
264
  filter_df = service.get_filter_distribution(filters)
265
  funnel_df = service.get_funnel(filters)
266
  visitors_df = service.get_new_vs_returning(filters)
267
+ visitor_locations_df, ip_debug = service.get_visitor_location_details(filters)
268
  visitor_locations_top_df = _visitor_location_top_table(visitor_locations_df)
269
  visitor_locations_debug = _visitor_location_debug_text(
270
  visitor_locations_df,
271
  service.get_geoip_debug_info(),
272
+ ip_debug,
273
  )
274
 
275
  range_text = _query_range_text(filters)
 
322
  if not visitors_df.empty
323
  else px.bar(title=f"New vs returning visitors (no data for {range_text})")
324
  )
325
+ visitor_locations_plot = _visitor_location_map(visitor_locations_df, range_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  csv_archive = _write_csv_archive(
327
  {
328
  "overview": overview_df,