19arjun89 committed on
Commit
0763cc5
·
verified ·
1 Parent(s): c46fbf1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -42
app.py CHANGED
@@ -1,9 +1,11 @@
 
1
  import os
2
  from collections import Counter
3
 
4
  import gradio as gr
5
  import pandas as pd
6
  import plotly.express as px
 
7
  import pycountry
8
  from datasets import load_dataset
9
 
@@ -13,7 +15,7 @@ VISITS_URL = os.getenv(
13
  "https://huggingface.co/datasets/19arjun89/ai_recruiting_agent_usage/resolve/main/usage/visits.jsonl",
14
  )
15
 
16
- # Add this as a Hugging Face Space SECRET named MAPBOX_TOKEN
17
  MAPBOX_TOKEN = os.getenv("MAPBOX_TOKEN", "").strip()
18
 
19
  # Safety cap for very large jsonl files
@@ -53,41 +55,32 @@ def load_rows_streaming():
53
  break
54
 
55
 
56
- def build_report(url_contains: str):
57
  """
58
  Aggregate usage events by country and render:
59
- - Mapbox choropleth (ISO3 internally, country name on hover)
60
  - Table with country name + usage events
61
  """
62
- url_contains = (url_contains or "").strip().lower()
63
 
64
- # Count by country name
65
  country_counts = Counter()
66
 
67
- # For map: count by iso3, also remember a "display name" per iso3
68
  iso3_counts = Counter()
69
  iso3_to_name = {}
70
 
71
  scanned = 0
72
- matched_url = 0
73
  mappable = 0
74
 
75
  for row in load_rows_streaming():
76
  scanned += 1
77
 
78
- space_url = str(row.get("space_url", "") or "")
79
- if url_contains and url_contains not in space_url.lower():
80
- continue
81
- matched_url += 1
82
-
83
  country = normalize_country_name(row.get("country"))
84
  if not country:
85
  continue
86
 
87
- # Table count uses raw country field (normalized)
88
  country_counts[country] += 1
89
 
90
- # Map count uses ISO3 (skip if we can't resolve)
91
  iso3 = country_name_to_iso3(country)
92
  if not iso3:
93
  continue
@@ -96,10 +89,10 @@ def build_report(url_contains: str):
96
  iso3_to_name.setdefault(iso3, country)
97
  mappable += 1
98
 
99
- # Table dataframe
100
  table_df = (
101
- pd.DataFrame([{"country": k, "usage_events": v} for k, v in country_counts.items()])
102
- .sort_values("usage_events", ascending=False)
103
  .reset_index(drop=True)
104
  )
105
 
@@ -107,53 +100,66 @@ def build_report(url_contains: str):
107
  map_df = (
108
  pd.DataFrame(
109
  [
110
- {"iso3": iso3, "country": iso3_to_name.get(iso3, iso3), "usage_events": usage_events}
111
- for iso3, usage_events in iso3_counts.items()
112
  ]
113
  )
114
- .sort_values("usage_events", ascending=False)
115
  .reset_index(drop=True)
116
  )
117
 
118
- # Build figure
119
  if map_df.empty:
120
  fig = px.scatter(title="No mappable data found")
121
  fig.update_layout(height=720, margin=dict(l=0, r=0, t=40, b=0))
122
  summary = (
123
- f"Rows scanned: {scanned:,} • Rows after URL filter: {matched_url:,} • "
124
- f"Countries (table): {len(table_df):,} • Total Usage Events: {int(table_df['usage_events'].sum()) if len(table_df) else 0:,}"
125
  )
126
  return fig, table_df.head(50), summary
127
-
128
- # Reliable, interactive choropleth with built-in country polygons
129
  fig = px.choropleth(
130
  map_df,
131
  locations="iso3",
132
- color="usage_events",
133
- hover_name="country", # English country name in tooltip
134
- hover_data={"usage_events": True, "iso3": False}, # show usage_events only
135
  projection="natural earth",
136
  title=None,
137
  )
138
-
139
- # Make it fill the plot area & look less "demo-ish"
140
  fig.update_layout(
141
  height=720,
142
  margin=dict(l=0, r=0, t=0, b=0),
143
  paper_bgcolor="white",
144
  )
145
-
146
  fig.update_geos(
147
  showframe=False,
148
  showcoastlines=False,
149
  showcountries=True,
150
  countrycolor="rgba(0,0,0,0.25)",
151
  bgcolor="rgba(0,0,0,0)",
152
- domain=dict(x=[0, 1], y=[0, 1]), # occupy full canvas
153
- fitbounds="locations", # zoom to your data so it fills more
154
  )
155
-
156
- # Add a simple dashboard-style title
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  fig.add_annotation(
158
  text="Usage Events by Country",
159
  x=0.01,
@@ -166,11 +172,10 @@ def build_report(url_contains: str):
166
  font=dict(size=20),
167
  )
168
 
169
-
170
  summary = (
171
- f"Rows scanned: {scanned:,} • Rows after URL filter: {matched_url:,} • "
172
- f"Rows mappable: {mappable:,} • Countries (table): {len(table_df):,} • "
173
- f"Countries (map): {len(map_df):,} • Total Usage Events: {int(table_df['usage_events'].sum()) if len(table_df) else 0:,}"
174
  )
175
 
176
  return fig, table_df.head(50), summary
@@ -179,8 +184,7 @@ def build_report(url_contains: str):
179
  with gr.Blocks(title="AI Recruiting Agent — Usage Map") as demo:
180
  gr.Markdown(
181
  "# AI Recruiting Agent — Usage by Country\n"
182
- "This Space reads **only** `usage/visits.jsonl` and plots usage events by country.\n\n"
183
- "- Set **MAPBOX_TOKEN** as a Space *Secret* for the best-looking map.\n"
184
  )
185
 
186
  run = gr.Button("Generate map")
@@ -190,8 +194,9 @@ with gr.Blocks(title="AI Recruiting Agent — Usage Map") as demo:
190
 
191
  run.click(
192
  fn=build_report,
193
- inputs=[url_contains],
194
  outputs=[plot, table, summary],
195
  )
196
 
197
  demo.launch()
 
 
1
+
2
  import os
3
  from collections import Counter
4
 
5
  import gradio as gr
6
  import pandas as pd
7
  import plotly.express as px
8
+ import plotly.graph_objects as go
9
  import pycountry
10
  from datasets import load_dataset
11
 
 
15
  "https://huggingface.co/datasets/19arjun89/ai_recruiting_agent_usage/resolve/main/usage/visits.jsonl",
16
  )
17
 
18
+ # Optional: You can keep this env var, but this version uses Plotly Geo (no Mapbox needed)
19
  MAPBOX_TOKEN = os.getenv("MAPBOX_TOKEN", "").strip()
20
 
21
  # Safety cap for very large jsonl files
 
55
  break
56
 
57
 
58
+ def build_report():
59
  """
60
  Aggregate usage events by country and render:
61
+ - Choropleth map with labels (country + usage events)
62
  - Table with country name + usage events
63
  """
 
64
 
65
+ # Count by country name (table)
66
  country_counts = Counter()
67
 
68
+ # Count by ISO3 (map), also store a display name per ISO3
69
  iso3_counts = Counter()
70
  iso3_to_name = {}
71
 
72
  scanned = 0
 
73
  mappable = 0
74
 
75
  for row in load_rows_streaming():
76
  scanned += 1
77
 
 
 
 
 
 
78
  country = normalize_country_name(row.get("country"))
79
  if not country:
80
  continue
81
 
 
82
  country_counts[country] += 1
83
 
 
84
  iso3 = country_name_to_iso3(country)
85
  if not iso3:
86
  continue
 
89
  iso3_to_name.setdefault(iso3, country)
90
  mappable += 1
91
 
92
+ # Table dataframe (country name + usage events)
93
  table_df = (
94
+ pd.DataFrame([{"country": k, "usage events": v} for k, v in country_counts.items()])
95
+ .sort_values("usage events", ascending=False)
96
  .reset_index(drop=True)
97
  )
98
 
 
100
  map_df = (
101
  pd.DataFrame(
102
  [
103
+ {"iso3": iso3, "country": iso3_to_name.get(iso3, iso3), "usage events": count}
104
+ for iso3, count in iso3_counts.items()
105
  ]
106
  )
107
+ .sort_values("usage events", ascending=False)
108
  .reset_index(drop=True)
109
  )
110
 
 
111
  if map_df.empty:
112
  fig = px.scatter(title="No mappable data found")
113
  fig.update_layout(height=720, margin=dict(l=0, r=0, t=40, b=0))
114
  summary = (
115
+ f"Rows scanned: {scanned:,} • Countries (table): {len(table_df):,} • "
116
+ f"Total usage events: {int(table_df['usage events'].sum()) if len(table_df) else 0:,}"
117
  )
118
  return fig, table_df.head(50), summary
119
+
120
+ # Choropleth (built-in polygons; reliable)
121
  fig = px.choropleth(
122
  map_df,
123
  locations="iso3",
124
+ color="usage events",
 
 
125
  projection="natural earth",
126
  title=None,
127
  )
128
+
 
129
  fig.update_layout(
130
  height=720,
131
  margin=dict(l=0, r=0, t=0, b=0),
132
  paper_bgcolor="white",
133
  )
134
+
135
  fig.update_geos(
136
  showframe=False,
137
  showcoastlines=False,
138
  showcountries=True,
139
  countrycolor="rgba(0,0,0,0.25)",
140
  bgcolor="rgba(0,0,0,0)",
141
+ domain=dict(x=[0, 1], y=[0, 1]),
142
+ fitbounds="locations",
143
  )
144
+
145
+ # Labels overlay (always visible)
146
+ # Tip: keep labels to top N to avoid clutter if you grow beyond ~30 countries
147
+ labels_df = map_df.copy()
148
+ labels_df["label"] = labels_df["country"] + "<br>" + labels_df["usage events"].astype(str)
149
+
150
+ fig.add_trace(
151
+ go.Scattergeo(
152
+ locations=labels_df["iso3"],
153
+ locationmode="ISO-3",
154
+ text=labels_df["label"],
155
+ mode="text",
156
+ textfont=dict(size=11, color="black", family="Arial"),
157
+ hoverinfo="skip",
158
+ showlegend=False,
159
+ )
160
+ )
161
+
162
+ # Title
163
  fig.add_annotation(
164
  text="Usage Events by Country",
165
  x=0.01,
 
172
  font=dict(size=20),
173
  )
174
 
 
175
  summary = (
176
+ f"Rows scanned: {scanned:,} • Rows mappable: {mappable:,} • "
177
+ f"Countries (table): {len(table_df):,} • Countries (map): {len(map_df):,} • "
178
+ f"Total usage events: {int(table_df['usage events'].sum()) if len(table_df) else 0:,}"
179
  )
180
 
181
  return fig, table_df.head(50), summary
 
184
  with gr.Blocks(title="AI Recruiting Agent — Usage Map") as demo:
185
  gr.Markdown(
186
  "# AI Recruiting Agent — Usage by Country\n"
187
+ "This Space reads **only** `usage/visits.jsonl` and plots **usage events** by country."
 
188
  )
189
 
190
  run = gr.Button("Generate map")
 
194
 
195
  run.click(
196
  fn=build_report,
197
+ inputs=[],
198
  outputs=[plot, table, summary],
199
  )
200
 
201
  demo.launch()
202
+