Spaces:

19arjun89
/

AI_Recruiting_Agent_Usage

Sleeping

App Files Files Community

19arjun89 committed on Feb 5

Commit

0763cc5

verified ·

1 Parent(s): c46fbf1

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -42

app.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import os
 from collections import Counter
 import gradio as gr
 import pandas as pd
 import plotly.express as px
 import pycountry
 from datasets import load_dataset
@@ -13,7 +15,7 @@ VISITS_URL = os.getenv(
     "https://huggingface.co/datasets/19arjun89/ai_recruiting_agent_usage/resolve/main/usage/visits.jsonl",
 )
-# Add this as a Hugging Face Space SECRET named MAPBOX_TOKEN
 MAPBOX_TOKEN = os.getenv("MAPBOX_TOKEN", "").strip()
 # Safety cap for very large jsonl files
@@ -53,41 +55,32 @@ def load_rows_streaming():
             break
-def build_report(url_contains: str):
     """
     Aggregate usage events by country and render:
-    - Mapbox choropleth (ISO3 internally, country name on hover)
     - Table with country name + usage events
     """
-    url_contains = (url_contains or "").strip().lower()
-    # Count by country name
     country_counts = Counter()
-    # For map: count by iso3, also remember a "display name" per iso3
     iso3_counts = Counter()
     iso3_to_name = {}
     scanned = 0
-    matched_url = 0
     mappable = 0
     for row in load_rows_streaming():
         scanned += 1
-        space_url = str(row.get("space_url", "") or "")
-        if url_contains and url_contains not in space_url.lower():
-            continue
-        matched_url += 1
         country = normalize_country_name(row.get("country"))
         if not country:
             continue
-        # Table count uses raw country field (normalized)
         country_counts[country] += 1
-        # Map count uses ISO3 (skip if we can't resolve)
         iso3 = country_name_to_iso3(country)
         if not iso3:
             continue
@@ -96,10 +89,10 @@ def build_report(url_contains: str):
         iso3_to_name.setdefault(iso3, country)
         mappable += 1
-    # Table dataframe
     table_df = (
-        pd.DataFrame([{"country": k, "usage_events": v} for k, v in country_counts.items()])
-        .sort_values("usage_events", ascending=False)
         .reset_index(drop=True)
     )
@@ -107,53 +100,66 @@ def build_report(url_contains: str):
     map_df = (
         pd.DataFrame(
             [
-                {"iso3": iso3, "country": iso3_to_name.get(iso3, iso3), "usage_events": usage_events}
-                for iso3, usage_events in iso3_counts.items()
             ]
         )
-        .sort_values("usage_events", ascending=False)
         .reset_index(drop=True)
     )
-    # Build figure
     if map_df.empty:
         fig = px.scatter(title="No mappable data found")
         fig.update_layout(height=720, margin=dict(l=0, r=0, t=40, b=0))
         summary = (
-            f"Rows scanned: {scanned:,} • Rows after URL filter: {matched_url:,} • "
-            f"Countries (table): {len(table_df):,} • Total Usage Events: {int(table_df['usage_events'].sum()) if len(table_df) else 0:,}"
         )
         return fig, table_df.head(50), summary
-    # ✅ Reliable, interactive choropleth with built-in country polygons
     fig = px.choropleth(
         map_df,
         locations="iso3",
-        color="usage_events",
-        hover_name="country",                       # English country name in tooltip
-        hover_data={"usage_events": True, "iso3": False},   # show usage_events only
         projection="natural earth",
         title=None,
     )
-    # Make it fill the plot area & look less "demo-ish"
     fig.update_layout(
         height=720,
         margin=dict(l=0, r=0, t=0, b=0),
         paper_bgcolor="white",
     )
     fig.update_geos(
         showframe=False,
         showcoastlines=False,
         showcountries=True,
         countrycolor="rgba(0,0,0,0.25)",
         bgcolor="rgba(0,0,0,0)",
-        domain=dict(x=[0, 1], y=[0, 1]),  # occupy full canvas
-        fitbounds="locations",            # zoom to your data so it fills more
     )
-    # Add a simple dashboard-style title
     fig.add_annotation(
         text="Usage Events by Country",
         x=0.01,
@@ -166,11 +172,10 @@ def build_report(url_contains: str):
         font=dict(size=20),
     )
     summary = (
-        f"Rows scanned: {scanned:,} • Rows after URL filter: {matched_url:,} • "
-        f"Rows mappable: {mappable:,} • Countries (table): {len(table_df):,} • "
-        f"Countries (map): {len(map_df):,} • Total Usage Events: {int(table_df['usage_events'].sum()) if len(table_df) else 0:,}"
     )
     return fig, table_df.head(50), summary
@@ -179,8 +184,7 @@ def build_report(url_contains: str):
 with gr.Blocks(title="AI Recruiting Agent — Usage Map") as demo:
     gr.Markdown(
         "# AI Recruiting Agent — Usage by Country\n"
-        "This Space reads **only** `usage/visits.jsonl` and plots usage events by country.\n\n"
-        "- Set **MAPBOX_TOKEN** as a Space *Secret* for the best-looking map.\n"
     )
     run = gr.Button("Generate map")
@@ -190,8 +194,9 @@ with gr.Blocks(title="AI Recruiting Agent — Usage Map") as demo:
     run.click(
         fn=build_report,
-        inputs=[url_contains],
         outputs=[plot, table, summary],
     )
 demo.launch()

 import os
 from collections import Counter
 import gradio as gr
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 import pycountry
 from datasets import load_dataset
     "https://huggingface.co/datasets/19arjun89/ai_recruiting_agent_usage/resolve/main/usage/visits.jsonl",
 )
+# Optional: You can keep this env var, but this version uses Plotly Geo (no Mapbox needed)
 MAPBOX_TOKEN = os.getenv("MAPBOX_TOKEN", "").strip()
 # Safety cap for very large jsonl files
             break
+def build_report():
     """
     Aggregate usage events by country and render:
+    - Choropleth map with labels (country + usage events)
     - Table with country name + usage events
     """
+    # Count by country name (table)
     country_counts = Counter()
+    # Count by ISO3 (map), also store a display name per ISO3
     iso3_counts = Counter()
     iso3_to_name = {}
     scanned = 0
     mappable = 0
     for row in load_rows_streaming():
         scanned += 1
         country = normalize_country_name(row.get("country"))
         if not country:
             continue
         country_counts[country] += 1
         iso3 = country_name_to_iso3(country)
         if not iso3:
             continue
         iso3_to_name.setdefault(iso3, country)
         mappable += 1
+    # Table dataframe (country name + usage events)
     table_df = (
+        pd.DataFrame([{"country": k, "usage events": v} for k, v in country_counts.items()])
+        .sort_values("usage events", ascending=False)
         .reset_index(drop=True)
     )
     map_df = (
         pd.DataFrame(
             [
+                {"iso3": iso3, "country": iso3_to_name.get(iso3, iso3), "usage events": count}
+                for iso3, count in iso3_counts.items()
             ]
         )
+        .sort_values("usage events", ascending=False)
         .reset_index(drop=True)
     )
     if map_df.empty:
         fig = px.scatter(title="No mappable data found")
         fig.update_layout(height=720, margin=dict(l=0, r=0, t=40, b=0))
         summary = (
+            f"Rows scanned: {scanned:,} • Countries (table): {len(table_df):,} • "
+            f"Total usage events: {int(table_df['usage events'].sum()) if len(table_df) else 0:,}"
         )
         return fig, table_df.head(50), summary
+    # Choropleth (built-in polygons; reliable)
     fig = px.choropleth(
         map_df,
         locations="iso3",
+        color="usage events",
         projection="natural earth",
         title=None,
     )
     fig.update_layout(
         height=720,
         margin=dict(l=0, r=0, t=0, b=0),
         paper_bgcolor="white",
     )
     fig.update_geos(
         showframe=False,
         showcoastlines=False,
         showcountries=True,
         countrycolor="rgba(0,0,0,0.25)",
         bgcolor="rgba(0,0,0,0)",
+        domain=dict(x=[0, 1], y=[0, 1]),
+        fitbounds="locations",
     )
+    # Labels overlay (always visible)
+    # Tip: keep labels to top N to avoid clutter if you grow beyond ~30 countries
+    labels_df = map_df.copy()
+    labels_df["label"] = labels_df["country"] + "<br>" + labels_df["usage events"].astype(str)
+    fig.add_trace(
+        go.Scattergeo(
+            locations=labels_df["iso3"],
+            locationmode="ISO-3",
+            text=labels_df["label"],
+            mode="text",
+            textfont=dict(size=11, color="black", family="Arial"),
+            hoverinfo="skip",
+            showlegend=False,
+        )
+    )
+    # Title
     fig.add_annotation(
         text="Usage Events by Country",
         x=0.01,
         font=dict(size=20),
     )
     summary = (
+        f"Rows scanned: {scanned:,} • Rows mappable: {mappable:,} • "
+        f"Countries (table): {len(table_df):,} • Countries (map): {len(map_df):,} • "
+        f"Total usage events: {int(table_df['usage events'].sum()) if len(table_df) else 0:,}"
     )
     return fig, table_df.head(50), summary
 with gr.Blocks(title="AI Recruiting Agent — Usage Map") as demo:
     gr.Markdown(
         "# AI Recruiting Agent — Usage by Country\n"
+        "This Space reads **only** `usage/visits.jsonl` and plots **usage events** by country."
     )
     run = gr.Button("Generate map")
     run.click(
         fn=build_report,
+        inputs=[],
         outputs=[plot, table, summary],
     )
 demo.launch()