Spaces:
Sleeping
Sleeping
Commit
·
2bedd25
1
Parent(s):
b7f99ba
Fix country selector by loading data from local JSON file
Browse files
- Replace datasets library loading with direct JSON file loading
- Add data.json (32MB) containing combined GVFD data with 104,564 records
- Update all functions to use correct column names (country, topic, value, iso_code)
- Simplify data loading and remove dependency on datasets library
- Fix get_countries() and get_categories() to properly extract from loaded data
- Update all visualization functions with correct column references
- Country selector now shows all 268 available countries/locations
- Enable Git LFS for JSON files
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- .gitattributes +1 -0
- app.py +65 -124
- data.json +3 -0
- requirements.txt +0 -1
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.json filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -2,16 +2,20 @@ import gradio as gr
|
|
| 2 |
import pandas as pd
|
| 3 |
import plotly.express as px
|
| 4 |
import plotly.graph_objects as go
|
| 5 |
-
|
|
|
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
# Load the dataset
|
| 9 |
def load_data():
|
| 10 |
-
"""Load the GVFD dataset from
|
| 11 |
try:
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
| 15 |
return df
|
| 16 |
except Exception as e:
|
| 17 |
print(f"Error loading dataset: {e}")
|
|
@@ -25,22 +29,19 @@ def get_countries():
|
|
| 25 |
"""Get sorted list of unique countries from the dataset"""
|
| 26 |
if df.empty:
|
| 27 |
return []
|
| 28 |
-
#
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
return sorted(df[country_col[0]].dropna().unique().tolist())
|
| 32 |
return []
|
| 33 |
|
| 34 |
def get_categories():
|
| 35 |
"""Get available categories from the dataset"""
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
]
|
| 43 |
-
return categories
|
| 44 |
|
| 45 |
def filter_data(countries, categories, min_value=None, max_value=None):
|
| 46 |
"""Filter dataset based on user selections"""
|
|
@@ -51,22 +52,18 @@ def filter_data(countries, categories, min_value=None, max_value=None):
|
|
| 51 |
|
| 52 |
# Filter by countries
|
| 53 |
if countries and len(countries) > 0:
|
| 54 |
-
|
| 55 |
-
filtered_df = filtered_df[filtered_df[country_col].isin(countries)]
|
| 56 |
|
| 57 |
-
# Filter by categories
|
| 58 |
if categories and len(categories) > 0:
|
| 59 |
-
|
| 60 |
-
if category_col:
|
| 61 |
-
filtered_df = filtered_df[filtered_df[category_col[0]].isin(categories)]
|
| 62 |
|
| 63 |
# Filter by value range
|
| 64 |
-
|
| 65 |
-
if value_col and (min_value is not None or max_value is not None):
|
| 66 |
if min_value is not None:
|
| 67 |
-
filtered_df = filtered_df[filtered_df[
|
| 68 |
if max_value is not None:
|
| 69 |
-
filtered_df = filtered_df[filtered_df[
|
| 70 |
|
| 71 |
return filtered_df
|
| 72 |
|
|
@@ -83,32 +80,17 @@ def create_bar_chart(countries, categories):
|
|
| 83 |
)
|
| 84 |
return fig
|
| 85 |
|
| 86 |
-
#
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
y=value_col,
|
| 98 |
-
color=category_col[0],
|
| 99 |
-
title="Value Factors by Country and Category",
|
| 100 |
-
labels={value_col: "Value Factor (USD)", country_col: "Country"},
|
| 101 |
-
barmode='group'
|
| 102 |
-
)
|
| 103 |
-
else:
|
| 104 |
-
grouped = filtered_df.groupby(country_col)[value_col].mean().reset_index()
|
| 105 |
-
fig = px.bar(
|
| 106 |
-
grouped,
|
| 107 |
-
x=country_col,
|
| 108 |
-
y=value_col,
|
| 109 |
-
title="Value Factors by Country",
|
| 110 |
-
labels={value_col: "Value Factor (USD)", country_col: "Country"}
|
| 111 |
-
)
|
| 112 |
|
| 113 |
fig.update_layout(xaxis_tickangle=-45, height=600)
|
| 114 |
return fig
|
|
@@ -126,32 +108,21 @@ def create_map_visualization(countries, categories):
|
|
| 126 |
)
|
| 127 |
return fig
|
| 128 |
|
| 129 |
-
# Identify columns
|
| 130 |
-
country_col = [col for col in filtered_df.columns if 'country' in col.lower()][0]
|
| 131 |
-
value_col = [col for col in filtered_df.columns if 'value' in col.lower() or 'factor' in col.lower()][0]
|
| 132 |
-
iso_col = [col for col in filtered_df.columns if 'iso' in col.lower() or 'code' in col.lower()]
|
| 133 |
-
|
| 134 |
# Aggregate by country
|
| 135 |
-
country_data = filtered_df.groupby(
|
| 136 |
-
|
| 137 |
-
#
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
country_data = country_data.merge(iso_data, on=country_col)
|
| 141 |
-
location_col = iso_col[0]
|
| 142 |
-
locationmode = 'ISO-3'
|
| 143 |
-
else:
|
| 144 |
-
location_col = country_col
|
| 145 |
-
locationmode = 'country names'
|
| 146 |
|
| 147 |
fig = px.choropleth(
|
| 148 |
country_data,
|
| 149 |
-
locations=
|
| 150 |
-
locationmode=
|
| 151 |
-
color=
|
| 152 |
-
hover_name=
|
| 153 |
title="Global Value Factors by Country",
|
| 154 |
-
labels={
|
| 155 |
color_continuous_scale="Viridis"
|
| 156 |
)
|
| 157 |
|
|
@@ -171,31 +142,17 @@ def create_comparison_chart(countries, categories):
|
|
| 171 |
)
|
| 172 |
return fig
|
| 173 |
|
| 174 |
-
#
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
color=country_col,
|
| 186 |
-
title="Category Comparison Across Countries",
|
| 187 |
-
labels={value_col: "Value Factor (USD)", category_col[0]: "Category"},
|
| 188 |
-
barmode='group'
|
| 189 |
-
)
|
| 190 |
-
else:
|
| 191 |
-
grouped = filtered_df.groupby(country_col)[value_col].mean().reset_index()
|
| 192 |
-
fig = px.bar(
|
| 193 |
-
grouped,
|
| 194 |
-
x=country_col,
|
| 195 |
-
y=value_col,
|
| 196 |
-
title="Value Factors by Country",
|
| 197 |
-
labels={value_col: "Value Factor (USD)", country_col: "Country"}
|
| 198 |
-
)
|
| 199 |
|
| 200 |
fig.update_layout(xaxis_tickangle=-45, height=600)
|
| 201 |
return fig
|
|
@@ -213,28 +170,14 @@ def create_box_plot(countries, categories):
|
|
| 213 |
)
|
| 214 |
return fig
|
| 215 |
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
x=category_col[0],
|
| 225 |
-
y=value_col,
|
| 226 |
-
color=country_col,
|
| 227 |
-
title="Distribution of Value Factors",
|
| 228 |
-
labels={value_col: "Value Factor (USD)", category_col[0]: "Category"}
|
| 229 |
-
)
|
| 230 |
-
else:
|
| 231 |
-
fig = px.box(
|
| 232 |
-
filtered_df,
|
| 233 |
-
x=country_col,
|
| 234 |
-
y=value_col,
|
| 235 |
-
title="Distribution of Value Factors by Country",
|
| 236 |
-
labels={value_col: "Value Factor (USD)", country_col: "Country"}
|
| 237 |
-
)
|
| 238 |
|
| 239 |
fig.update_layout(xaxis_tickangle=-45, height=600)
|
| 240 |
return fig
|
|
@@ -246,9 +189,7 @@ def get_summary_stats(countries, categories):
|
|
| 246 |
if filtered_df.empty:
|
| 247 |
return "No data available for the selected filters"
|
| 248 |
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
stats = filtered_df[value_col].describe()
|
| 252 |
|
| 253 |
summary = f"""
|
| 254 |
### Summary Statistics
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import plotly.express as px
|
| 4 |
import plotly.graph_objects as go
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
import numpy as np
|
| 8 |
|
| 9 |
# Load the dataset
|
| 10 |
def load_data():
|
| 11 |
+
"""Load the GVFD dataset from local JSON file"""
|
| 12 |
try:
|
| 13 |
+
json_path = os.path.join(os.path.dirname(__file__), 'data.json')
|
| 14 |
+
with open(json_path, 'r') as f:
|
| 15 |
+
data = json.load(f)
|
| 16 |
+
# Extract records from the JSON structure
|
| 17 |
+
records = data.get('records', [])
|
| 18 |
+
df = pd.DataFrame(records)
|
| 19 |
return df
|
| 20 |
except Exception as e:
|
| 21 |
print(f"Error loading dataset: {e}")
|
|
|
|
| 29 |
"""Get sorted list of unique countries from the dataset"""
|
| 30 |
if df.empty:
|
| 31 |
return []
|
| 32 |
+
# The column is named 'country' in the JSON data
|
| 33 |
+
if 'country' in df.columns:
|
| 34 |
+
return sorted(df['country'].dropna().unique().tolist())
|
|
|
|
| 35 |
return []
|
| 36 |
|
| 37 |
def get_categories():
|
| 38 |
"""Get available categories from the dataset"""
|
| 39 |
+
if df.empty:
|
| 40 |
+
return []
|
| 41 |
+
# Get unique topics from the data (topic column contains the categories)
|
| 42 |
+
if 'topic' in df.columns:
|
| 43 |
+
return sorted(df['topic'].dropna().unique().tolist())
|
| 44 |
+
return []
|
|
|
|
|
|
|
| 45 |
|
| 46 |
def filter_data(countries, categories, min_value=None, max_value=None):
|
| 47 |
"""Filter dataset based on user selections"""
|
|
|
|
| 52 |
|
| 53 |
# Filter by countries
|
| 54 |
if countries and len(countries) > 0:
|
| 55 |
+
filtered_df = filtered_df[filtered_df['country'].isin(countries)]
|
|
|
|
| 56 |
|
| 57 |
+
# Filter by categories (using 'topic' column)
|
| 58 |
if categories and len(categories) > 0:
|
| 59 |
+
filtered_df = filtered_df[filtered_df['topic'].isin(categories)]
|
|
|
|
|
|
|
| 60 |
|
| 61 |
# Filter by value range
|
| 62 |
+
if min_value is not None or max_value is not None:
|
|
|
|
| 63 |
if min_value is not None:
|
| 64 |
+
filtered_df = filtered_df[filtered_df['value'] >= min_value]
|
| 65 |
if max_value is not None:
|
| 66 |
+
filtered_df = filtered_df[filtered_df['value'] <= max_value]
|
| 67 |
|
| 68 |
return filtered_df
|
| 69 |
|
|
|
|
| 80 |
)
|
| 81 |
return fig
|
| 82 |
|
| 83 |
+
# Group by country and topic (category)
|
| 84 |
+
grouped = filtered_df.groupby(['country', 'topic'])['value'].mean().reset_index()
|
| 85 |
+
fig = px.bar(
|
| 86 |
+
grouped,
|
| 87 |
+
x='country',
|
| 88 |
+
y='value',
|
| 89 |
+
color='topic',
|
| 90 |
+
title="Value Factors by Country and Category",
|
| 91 |
+
labels={'value': "Value Factor (USD)", 'country': "Country", 'topic': "Category"},
|
| 92 |
+
barmode='group'
|
| 93 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
fig.update_layout(xaxis_tickangle=-45, height=600)
|
| 96 |
return fig
|
|
|
|
| 108 |
)
|
| 109 |
return fig
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
# Aggregate by country
|
| 112 |
+
country_data = filtered_df.groupby('country')['value'].mean().reset_index()
|
| 113 |
+
|
| 114 |
+
# Get ISO codes for the map
|
| 115 |
+
iso_data = filtered_df.groupby('country')['iso_code'].first().reset_index()
|
| 116 |
+
country_data = country_data.merge(iso_data, on='country')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
fig = px.choropleth(
|
| 119 |
country_data,
|
| 120 |
+
locations='iso_code',
|
| 121 |
+
locationmode='ISO-3',
|
| 122 |
+
color='value',
|
| 123 |
+
hover_name='country',
|
| 124 |
title="Global Value Factors by Country",
|
| 125 |
+
labels={'value': "Avg Value Factor (USD)"},
|
| 126 |
color_continuous_scale="Viridis"
|
| 127 |
)
|
| 128 |
|
|
|
|
| 142 |
)
|
| 143 |
return fig
|
| 144 |
|
| 145 |
+
# Group by topic (category) and country
|
| 146 |
+
grouped = filtered_df.groupby(['topic', 'country'])['value'].mean().reset_index()
|
| 147 |
+
fig = px.bar(
|
| 148 |
+
grouped,
|
| 149 |
+
x='topic',
|
| 150 |
+
y='value',
|
| 151 |
+
color='country',
|
| 152 |
+
title="Category Comparison Across Countries",
|
| 153 |
+
labels={'value': "Value Factor (USD)", 'topic': "Category"},
|
| 154 |
+
barmode='group'
|
| 155 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
fig.update_layout(xaxis_tickangle=-45, height=600)
|
| 158 |
return fig
|
|
|
|
| 170 |
)
|
| 171 |
return fig
|
| 172 |
|
| 173 |
+
fig = px.box(
|
| 174 |
+
filtered_df,
|
| 175 |
+
x='topic',
|
| 176 |
+
y='value',
|
| 177 |
+
color='country',
|
| 178 |
+
title="Distribution of Value Factors",
|
| 179 |
+
labels={'value': "Value Factor (USD)", 'topic': "Category"}
|
| 180 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
fig.update_layout(xaxis_tickangle=-45, height=600)
|
| 183 |
return fig
|
|
|
|
| 189 |
if filtered_df.empty:
|
| 190 |
return "No data available for the selected filters"
|
| 191 |
|
| 192 |
+
stats = filtered_df['value'].describe()
|
|
|
|
|
|
|
| 193 |
|
| 194 |
summary = f"""
|
| 195 |
### Summary Statistics
|
data.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ee86343b07d7781ed0d742c5fed758df71ef6ba8cfd28a0686ed2bf7be2c815
|
| 3 |
+
size 33633568
|
requirements.txt
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
gradio==5.49.1
|
| 2 |
pandas>=2.0.0
|
| 3 |
plotly>=5.18.0
|
| 4 |
-
datasets>=2.14.0
|
| 5 |
numpy>=1.24.0
|
|
|
|
| 1 |
gradio==5.49.1
|
| 2 |
pandas>=2.0.0
|
| 3 |
plotly>=5.18.0
|
|
|
|
| 4 |
numpy>=1.24.0
|