Spaces:

danielrosehill
/

GVFD-Navigator

Sleeping

danielrosehill Claude committed on Oct 14, 2025

Commit

2bedd25

1 Parent(s): b7f99ba

Fix country selector by loading data from local JSON file

- Replace datasets library loading with direct JSON file loading
- Add data.json (32MB) containing combined GVFD data with 104,564 records
- Update all functions to use correct column names (country, topic, value, iso_code)
- Simplify data loading and remove dependency on datasets library
- Fix get_countries() and get_categories() to properly extract from loaded data
- Update all visualization functions with correct column references
- Country selector now shows all 268 available countries/locations
- Enable Git LFS for JSON files

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (4) hide show

.gitattributes +1 -0
app.py +65 -124
data.json +3 -0
requirements.txt +0 -1

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -2,16 +2,20 @@ import gradio as gr
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-from datasets import load_dataset
 import numpy as np
 # Load the dataset
 def load_data():
-    """Load the GVFD dataset from Hugging Face"""
     try:
-        dataset = load_dataset("danielrosehill/Global-Value-Factor-Database-Refactor-V2")
-        # Assuming the main data is in the 'train' split - adjust if needed
-        df = pd.DataFrame(dataset['train'])
         return df
     except Exception as e:
         print(f"Error loading dataset: {e}")
@@ -25,22 +29,19 @@ def get_countries():
     """Get sorted list of unique countries from the dataset"""
     if df.empty:
         return []
-    # Assuming there's a 'country' column - adjust based on actual column name
-    country_col = [col for col in df.columns if 'country' in col.lower()]
-    if country_col:
-        return sorted(df[country_col[0]].dropna().unique().tolist())
     return []
 def get_categories():
     """Get available categories from the dataset"""
-    categories = [
-        "Air Pollution",
-        "Land Use and Conservation",
-        "Waste Generation",
-        "Water Consumption",
-        "Water Pollution"
-    ]
-    return categories
 def filter_data(countries, categories, min_value=None, max_value=None):
     """Filter dataset based on user selections"""
@@ -51,22 +52,18 @@ def filter_data(countries, categories, min_value=None, max_value=None):
     # Filter by countries
     if countries and len(countries) > 0:
-        country_col = [col for col in df.columns if 'country' in col.lower()][0]
-        filtered_df = filtered_df[filtered_df[country_col].isin(countries)]
-    # Filter by categories
     if categories and len(categories) > 0:
-        category_col = [col for col in df.columns if 'category' in col.lower()]
-        if category_col:
-            filtered_df = filtered_df[filtered_df[category_col[0]].isin(categories)]
     # Filter by value range
-    value_col = [col for col in df.columns if 'value' in col.lower() or 'factor' in col.lower()]
-    if value_col and (min_value is not None or max_value is not None):
         if min_value is not None:
-            filtered_df = filtered_df[filtered_df[value_col[0]] >= min_value]
         if max_value is not None:
-            filtered_df = filtered_df[filtered_df[value_col[0]] <= max_value]
     return filtered_df
@@ -83,32 +80,17 @@ def create_bar_chart(countries, categories):
         )
         return fig
-    # Identify columns
-    country_col = [col for col in filtered_df.columns if 'country' in col.lower()][0]
-    category_col = [col for col in filtered_df.columns if 'category' in col.lower()]
-    value_col = [col for col in filtered_df.columns if 'value' in col.lower() or 'factor' in col.lower()][0]
-    # Group by country and category
-    if category_col:
-        grouped = filtered_df.groupby([country_col, category_col[0]])[value_col].mean().reset_index()
-        fig = px.bar(
-            grouped,
-            x=country_col,
-            y=value_col,
-            color=category_col[0],
-            title="Value Factors by Country and Category",
-            labels={value_col: "Value Factor (USD)", country_col: "Country"},
-            barmode='group'
-        )
-    else:
-        grouped = filtered_df.groupby(country_col)[value_col].mean().reset_index()
-        fig = px.bar(
-            grouped,
-            x=country_col,
-            y=value_col,
-            title="Value Factors by Country",
-            labels={value_col: "Value Factor (USD)", country_col: "Country"}
-        )
     fig.update_layout(xaxis_tickangle=-45, height=600)
     return fig
@@ -126,32 +108,21 @@ def create_map_visualization(countries, categories):
         )
         return fig
-    # Identify columns
-    country_col = [col for col in filtered_df.columns if 'country' in col.lower()][0]
-    value_col = [col for col in filtered_df.columns if 'value' in col.lower() or 'factor' in col.lower()][0]
-    iso_col = [col for col in filtered_df.columns if 'iso' in col.lower() or 'code' in col.lower()]
     # Aggregate by country
-    country_data = filtered_df.groupby(country_col)[value_col].mean().reset_index()
-    # Try to use ISO codes if available
-    if iso_col:
-        iso_data = filtered_df.groupby(country_col)[iso_col[0]].first().reset_index()
-        country_data = country_data.merge(iso_data, on=country_col)
-        location_col = iso_col[0]
-        locationmode = 'ISO-3'
-    else:
-        location_col = country_col
-        locationmode = 'country names'
     fig = px.choropleth(
         country_data,
-        locations=location_col,
-        locationmode=locationmode,
-        color=value_col,
-        hover_name=country_col,
         title="Global Value Factors by Country",
-        labels={value_col: "Avg Value Factor (USD)"},
         color_continuous_scale="Viridis"
     )
@@ -171,31 +142,17 @@ def create_comparison_chart(countries, categories):
         )
         return fig
-    # Identify columns
-    country_col = [col for col in filtered_df.columns if 'country' in col.lower()][0]
-    category_col = [col for col in filtered_df.columns if 'category' in col.lower()]
-    value_col = [col for col in filtered_df.columns if 'value' in col.lower() or 'factor' in col.lower()][0]
-    if category_col:
-        grouped = filtered_df.groupby([category_col[0], country_col])[value_col].mean().reset_index()
-        fig = px.bar(
-            grouped,
-            x=category_col[0],
-            y=value_col,
-            color=country_col,
-            title="Category Comparison Across Countries",
-            labels={value_col: "Value Factor (USD)", category_col[0]: "Category"},
-            barmode='group'
-        )
-    else:
-        grouped = filtered_df.groupby(country_col)[value_col].mean().reset_index()
-        fig = px.bar(
-            grouped,
-            x=country_col,
-            y=value_col,
-            title="Value Factors by Country",
-            labels={value_col: "Value Factor (USD)", country_col: "Country"}
-        )
     fig.update_layout(xaxis_tickangle=-45, height=600)
     return fig
@@ -213,28 +170,14 @@ def create_box_plot(countries, categories):
         )
         return fig
-    # Identify columns
-    country_col = [col for col in filtered_df.columns if 'country' in col.lower()][0]
-    category_col = [col for col in filtered_df.columns if 'category' in col.lower()]
-    value_col = [col for col in filtered_df.columns if 'value' in col.lower() or 'factor' in col.lower()][0]
-    if category_col:
-        fig = px.box(
-            filtered_df,
-            x=category_col[0],
-            y=value_col,
-            color=country_col,
-            title="Distribution of Value Factors",
-            labels={value_col: "Value Factor (USD)", category_col[0]: "Category"}
-        )
-    else:
-        fig = px.box(
-            filtered_df,
-            x=country_col,
-            y=value_col,
-            title="Distribution of Value Factors by Country",
-            labels={value_col: "Value Factor (USD)", country_col: "Country"}
-        )
     fig.update_layout(xaxis_tickangle=-45, height=600)
     return fig
@@ -246,9 +189,7 @@ def get_summary_stats(countries, categories):
     if filtered_df.empty:
         return "No data available for the selected filters"
-    value_col = [col for col in filtered_df.columns if 'value' in col.lower() or 'factor' in col.lower()][0]
-    stats = filtered_df[value_col].describe()
     summary = f"""
 ### Summary Statistics

 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
+import json
+import os
 import numpy as np
 # Load the dataset
 def load_data():
+    """Load the GVFD dataset from local JSON file"""
     try:
+        json_path = os.path.join(os.path.dirname(__file__), 'data.json')
+        with open(json_path, 'r') as f:
+            data = json.load(f)
+        # Extract records from the JSON structure
+        records = data.get('records', [])
+        df = pd.DataFrame(records)
         return df
     except Exception as e:
         print(f"Error loading dataset: {e}")
     """Get sorted list of unique countries from the dataset"""
     if df.empty:
         return []
+    # The column is named 'country' in the JSON data
+    if 'country' in df.columns:
+        return sorted(df['country'].dropna().unique().tolist())
     return []
 def get_categories():
     """Get available categories from the dataset"""
+    if df.empty:
+        return []
+    # Get unique topics from the data (topic column contains the categories)
+    if 'topic' in df.columns:
+        return sorted(df['topic'].dropna().unique().tolist())
+    return []
 def filter_data(countries, categories, min_value=None, max_value=None):
     """Filter dataset based on user selections"""
     # Filter by countries
     if countries and len(countries) > 0:
+        filtered_df = filtered_df[filtered_df['country'].isin(countries)]
+    # Filter by categories (using 'topic' column)
     if categories and len(categories) > 0:
+        filtered_df = filtered_df[filtered_df['topic'].isin(categories)]
     # Filter by value range
+    if min_value is not None or max_value is not None:
         if min_value is not None:
+            filtered_df = filtered_df[filtered_df['value'] >= min_value]
         if max_value is not None:
+            filtered_df = filtered_df[filtered_df['value'] <= max_value]
     return filtered_df
         )
         return fig
+    # Group by country and topic (category)
+    grouped = filtered_df.groupby(['country', 'topic'])['value'].mean().reset_index()
+    fig = px.bar(
+        grouped,
+        x='country',
+        y='value',
+        color='topic',
+        title="Value Factors by Country and Category",
+        labels={'value': "Value Factor (USD)", 'country': "Country", 'topic': "Category"},
+        barmode='group'
+    )
     fig.update_layout(xaxis_tickangle=-45, height=600)
     return fig
         )
         return fig
     # Aggregate by country
+    country_data = filtered_df.groupby('country')['value'].mean().reset_index()
+    # Get ISO codes for the map
+    iso_data = filtered_df.groupby('country')['iso_code'].first().reset_index()
+    country_data = country_data.merge(iso_data, on='country')
     fig = px.choropleth(
         country_data,
+        locations='iso_code',
+        locationmode='ISO-3',
+        color='value',
+        hover_name='country',
         title="Global Value Factors by Country",
+        labels={'value': "Avg Value Factor (USD)"},
         color_continuous_scale="Viridis"
     )
         )
         return fig
+    # Group by topic (category) and country
+    grouped = filtered_df.groupby(['topic', 'country'])['value'].mean().reset_index()
+    fig = px.bar(
+        grouped,
+        x='topic',
+        y='value',
+        color='country',
+        title="Category Comparison Across Countries",
+        labels={'value': "Value Factor (USD)", 'topic': "Category"},
+        barmode='group'
+    )
     fig.update_layout(xaxis_tickangle=-45, height=600)
     return fig
         )
         return fig
+    fig = px.box(
+        filtered_df,
+        x='topic',
+        y='value',
+        color='country',
+        title="Distribution of Value Factors",
+        labels={'value': "Value Factor (USD)", 'topic': "Category"}
+    )
     fig.update_layout(xaxis_tickangle=-45, height=600)
     return fig
     if filtered_df.empty:
         return "No data available for the selected filters"
+    stats = filtered_df['value'].describe()
     summary = f"""
 ### Summary Statistics

data.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ee86343b07d7781ed0d742c5fed758df71ef6ba8cfd28a0686ed2bf7be2c815
+size 33633568

requirements.txt CHANGED Viewed

@@ -1,5 +1,4 @@
 gradio==5.49.1
 pandas>=2.0.0
 plotly>=5.18.0
-datasets>=2.14.0
 numpy>=1.24.0

 gradio==5.49.1
 pandas>=2.0.0
 plotly>=5.18.0
 numpy>=1.24.0