Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import json | |
| import os | |
| import numpy as np | |
| from functools import lru_cache | |
# Module-level cache for the dataset; filled by the first load_data() call.
_df_cache = None


def load_data():
    """Load the GVFD dataset from the local ``data.json`` file, caching the result.

    The file is looked up next to this module and is expected to hold a JSON
    object whose ``records`` key is a list of row dicts.  The first call parses
    the file and stores the resulting DataFrame in ``_df_cache``; every later
    call returns that same object.

    Returns:
        pandas.DataFrame: the loaded records.  If the file cannot be read or
        parsed, an empty DataFrame is returned (and cached, so the load is not
        retried on every call).
    """
    global _df_cache
    if _df_cache is not None:
        return _df_cache

    try:
        json_path = os.path.join(os.path.dirname(__file__), 'data.json')
        print(f"Loading data from {json_path}...")
        # JSON is UTF-8 by specification; do not rely on the platform's
        # locale encoding (which breaks non-ASCII names on some systems).
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Extract records from the JSON structure
        records = data.get('records', [])
        frame = pd.DataFrame(records)

        # Optimize data types to reduce memory usage.  This is best effort:
        # a column that cannot be optimised must not discard the whole dataset.
        row_count = len(frame)
        for col in frame.columns:
            if frame[col].dtype != 'object':
                continue
            try:
                nunique = frame[col].nunique()
                # If less than 50% of the values are unique, use categorical
                if row_count and nunique / row_count < 0.5:
                    frame[col] = frame[col].astype('category')
            except TypeError:
                # Unhashable cell values (lists, dicts) cannot be counted or
                # made categorical; keep the column as plain objects.
                continue

        _df_cache = frame
        print(f"Data loaded: {len(_df_cache)} records, {_df_cache.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
        return _df_cache
    except Exception as e:
        print(f"Error loading dataset: {e}")
        # Return empty dataframe if loading fails
        _df_cache = pd.DataFrame()
        return _df_cache
def get_df():
    """Return the shared dataset DataFrame, delegating to load_data()."""
    dataset = load_data()
    return dataset
def get_countries():
    """Return the sorted, de-duplicated, non-null values of the 'country' column.

    An empty list is returned when the dataset is empty or has no
    'country' column.
    """
    frame = get_df()
    if frame.empty or 'country' not in frame.columns:
        return []
    names = frame['country'].dropna().unique().tolist()
    names.sort()
    return names
def get_topics():
    """Return the sorted, de-duplicated, non-null values of the 'topic' column.

    An empty list is returned when the dataset is empty or has no
    'topic' column.
    """
    frame = get_df()
    has_topics = (not frame.empty) and 'topic' in frame.columns
    return sorted(frame['topic'].dropna().unique().tolist()) if has_topics else []
def get_specific_categories(topics=None):
    """Return the sorted unique 'category' values, optionally limited to *topics*.

    Args:
        topics: iterable of topic names to keep; a falsy value means
            "no topic filter".

    Returns:
        A sorted list of the non-null categories in the (filtered) rows, or an
        empty list when the dataset is empty or has no 'category' column.
    """
    frame = get_df()
    if frame.empty:
        return []

    # Normalise the selection to a tuple; falsy selections collapse to None.
    selected = topics if isinstance(topics, tuple) else (tuple(topics) if topics else None)

    subset = frame[frame['topic'].isin(selected)] if selected else frame
    if 'category' not in subset.columns:
        return []
    return sorted(subset['category'].dropna().unique().tolist())
def get_locations(topics=None):
    """Return the sorted unique 'location' values, optionally limited to *topics*.

    Args:
        topics: iterable of topic names to keep; a falsy value means
            "no topic filter".

    Returns:
        A sorted list of the non-null locations in the (filtered) rows, or an
        empty list when the dataset is empty or has no 'location' column.
    """
    frame = get_df()
    if frame.empty:
        return []

    # Normalise the selection to a tuple; falsy selections collapse to None.
    wanted = topics
    if not isinstance(wanted, tuple):
        wanted = tuple(wanted) if wanted else None

    rows = frame if not wanted else frame[frame['topic'].isin(wanted)]
    if 'location' not in rows.columns:
        return []
    values = rows['location'].dropna().unique().tolist()
    values.sort()
    return values
def get_impacts(topics=None):
    """Return the sorted unique 'impact' values, optionally limited to *topics*.

    Args:
        topics: iterable of topic names to keep; a falsy value means
            "no topic filter".

    Returns:
        A sorted list of the non-null impact types in the (filtered) rows, or
        an empty list when the dataset is empty or has no 'impact' column.
    """
    frame = get_df()
    if frame.empty:
        return []

    # Normalise the selection to a tuple; falsy selections collapse to None.
    if isinstance(topics, tuple):
        chosen = topics
    elif topics:
        chosen = tuple(topics)
    else:
        chosen = None

    if chosen:
        frame = frame[frame['topic'].isin(chosen)]
    return sorted(frame['impact'].dropna().unique().tolist()) if 'impact' in frame.columns else []
def get_regions():
    """Return the sorted, de-duplicated, non-null values of the 'region' column.

    An empty list is returned when the dataset is empty or has no
    'region' column.
    """
    data = get_df()
    region_missing = data.empty or 'region' not in data.columns
    if region_missing:
        return []
    regions = data['region'].dropna().unique().tolist()
    return sorted(regions)
def filter_data(countries=None, topics=None, categories=None, locations=None, impacts=None, regions=None, min_value=None, max_value=None, search_text=None):
    """Filter the dataset based on user selections.

    All filters are combined with AND; a filter that is ``None`` or empty is
    skipped.

    Args:
        countries, topics, categories, locations, impacts, regions: list-like
            collections of accepted values for the 'country', 'topic',
            'category', 'location', 'impact' and 'region' columns.
        min_value: inclusive lower bound for the 'value' column.
        max_value: inclusive upper bound for the 'value' column.
        search_text: free text matched case-insensitively, as a literal
            substring, against the text columns listed above; a row is kept
            when any of them contains it.

    Returns:
        pandas.DataFrame: the matching rows.  When no filter is active this is
        the cached dataset object itself, so callers must copy before
        mutating.  A new empty DataFrame is returned when the dataset is empty.
    """
    df = get_df()
    if df.empty:
        return pd.DataFrame()

    filtered_df = df

    # Membership filters: (column, accepted values) pairs applied in turn.
    selections = (
        ('country', countries),
        ('topic', topics),        # Air Pollution, Water Pollution, etc.
        ('category', categories),  # PM2.5, NOx, etc.
        ('location', locations),   # Urban, Rural, etc.
        ('impact', impacts),       # Primary Health, etc.
        ('region', regions),
    )
    for column, accepted in selections:
        if accepted:
            filtered_df = filtered_df[filtered_df[column].isin(accepted)]

    # Filter by value range (both bounds inclusive)
    if min_value is not None:
        filtered_df = filtered_df[filtered_df['value'] >= min_value]
    if max_value is not None:
        filtered_df = filtered_df[filtered_df['value'] <= max_value]

    # Search filter - search across multiple text columns
    if search_text and search_text.strip():
        needle = search_text.strip().lower()
        mask = pd.Series(False, index=filtered_df.index)
        for column, _ in selections:
            if column not in filtered_df.columns:
                continue
            lowered = filtered_df[column].str.lower()
            # regex=False: the box takes plain text, so characters such as
            # "(" or "." must be matched literally, not parsed as a pattern.
            mask = mask | lowered.str.contains(needle, regex=False, na=False)
        filtered_df = filtered_df[mask]

    return filtered_df
def create_bar_chart(filtered_df):
    """Create a grouped bar chart of mean value factor per country and impact category.

    Bars are grouped by country and coloured by a composite label built as
    ``"<category> (<location>, <impact>)"``.

    Args:
        filtered_df: DataFrame with 'country', 'topic', 'category',
            'location', 'impact' and 'value' columns.  It is not modified.

    Returns:
        A Plotly figure; when *filtered_df* is empty, a blank figure carrying
        a "no data" annotation.
    """
    if filtered_df.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available for the selected filters",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        return fig

    # Work on a copy so the caller's (possibly cached) frame is left untouched.
    filtered_df = filtered_df.copy()
    # Create a composite key for proper comparison level: category + location + impact
    filtered_df['impact_category'] = (
        filtered_df['category'].astype(str) + ' (' +
        filtered_df['location'].astype(str) + ', ' +
        filtered_df['impact'].astype(str) + ')'
    )

    # observed=True: the grouping columns may be categorical, and without it
    # pandas (< 3.0) emits a row for every category combination, including
    # ones absent from the filtered data, each with a NaN mean.
    grouped = (
        filtered_df
        .groupby(['country', 'impact_category', 'topic'], observed=True)['value']
        .mean()
        .reset_index()
    )

    fig = px.bar(
        grouped,
        x='country',
        y='value',
        color='impact_category',
        title="Value Factors by Country and Specific Impact Category",
        labels={'value': "Value Factor (USD)", 'country': "Country", 'impact_category': "Impact Category"},
        barmode='group',
        hover_data=['topic']
    )
    fig.update_layout(xaxis_tickangle=-45, height=600)
    return fig
def create_map_visualization(filtered_df):
    """Create a choropleth map of the mean value factor per country.

    Args:
        filtered_df: DataFrame with 'country', 'iso_code' and 'value' columns.
            'iso_code' is passed to Plotly with ``locationmode='ISO-3'``.

    Returns:
        A Plotly figure; when *filtered_df* is empty, a blank figure carrying
        a "no data" annotation.
    """
    if filtered_df.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available for the selected filters",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        return fig

    # One pass per country: mean value plus the first non-null ISO code.
    # observed=True keeps only countries present in the filtered rows; with a
    # categorical 'country' column pandas (< 3.0) would otherwise add a NaN
    # row for every country that was filtered out.
    country_data = (
        filtered_df
        .groupby('country', observed=True)
        .agg(value=('value', 'mean'), iso_code=('iso_code', 'first'))
        .reset_index()
    )

    fig = px.choropleth(
        country_data,
        locations='iso_code',
        locationmode='ISO-3',
        color='value',
        hover_name='country',
        title="Global Value Factors by Country",
        labels={'value': "Avg Value Factor (USD)"},
        color_continuous_scale="Viridis"
    )
    fig.update_layout(height=600)
    return fig
def create_comparison_chart(filtered_df):
    """Create a grouped bar chart comparing impact categories across countries.

    The x axis holds a composite label built as
    ``"<category> (<location>, <impact>)"``; bars are coloured by country and
    show the mean value factor.

    Args:
        filtered_df: DataFrame with 'country', 'topic', 'category',
            'location', 'impact' and 'value' columns.  It is not modified.

    Returns:
        A Plotly figure; when *filtered_df* is empty, a blank figure carrying
        a "no data" annotation.
    """
    if filtered_df.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available for the selected filters",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        return fig

    # Work on a copy so the caller's (possibly cached) frame is left untouched.
    filtered_df = filtered_df.copy()
    # Create a composite key for proper comparison level: category + location + impact
    filtered_df['impact_category'] = (
        filtered_df['category'].astype(str) + ' (' +
        filtered_df['location'].astype(str) + ', ' +
        filtered_df['impact'].astype(str) + ')'
    )

    # observed=True: the grouping columns may be categorical, and without it
    # pandas (< 3.0) emits a row for every category combination, including
    # ones absent from the filtered data, each with a NaN mean.
    grouped = (
        filtered_df
        .groupby(['impact_category', 'country', 'topic'], observed=True)['value']
        .mean()
        .reset_index()
    )

    fig = px.bar(
        grouped,
        x='impact_category',
        y='value',
        color='country',
        title="Specific Impact Category Comparison Across Countries",
        labels={'value': "Value Factor (USD)", 'impact_category': "Impact Category"},
        barmode='group',
        hover_data=['topic']
    )
    fig.update_layout(xaxis_tickangle=-45, height=600)
    return fig
def create_box_plot(filtered_df):
    """Create a box plot of value factors per composite impact category.

    The x axis holds a label built as ``"<category> (<location>, <impact>)"``
    and boxes are coloured by country.  The input frame is copied before the
    label column is added.

    Returns:
        A Plotly figure; when *filtered_df* is empty, a blank figure carrying
        a "no data" annotation.
    """
    if filtered_df.empty:
        placeholder = go.Figure()
        placeholder.add_annotation(
            x=0.5, y=0.5,
            xref="paper", yref="paper",
            showarrow=False,
            text="No data available for the selected filters",
        )
        return placeholder

    plot_df = filtered_df.copy()

    # Comparison level is category + location + impact, folded into one label.
    place_and_impact = plot_df['location'].astype(str) + ', ' + plot_df['impact'].astype(str)
    plot_df['impact_category'] = plot_df['category'].astype(str) + ' (' + place_and_impact + ')'

    axis_labels = {'value': "Value Factor (USD)", 'impact_category': "Impact Category"}
    box_fig = px.box(
        plot_df,
        x='impact_category',
        y='value',
        color='country',
        title="Distribution of Value Factors by Specific Impact Category",
        labels=axis_labels,
        hover_data=['topic'],
    )
    box_fig.update_layout(height=600, xaxis_tickangle=-45)
    return box_fig
def get_data_table(filtered_df, max_rows=500):
    """Build the display table for *filtered_df*.

    Args:
        filtered_df: the rows to show; it is not modified.
        max_rows: maximum number of leading rows to include (default 500).

    Returns:
        A copy of the first *max_rows* rows in which the 'value' column, if
        present, is rendered as text such as ``$1,234.50`` (missing values
        become an empty string).  For an empty input, a one-cell frame with a
        "Message" column is returned instead.
    """
    if filtered_df.empty:
        placeholder = {"Message": ["No data available for the selected filters"]}
        return pd.DataFrame(placeholder)

    def _as_usd(amount):
        # Missing amounts render as an empty cell rather than "$nan".
        if pd.isna(amount):
            return ""
        return f"${amount:,.2f}"

    # Slice first, then copy, so only the displayed rows are duplicated.
    preview = filtered_df.head(max_rows).copy()
    if 'value' in preview.columns:
        preview['value'] = preview['value'].apply(_as_usd)
    return preview
# Create Gradio interface
#
# Page layout, top to bottom: intro text, search box + filter dropdowns,
# the data table, the chart tabs (plus an "About" tab) and a footer.
# The dropdown choices are computed by the get_* helpers while this block
# runs, i.e. the dataset is loaded when the module is imported.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# the source - confirm the footer Markdown and the handlers sit at this level.
# The Markdown bodies are kept flush-left so the string contents are unchanged.
with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# Global Value Factor Database Navigator
Explore environmental and social impact value factors by country from the IFVI Global Value Factor Database.
This visualization tool allows you to:
- Filter and search data by multiple parameters (country, impact type, location, etc.)
- View filtered data in an interactive table
- Visualize patterns through charts and maps downstream of your filtered selection
**Important**: Value factors are comparable at the **category + location + impact** level within each topic.
For example, within "Air Pollution", individual measurements like "PM2.5 (Urban, Primary Health)" are comparable across countries.
**Data Source**: [IFVI Global Value Factor Database V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
""")
    # Filters and Search section at the top
    gr.Markdown("## Filters and Search")
    gr.Markdown("Set your filter parameters below, then click 'Apply Filters' to update the table and visualizations.")
    with gr.Row():
        search_box = gr.Textbox(
            label="Search",
            placeholder="Search across all fields (country, category, location, impact, region, topic)...",
            scale=3
        )
        refresh_btn = gr.Button("Apply Filters", variant="primary", size="sm", scale=1)
    # First filter row: country / topic / region
    with gr.Row():
        with gr.Column():
            country_selector = gr.Dropdown(
                choices=get_countries(),
                multiselect=True,
                label="Countries",
                info="Select one or more countries",
                value=None
            )
        with gr.Column():
            topic_selector = gr.Dropdown(
                choices=get_topics(),
                multiselect=True,
                label="Topics",
                info="Air Pollution, Water Pollution, Land Use, etc.",
                value=None
            )
        with gr.Column():
            region_selector = gr.Dropdown(
                choices=get_regions(),
                multiselect=True,
                label="Regions",
                info="Geographic regions",
                value=None
            )
    # Second filter row: these three dropdowns are re-populated whenever the
    # topic selection changes (see topic_selector.change below).
    with gr.Row():
        with gr.Column():
            category_selector = gr.Dropdown(
                choices=get_specific_categories(),
                multiselect=True,
                label="Specific Categories",
                info="PM2.5, NOx, BOD, etc.",
                value=None
            )
        with gr.Column():
            location_selector = gr.Dropdown(
                choices=get_locations(),
                multiselect=True,
                label="Locations",
                info="Urban, Rural, etc.",
                value=None
            )
        with gr.Column():
            impact_selector = gr.Dropdown(
                choices=get_impacts(),
                multiselect=True,
                label="Impact Types",
                info="Primary Health, Secondary Health, etc.",
                value=None
            )
    # Optional numeric bounds on the value column; empty means "no bound"
    with gr.Row():
        with gr.Column():
            min_value = gr.Number(label="Min Value (USD)", value=None, precision=2)
        with gr.Column():
            max_value = gr.Number(label="Max Value (USD)", value=None, precision=2)
    # Data table as primary visualization
    gr.Markdown("## Data Table")
    gr.Markdown("Filtered data appears below (showing up to 500 rows). Values are formatted with dollar signs and comma separators. Use filters to narrow down the dataset.")
    data_table = gr.Dataframe(
        label="Filtered Value Factors",
        wrap=True,
        interactive=False,
        value=None,  # Built empty; filled by demo.load() on page open and by "Apply Filters"
        column_widths=["10%", "12%", "12%", "12%", "12%", "10%", "12%", "10%", "10%"]
    )
    # Visualizations below the table
    gr.Markdown("## Visualizations")
    gr.Markdown("The charts and maps below reflect your filtered data selection from above.")
    with gr.Tabs():
        with gr.Tab("Bar Chart"):
            bar_chart = gr.Plot(label="Value Factors by Country", value=None)
        with gr.Tab("World Map"):
            map_chart = gr.Plot(label="Global Value Factor Distribution", value=None)
        with gr.Tab("Category Comparison"):
            comparison_chart = gr.Plot(label="Category Comparison", value=None)
        with gr.Tab("Distribution"):
            box_plot = gr.Plot(label="Value Factor Distribution", value=None)
        with gr.Tab("About"):
            gr.Markdown("""
# About GVFD Navigator
## Purpose of This Tool
The **GVFD Navigator** is an interactive visualization tool designed to help researchers, analysts, policymakers,
and sustainability professionals explore the Global Value Factor Database (GVFD). This navigator enables you to:
- **Filter and explore** environmental and social impact value factors by country and category
- **Visualize patterns** in how different countries value environmental impacts
- **Compare regions** to identify global trends and outliers
- **Export and analyze** filtered data for your own research or reporting needs
- **Understand monetary valuations** of environmental impacts across 229 countries
This tool transforms the raw GVFD dataset into accessible, interactive visualizations that make it easier to
understand how environmental and social impacts translate into economic terms across different regions.
---
## About the Global Value Factor Database (GVFD)
### What is the GVFD?
The **Global Value Factor Database** is a pioneering dataset developed by the [International Foundation for
Valuing Impacts (IFVI)](https://www.ifvi.org/) that converts non-financial environmental and social impacts
into standardized monetary values (US Dollars).
The database represents a groundbreaking framework for evaluating global value creation by translating
companies' environmental and social impacts into financial equivalents, enabling a more holistic assessment
of corporate and organizational performance.
### Methodology
The GVFD uses a rigorous methodology to:
- Convert non-financial environmental and social impacts into standardized monetary values
- Provide value factors as multipliers to calculate monetary equivalents of impacts
- Standardize impact accounting across different domains and geographies
- Enable currency conversion for non-USD jurisdictions
- Support integration into financial reporting and impact accounting systems
### Coverage
- **229 countries and territories** worldwide
- **205 countries with ISO codes** (89.5% coverage)
- **~115,000 individual measurements** across all categories
- **7 major world regions** represented
- **50 US states** included for detailed US analysis
### Impact Categories
The GVFD covers five major environmental impact categories:
1. **Air Pollution** - Value factors for atmospheric emissions and air quality impacts
2. **Land Use and Conservation** - Monetary values for land use changes and conservation impacts
3. **Waste Generation** - Economic valuations of waste production and management
4. **Water Consumption** - Value factors for water use and depletion
5. **Water Pollution** - Monetary values for water quality degradation and contamination
### Unique Features
- **Standardized monetary conversion** enables comparison across impact types and geographies
- **Comprehensive global coverage** includes nearly all countries and territories
- **Detailed methodological documentation** ensures transparency and reproducibility
- **Currency flexibility** allows conversion to local currencies for regional analysis
- **Integration-ready** format supports incorporation into existing impact accounting systems
### Use Cases
The GVFD and this navigator can support:
- **Corporate sustainability reporting** - Quantify environmental impacts in financial terms
- **ESG analysis** - Evaluate environmental performance with monetary metrics
- **Policy modeling** - Assess economic costs of environmental impacts for policy decisions
- **Impact investing** - Evaluate and compare environmental impact of investments
- **AI and machine learning** - Train models on environmental impact valuations
- **Academic research** - Study relationships between environmental impacts and economic values
- **Correlation analysis** - Identify patterns in how different countries value environmental impacts
---
## Data Source and Attribution
**Original Data**: [IFVI Global Value Factor Database V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
**Dataset Developer**: International Foundation for Valuing Impacts (IFVI)
**Official Website**: [https://www.ifvi.org/](https://www.ifvi.org/)
**Navigator Tool**: This is an **unofficial visualization tool** created to make the GVFD more accessible.
For official data, methodologies, and authoritative guidance, please consult IFVI's official resources.
---
## Disclaimer
This navigator is an independent visualization tool and is not officially endorsed by IFVI. While every effort
has been made to accurately represent the data, users should refer to the original GVFD dataset and IFVI's
official documentation for authoritative information and methodology details.
The monetary values provided represent economic valuations of environmental impacts based on IFVI's methodology
and should be interpreted within the context of their methodological framework.
---
## Technical Details
- **Built with**: Gradio, Plotly, Pandas, Hugging Face Datasets
- **Data Format**: JSON files loaded locally
- **Visualizations**: Interactive charts using Plotly for exploration and analysis
- **Filtering**: Dynamic filtering by country, category, location, impact, region, and value ranges
For questions, feedback, or issues with this navigator tool, please visit the
[GitHub repository](https://huggingface.co/spaces/danielrosehill/GVFD-Navigator) or contact the tool maintainer.
""")
    # Footer text
    gr.Markdown("""
---
### About the Data
The Global Value Factor Database (GVFD) by the International Foundation for Valuing Impacts (IFVI)
provides standardized methods to convert environmental and social impacts into monetary values.
**Categories**:
- Air Pollution
- Land Use and Conservation
- Waste Generation
- Water Consumption
- Water Pollution
**Coverage**: 229 countries and territories, 50 US states, 7 world regions
**Disclaimer**: This is an unofficial visualization tool. For official data and methodology,
please visit [IFVI's website](https://www.ifvi.org/).
""")

    # Event handlers
    def update_dropdowns_on_topic_change(topics):
        """Rebuild the category, location and impact dropdowns for the selected topics.

        Args:
            topics: the topic selector's current value (may be empty or None).

        Returns:
            Three gr.Dropdown updates (categories, locations, impacts), each
            with choices limited to the selected topics and the current
            selection cleared.
        """
        # Normalise to a tuple, or None when nothing is selected
        topics_tuple = tuple(topics) if topics else None
        return (
            gr.Dropdown(choices=get_specific_categories(topics_tuple), value=None),
            gr.Dropdown(choices=get_locations(topics_tuple), value=None),
            gr.Dropdown(choices=get_impacts(topics_tuple), value=None)
        )

    def update_all(search, countries, topics, categories, locations, impacts, regions, min_val, max_val):
        """Update all views when filters are applied.

        The arguments arrive in the order of the ``inputs`` list wired to
        ``refresh_btn.click`` below.

        Returns:
            A 5-tuple (table, bar chart, map, comparison chart, box plot)
            matching the ``outputs`` list of that click handler.
        """
        # First filter the data
        filtered_df = filter_data(
            countries=countries,
            topics=topics,
            categories=categories,
            locations=locations,
            impacts=impacts,
            regions=regions,
            min_value=min_val,
            max_value=max_val,
            search_text=search
        )
        # Then pass the filtered dataframe to all visualization functions
        return (
            get_data_table(filtered_df),
            create_bar_chart(filtered_df),
            create_map_visualization(filtered_df),
            create_comparison_chart(filtered_df),
            create_box_plot(filtered_df)
        )

    def load_initial_view():
        """Load initial view with a small sample of data.

        Returns:
            The same 5-tuple shape as update_all(), built from the first 500
            rows of the dataset (or from the empty frame when nothing loaded).
        """
        df = get_df()
        # Show a small sample initially to avoid loading everything
        sample_df = df.head(500) if not df.empty else df
        return (
            get_data_table(sample_df),
            create_bar_chart(sample_df),
            create_map_visualization(sample_df),
            create_comparison_chart(sample_df),
            create_box_plot(sample_df)
        )

    # Wire up topic selector to update dependent dropdowns
    topic_selector.change(
        fn=update_dropdowns_on_topic_change,
        inputs=[topic_selector],
        outputs=[category_selector, location_selector, impact_selector]
    )
    # Wire up the unified filter button
    # (the order of `inputs` must match update_all's parameter order)
    refresh_btn.click(
        fn=update_all,
        inputs=[
            search_box,
            country_selector,
            topic_selector,
            category_selector,
            location_selector,
            impact_selector,
            region_selector,
            min_value,
            max_value
        ],
        outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
    )
    # Load initial view when the app opens
    demo.load(
        fn=load_initial_view,
        inputs=None,
        outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
    )

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()