Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import json | |
| import os | |
| import numpy as np | |
| # Load the dataset | |
def load_data():
    """Load the GVFD records from the ``data.json`` file next to this module.

    The file may be either a JSON object holding the rows under a
    ``records`` key or a bare JSON list of row objects.

    Returns:
        pandas.DataFrame: One row per record. An empty DataFrame is returned
        when the file is missing, unreadable or malformed; the error is
        printed instead of raised so the UI can still start.
    """
    json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data.json')
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default text encoding, which is not UTF-8 everywhere.
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Extract records from the JSON structure (or accept a bare list).
        records = data.get('records', []) if isinstance(data, dict) else data
        return pd.DataFrame(records)
    except Exception as e:
        # Deliberately best-effort: any failure degrades to "no data".
        print(f"Error loading dataset: {e}")
        return pd.DataFrame()
# Load the dataset once at import time. Every helper below reads this
# module-level DataFrame; it is empty when load_data() could not read the file.
df = load_data()
def get_countries():
    """Return the distinct, non-null values of the ``country`` column, sorted.

    Returns an empty list when the dataset is empty or has no ``country``
    column.
    """
    if df.empty or 'country' not in df.columns:
        return []
    names = df['country'].dropna().unique().tolist()
    names.sort()
    return names
def get_categories():
    """Return the distinct, non-null values of the ``topic`` column, sorted.

    Despite the function name, the values come from ``topic``. Returns an
    empty list when the dataset is empty or has no ``topic`` column.
    """
    if df.empty or 'topic' not in df.columns:
        return []
    topics = df['topic'].dropna().unique().tolist()
    topics.sort()
    return topics
def get_specific_categories():
    """Return the distinct, non-null values of the ``category`` column, sorted.

    Returns an empty list when the dataset is empty or has no ``category``
    column.
    """
    if df.empty or 'category' not in df.columns:
        return []
    specific = df['category'].dropna().unique().tolist()
    specific.sort()
    return specific
def get_locations():
    """Return the distinct, non-null values of the ``location`` column, sorted.

    Returns an empty list when the dataset is empty or has no ``location``
    column.
    """
    if df.empty or 'location' not in df.columns:
        return []
    places = df['location'].dropna().unique().tolist()
    places.sort()
    return places
def get_impacts():
    """Return the distinct, non-null values of the ``impact`` column, sorted.

    Returns an empty list when the dataset is empty or has no ``impact``
    column.
    """
    if df.empty or 'impact' not in df.columns:
        return []
    impact_types = df['impact'].dropna().unique().tolist()
    impact_types.sort()
    return impact_types
def get_regions():
    """Return the distinct, non-null values of the ``region`` column, sorted.

    Returns an empty list when the dataset is empty or has no ``region``
    column.
    """
    if df.empty or 'region' not in df.columns:
        return []
    region_names = df['region'].dropna().unique().tolist()
    region_names.sort()
    return region_names
def filter_data(countries=None, topics=None, categories=None, locations=None, impacts=None, regions=None, min_value=None, max_value=None, search_text=None):
    """Return the rows of the module-level ``df`` that match every given filter.

    All filters are optional and are combined with AND.

    Args:
        countries, topics, categories, locations, impacts, regions:
            Collections of allowed values for the ``country``, ``topic``,
            ``category``, ``location``, ``impact`` and ``region`` columns.
            ``None`` or an empty collection disables that filter. A single
            string is treated as a one-element selection.
        min_value: Inclusive lower bound on ``value`` (``None`` = no bound).
        max_value: Inclusive upper bound on ``value`` (``None`` = no bound).
        search_text: Case-insensitive substring looked up in the six text
            columns above; a row matches if any of them contains it. The
            text is matched literally, not as a regular expression.

    Returns:
        pandas.DataFrame: An independent copy of the matching rows, or an
        empty DataFrame when the dataset itself is empty.
    """
    if df.empty:
        return pd.DataFrame()

    text_filters = (
        ('country', countries),
        ('topic', topics),
        ('category', categories),
        ('location', locations),
        ('impact', impacts),
        ('region', regions),
    )

    # Accumulate one boolean mask instead of re-slicing the frame per filter.
    keep = pd.Series(True, index=df.index)

    for column, selected in text_filters:
        if not selected:
            continue
        if isinstance(selected, str):
            # isin() rejects a bare string; treat it as a single choice.
            selected = [selected]
        keep &= df[column].isin(selected)

    # Filter by value range ("is not None" so that a bound of 0 is honoured).
    if min_value is not None:
        keep &= df['value'] >= min_value
    if max_value is not None:
        keep &= df['value'] <= max_value

    # Search filter - search across multiple text columns.
    if search_text and search_text.strip():
        needle = search_text.strip().lower()
        found = pd.Series(False, index=df.index)
        for column, _ in text_filters:
            if column not in df.columns:
                continue
            # regex=False: user input such as "PM2.5" or "(" must be matched
            # literally instead of being interpreted as a pattern.
            hits = df[column].str.lower().str.contains(needle, na=False, regex=False)
            found |= hits.astype(bool)
        keep &= found

    # Copy so callers can add columns without touching the shared dataset.
    return df.loc[keep.astype(bool)].copy()
def create_bar_chart(filtered_df):
    """Build a grouped bar chart of mean value factor per country.

    Bars are coloured by a composite "category (location, impact)" label,
    and values are averaged per (country, label, topic) group.

    Args:
        filtered_df: DataFrame with ``country``, ``topic``, ``category``,
            ``location``, ``impact`` and ``value`` columns. It is not
            modified.

    Returns:
        A Plotly figure; an empty figure with a "no data" annotation when
        ``filtered_df`` is empty.
    """
    if filtered_df.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available for the selected filters",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        return fig

    # Composite key for the proper comparison level: category + location +
    # impact. assign() returns a new frame, so the caller's DataFrame does
    # not silently gain an extra column.
    plot_df = filtered_df.assign(
        impact_category=(
            filtered_df['category'].astype(str) + ' (' +
            filtered_df['location'].astype(str) + ', ' +
            filtered_df['impact'].astype(str) + ')'
        )
    )

    # 'topic' is part of the grouping only so that it survives for hover_data.
    grouped = (
        plot_df.groupby(['country', 'impact_category', 'topic'])['value']
        .mean()
        .reset_index()
    )

    fig = px.bar(
        grouped,
        x='country',
        y='value',
        color='impact_category',
        title="Value Factors by Country and Specific Impact Category",
        labels={'value': "Value Factor (USD)", 'country': "Country", 'impact_category': "Impact Category"},
        barmode='group',
        hover_data=['topic']
    )
    fig.update_layout(xaxis_tickangle=-45, height=600)
    return fig
def create_map_visualization(filtered_df):
    """Build a world choropleth of the mean value factor per country.

    Args:
        filtered_df: DataFrame with ``country``, ``iso_code`` and ``value``
            columns.

    Returns:
        A Plotly figure; an empty figure with a "no data" annotation when
        ``filtered_df`` is empty.
    """
    if filtered_df.empty:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No data available for the selected filters",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        return placeholder

    by_country = filtered_df.groupby('country')
    # Mean value per country ...
    averages = by_country['value'].mean().reset_index()
    # ... joined with the first ISO code recorded for that country.
    codes = by_country['iso_code'].first().reset_index()
    map_df = averages.merge(codes, on='country')

    world_map = px.choropleth(
        map_df,
        locations='iso_code',
        locationmode='ISO-3',
        color='value',
        hover_name='country',
        title="Global Value Factors by Country",
        labels={'value': "Avg Value Factor (USD)"},
        color_continuous_scale="Viridis"
    )
    world_map.update_layout(height=600)
    return world_map
def create_comparison_chart(filtered_df):
    """Build a grouped bar chart comparing impact categories across countries.

    The x axis is a composite "category (location, impact)" label and bars
    are coloured by country. Values are averaged per (label, country, topic)
    group.

    Args:
        filtered_df: DataFrame with ``country``, ``topic``, ``category``,
            ``location``, ``impact`` and ``value`` columns. It is not
            modified.

    Returns:
        A Plotly figure; an empty figure with a "no data" annotation when
        ``filtered_df`` is empty.
    """
    if filtered_df.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available for the selected filters",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        return fig

    # Composite key for the proper comparison level: category + location +
    # impact. assign() returns a new frame, so the caller's DataFrame does
    # not silently gain an extra column.
    plot_df = filtered_df.assign(
        impact_category=(
            filtered_df['category'].astype(str) + ' (' +
            filtered_df['location'].astype(str) + ', ' +
            filtered_df['impact'].astype(str) + ')'
        )
    )

    # 'topic' is part of the grouping only so that it survives for hover_data.
    grouped = (
        plot_df.groupby(['impact_category', 'country', 'topic'])['value']
        .mean()
        .reset_index()
    )

    fig = px.bar(
        grouped,
        x='impact_category',
        y='value',
        color='country',
        title="Specific Impact Category Comparison Across Countries",
        labels={'value': "Value Factor (USD)", 'impact_category': "Impact Category"},
        barmode='group',
        hover_data=['topic']
    )
    fig.update_layout(xaxis_tickangle=-45, height=600)
    return fig
def create_box_plot(filtered_df):
    """Build a box plot of value-factor distributions per impact category.

    The x axis is a composite "category (location, impact)" label and boxes
    are coloured by country.

    Args:
        filtered_df: DataFrame with ``country``, ``topic``, ``category``,
            ``location``, ``impact`` and ``value`` columns. It is not
            modified.

    Returns:
        A Plotly figure; an empty figure with a "no data" annotation when
        ``filtered_df`` is empty.
    """
    if filtered_df.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available for the selected filters",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        return fig

    # Composite key for the proper comparison level: category + location +
    # impact. assign() returns a new frame, so the caller's DataFrame does
    # not silently gain an extra column.
    plot_df = filtered_df.assign(
        impact_category=(
            filtered_df['category'].astype(str) + ' (' +
            filtered_df['location'].astype(str) + ', ' +
            filtered_df['impact'].astype(str) + ')'
        )
    )

    fig = px.box(
        plot_df,
        x='impact_category',
        y='value',
        color='country',
        title="Distribution of Value Factors by Specific Impact Category",
        labels={'value': "Value Factor (USD)", 'impact_category': "Impact Category"},
        hover_data=['topic']
    )
    fig.update_layout(xaxis_tickangle=-45, height=600)
    return fig
def get_summary_stats(filtered_df):
    """Render descriptive statistics of the ``value`` column as Markdown.

    Args:
        filtered_df: DataFrame with a ``value`` column. Entries that cannot
            be interpreted as numbers are ignored.

    Returns:
        str: A Markdown bullet list (count, mean, median, standard
        deviation, min, max, quartiles), or a plain "no data" message when
        there are no numeric values to summarise. Statistics that are
        undefined (e.g. the standard deviation of a single point) are shown
        as ``N/A`` rather than ``$nan``.
    """
    no_data = "No data available for the selected filters"
    if filtered_df.empty:
        return no_data

    # Coerce first: describe() on a non-numeric column has no 'mean'/'std'.
    values = pd.to_numeric(filtered_df['value'], errors='coerce').dropna()
    if values.empty:
        return no_data

    stats = values.describe()

    def money(key):
        """Format one statistic as dollars, or N/A when it is undefined."""
        amount = stats[key]
        return f"${amount:.4f}" if pd.notna(amount) else "N/A"

    lines = [
        "### Summary Statistics",
        "",
        f"- **Count**: {stats['count']:.0f} data points",
        f"- **Mean**: {money('mean')}",
        f"- **Median**: {money('50%')}",
        f"- **Std Dev**: {money('std')}",
        f"- **Min**: {money('min')}",
        f"- **Max**: {money('max')}",
        f"- **25th Percentile**: {money('25%')}",
        f"- **75th Percentile**: {money('75%')}",
    ]
    return "\n" + "\n".join(lines) + "\n"
def get_data_table(filtered_df, max_rows=1000):
    """Prepare the filtered rows for display in the data table.

    Args:
        filtered_df: Rows to show.
        max_rows: Maximum number of leading rows to keep.

    Returns:
        pandas.DataFrame: A copy of the first ``max_rows`` rows whose
        ``value`` column (if present) is rendered as ``$1,234.56`` strings,
        with missing values shown as empty strings. When ``filtered_df`` is
        empty, a one-cell frame carrying a "no data" message.
    """
    if filtered_df.empty:
        return pd.DataFrame({"Message": ["No data available for the selected filters"]})

    def _as_currency(amount):
        # Dollar sign, thousands separators, two decimals; blanks for NaN.
        if pd.notna(amount):
            return f"${amount:,.2f}"
        return ""

    # Work on a copy so the caller's frame keeps its numeric values.
    table = filtered_df.head(max_rows).copy()
    if 'value' not in table.columns:
        return table

    table['value'] = table['value'].apply(_as_currency)
    return table
# Create Gradio interface: filter controls at the top, the data table as the
# primary view, then summary statistics and the chart tabs. A single
# "Apply Filters" button refreshes every output at once.
with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Global Value Factor Database Navigator

    Explore environmental and social impact value factors by country from the IFVI Global Value Factor Database.

    This visualization tool allows you to:
    - Filter by country and impact topic (Air Pollution, Water Pollution, etc.)
    - Compare **specific impact categories** (e.g., PM2.5 in Urban areas for Primary Health)
    - View interactive data table as primary visualization
    - Explore charts, maps, and statistical distributions

    **Important**: Value factors are comparable at the **category + location + impact** level within each topic.
    For example, within "Air Pollution", individual measurements like "PM2.5 (Urban, Primary Health)" are comparable across countries.

    **Data Source**: [IFVI Global Value Factor Database V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
    """)

    # Filters section at the top
    gr.Markdown("## 🔍 Filters")
    with gr.Row():
        with gr.Column(scale=2):
            country_selector = gr.Dropdown(
                choices=get_countries(),
                multiselect=True,
                label="Select Country/Countries",
                info="Start typing to search...",
                value=None
            )
        with gr.Column(scale=2):
            # Choices come from get_categories(), i.e. the 'topic' column.
            category_selector = gr.Dropdown(
                choices=get_categories(),
                multiselect=True,
                label="Select Impact Categories",
                info="Air Pollution, Water Pollution, Land Use, etc.",
                value=None
            )
        with gr.Column(scale=1):
            refresh_btn = gr.Button("Apply Filters", variant="primary", size="lg")

    # Data table as primary visualization
    gr.Markdown("## 📊 Data Table")
    data_table = gr.Dataframe(
        label="Filtered Value Factors",
        wrap=True,
        interactive=False,
        value=df.head(100)  # Show initial data
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Summary Statistics")
            stats_output = gr.Markdown()

    # Additional visualizations below the table
    gr.Markdown("## 📈 Additional Visualizations")
    with gr.Tabs():
        with gr.Tab("Bar Chart"):
            bar_chart = gr.Plot(label="Value Factors by Country")
        with gr.Tab("World Map"):
            map_chart = gr.Plot(label="Global Value Factor Distribution")
        with gr.Tab("Category Comparison"):
            comparison_chart = gr.Plot(label="Category Comparison")
        with gr.Tab("Distribution"):
            box_plot = gr.Plot(label="Value Factor Distribution")
        with gr.Tab("About"):
            gr.Markdown("""
            # About GVFD Navigator

            ## Purpose of This Tool

            The **GVFD Navigator** is an interactive visualization tool designed to help researchers, analysts, policymakers,
            and sustainability professionals explore the Global Value Factor Database (GVFD). This navigator enables you to:

            - **Filter and explore** environmental and social impact value factors by country and category
            - **Visualize patterns** in how different countries value environmental impacts
            - **Compare regions** to identify global trends and outliers
            - **Export and analyze** filtered data for your own research or reporting needs
            - **Understand monetary valuations** of environmental impacts across 229 countries

            This tool transforms the raw GVFD dataset into accessible, interactive visualizations that make it easier to
            understand how environmental and social impacts translate into economic terms across different regions.

            ---

            ## About the Global Value Factor Database (GVFD)

            ### What is the GVFD?

            The **Global Value Factor Database** is a pioneering dataset developed by the [International Foundation for
            Valuing Impacts (IFVI)](https://www.ifvi.org/) that converts non-financial environmental and social impacts
            into standardized monetary values (US Dollars).

            The database represents a groundbreaking framework for evaluating global value creation by translating
            companies' environmental and social impacts into financial equivalents, enabling a more holistic assessment
            of corporate and organizational performance.

            ### Methodology

            The GVFD uses a rigorous methodology to:

            - Convert non-financial environmental and social impacts into standardized monetary values
            - Provide value factors as multipliers to calculate monetary equivalents of impacts
            - Standardize impact accounting across different domains and geographies
            - Enable currency conversion for non-USD jurisdictions
            - Support integration into financial reporting and impact accounting systems

            ### Coverage

            - **229 countries and territories** worldwide
            - **205 countries with ISO codes** (89.5% coverage)
            - **~115,000 individual measurements** across all categories
            - **7 major world regions** represented
            - **50 US states** included for detailed US analysis

            ### Impact Categories

            The GVFD covers five major environmental impact categories:

            1. **Air Pollution** - Value factors for atmospheric emissions and air quality impacts
            2. **Land Use and Conservation** - Monetary values for land use changes and conservation impacts
            3. **Waste Generation** - Economic valuations of waste production and management
            4. **Water Consumption** - Value factors for water use and depletion
            5. **Water Pollution** - Monetary values for water quality degradation and contamination

            ### Unique Features

            - **Standardized monetary conversion** enables comparison across impact types and geographies
            - **Comprehensive global coverage** includes nearly all countries and territories
            - **Detailed methodological documentation** ensures transparency and reproducibility
            - **Currency flexibility** allows conversion to local currencies for regional analysis
            - **Integration-ready** format supports incorporation into existing impact accounting systems

            ### Use Cases

            The GVFD and this navigator can support:

            - **Corporate sustainability reporting** - Quantify environmental impacts in financial terms
            - **ESG analysis** - Evaluate environmental performance with monetary metrics
            - **Policy modeling** - Assess economic costs of environmental impacts for policy decisions
            - **Impact investing** - Evaluate and compare environmental impact of investments
            - **AI and machine learning** - Train models on environmental impact valuations
            - **Academic research** - Study relationships between environmental impacts and economic values
            - **Correlation analysis** - Identify patterns in how different countries value environmental impacts

            ---

            ## Data Source and Attribution

            **Original Data**: [IFVI Global Value Factor Database V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)

            **Dataset Developer**: International Foundation for Valuing Impacts (IFVI)

            **Official Website**: [https://www.ifvi.org/](https://www.ifvi.org/)

            **Navigator Tool**: This is an **unofficial visualization tool** created to make the GVFD more accessible.
            For official data, methodologies, and authoritative guidance, please consult IFVI's official resources.

            ---

            ## Disclaimer

            This navigator is an independent visualization tool and is not officially endorsed by IFVI. While every effort
            has been made to accurately represent the data, users should refer to the original GVFD dataset and IFVI's
            official documentation for authoritative information and methodology details.

            The monetary values provided represent economic valuations of environmental impacts based on IFVI's methodology
            and should be interpreted within the context of their methodological framework.

            ---

            ## Technical Details

            - **Built with**: Gradio, Plotly, Pandas
            - **Data Format**: JSON file (`data.json`) bundled with the app
            - **Visualizations**: Interactive charts using Plotly for exploration and analysis
            - **Filtering**: Dynamic filtering by country and impact category

            For questions, feedback, or issues with this navigator tool, please visit the
            [Hugging Face Space](https://huggingface.co/spaces/danielrosehill/GVFD-Navigator) or contact the tool maintainer.
            """)

    gr.Markdown("""
    ---

    ### About the Data

    The Global Value Factor Database (GVFD) by the International Foundation for Valuing Impacts (IFVI)
    provides standardized methods to convert environmental and social impacts into monetary values.

    **Categories**:
    - Air Pollution
    - Land Use and Conservation
    - Waste Generation
    - Water Consumption
    - Water Pollution

    **Coverage**: 229 countries and territories, 50 US states, 7 world regions

    **Disclaimer**: This is an unofficial visualization tool. For official data and methodology,
    please visit [IFVI's website](https://www.ifvi.org/).
    """)

    # Event handlers
    def update_all(countries, categories):
        """Recompute every output for the current filter selection.

        Args:
            countries: Values selected in the country dropdown (may be None).
            categories: Values selected in the impact-category dropdown (may
                be None). These are values of the ``topic`` column.

        Returns:
            tuple: (data table, summary Markdown, bar chart, world map,
            comparison chart, box plot), in the order of the ``outputs``
            list wired to the button below.
        """
        # Filter once and hand the same frame to every view. The helpers
        # below take a filtered DataFrame, not the raw dropdown selections.
        filtered = filter_data(countries=countries, topics=categories)
        return (
            get_data_table(filtered),
            get_summary_stats(filtered),
            create_bar_chart(filtered),
            create_map_visualization(filtered),
            create_comparison_chart(filtered),
            create_box_plot(filtered)
        )

    # Wire up the unified filter button
    refresh_btn.click(
        fn=update_all,
        inputs=[country_selector, category_selector],
        outputs=[data_table, stats_output, bar_chart, map_chart, comparison_chart, box_plot]
    )

if __name__ == "__main__":
    demo.launch()