danielrosehill committed on
Commit
b6e726c
·
1 Parent(s): 968d8f0
Files changed (2) hide show
  1. app.py +194 -63
  2. app_old.py +536 -0
app.py CHANGED
@@ -34,8 +34,8 @@ def get_countries():
34
  return sorted(df['country'].dropna().unique().tolist())
35
  return []
36
 
37
- def get_categories():
38
- """Get available categories from the dataset"""
39
  if df.empty:
40
  return []
41
  # Get unique topics from the data (topic column contains the categories)
@@ -43,7 +43,39 @@ def get_categories():
43
  return sorted(df['topic'].dropna().unique().tolist())
44
  return []
45
 
46
- def filter_data(countries, categories, min_value=None, max_value=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  """Filter dataset based on user selections"""
48
  if df.empty:
49
  return pd.DataFrame()
@@ -54,9 +86,25 @@ def filter_data(countries, categories, min_value=None, max_value=None):
54
  if countries and len(countries) > 0:
55
  filtered_df = filtered_df[filtered_df['country'].isin(countries)]
56
 
57
- # Filter by categories (using 'topic' column)
 
 
 
 
58
  if categories and len(categories) > 0:
59
- filtered_df = filtered_df[filtered_df['topic'].isin(categories)]
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  # Filter by value range
62
  if min_value is not None or max_value is not None:
@@ -65,12 +113,23 @@ def filter_data(countries, categories, min_value=None, max_value=None):
65
  if max_value is not None:
66
  filtered_df = filtered_df[filtered_df['value'] <= max_value]
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  return filtered_df
69
 
70
- def create_bar_chart(countries, categories):
71
  """Create a bar chart showing value factors by country and specific impact category"""
72
- filtered_df = filter_data(countries, categories)
73
-
74
  if filtered_df.empty:
75
  fig = go.Figure()
76
  fig.add_annotation(
@@ -81,6 +140,7 @@ def create_bar_chart(countries, categories):
81
  return fig
82
 
83
  # Create a composite key for proper comparison level: category + location + impact
 
84
  filtered_df['impact_category'] = (
85
  filtered_df['category'].astype(str) + ' (' +
86
  filtered_df['location'].astype(str) + ', ' +
@@ -104,10 +164,8 @@ def create_bar_chart(countries, categories):
104
  fig.update_layout(xaxis_tickangle=-45, height=600)
105
  return fig
106
 
107
- def create_map_visualization(countries, categories):
108
  """Create a choropleth map showing value factors by country"""
109
- filtered_df = filter_data(countries, categories)
110
-
111
  if filtered_df.empty:
112
  fig = go.Figure()
113
  fig.add_annotation(
@@ -138,10 +196,8 @@ def create_map_visualization(countries, categories):
138
  fig.update_layout(height=600)
139
  return fig
140
 
141
- def create_comparison_chart(countries, categories):
142
  """Create a comparison chart showing specific impact categories across selected countries"""
143
- filtered_df = filter_data(countries, categories)
144
-
145
  if filtered_df.empty:
146
  fig = go.Figure()
147
  fig.add_annotation(
@@ -152,6 +208,7 @@ def create_comparison_chart(countries, categories):
152
  return fig
153
 
154
  # Create a composite key for proper comparison level: category + location + impact
 
155
  filtered_df['impact_category'] = (
156
  filtered_df['category'].astype(str) + ' (' +
157
  filtered_df['location'].astype(str) + ', ' +
@@ -175,10 +232,8 @@ def create_comparison_chart(countries, categories):
175
  fig.update_layout(xaxis_tickangle=-45, height=600)
176
  return fig
177
 
178
- def create_box_plot(countries, categories):
179
  """Create a box plot showing distribution of value factors by specific impact categories"""
180
- filtered_df = filter_data(countries, categories)
181
-
182
  if filtered_df.empty:
183
  fig = go.Figure()
184
  fig.add_annotation(
@@ -189,6 +244,7 @@ def create_box_plot(countries, categories):
189
  return fig
190
 
191
  # Create a composite key for proper comparison level: category + location + impact
 
192
  filtered_df['impact_category'] = (
193
  filtered_df['category'].astype(str) + ' (' +
194
  filtered_df['location'].astype(str) + ', ' +
@@ -208,10 +264,8 @@ def create_box_plot(countries, categories):
208
  fig.update_layout(xaxis_tickangle=-45, height=600)
209
  return fig
210
 
211
- def get_summary_stats(countries, categories):
212
  """Generate summary statistics for filtered data"""
213
- filtered_df = filter_data(countries, categories)
214
-
215
  if filtered_df.empty:
216
  return "No data available for the selected filters"
217
 
@@ -221,25 +275,30 @@ def get_summary_stats(countries, categories):
221
  ### Summary Statistics
222
 
223
  - **Count**: {stats['count']:.0f} data points
224
- - **Mean**: ${stats['mean']:.4f}
225
- - **Median**: ${stats['50%']:.4f}
226
- - **Std Dev**: ${stats['std']:.4f}
227
- - **Min**: ${stats['min']:.4f}
228
- - **Max**: ${stats['max']:.4f}
229
- - **25th Percentile**: ${stats['25%']:.4f}
230
- - **75th Percentile**: ${stats['75%']:.4f}
231
  """
232
 
233
  return summary
234
 
235
- def get_data_table(countries, categories, max_rows=100):
236
- """Return filtered data as a dataframe"""
237
- filtered_df = filter_data(countries, categories)
238
-
239
  if filtered_df.empty:
240
  return pd.DataFrame({"Message": ["No data available for the selected filters"]})
241
 
242
- return filtered_df.head(max_rows)
 
 
 
 
 
 
 
243
 
244
  # Create Gradio interface
245
  with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
@@ -249,10 +308,9 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
249
  Explore environmental and social impact value factors by country from the IFVI Global Value Factor Database.
250
 
251
  This visualization tool allows you to:
252
- - Filter by country and impact topic (Air Pollution, Water Pollution, etc.)
253
- - Compare **specific impact categories** (e.g., PM2.5 in Urban areas for Primary Health)
254
- - View interactive data table as primary visualization
255
- - Explore charts, maps, and statistical distributions
256
 
257
  **Important**: Value factors are comparable at the **category + location + impact** level within each topic.
258
  For example, within "Air Pollution", individual measurements like "PM2.5 (Urban, Primary Health)" are comparable across countries.
@@ -260,46 +318,96 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
260
  **Data Source**: [IFVI Global Value Factor Database V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
261
  """)
262
 
263
- # Filters section at the top
264
- gr.Markdown("## 🔍 Filters")
 
 
 
 
 
 
 
 
 
265
 
266
  with gr.Row():
267
- with gr.Column(scale=2):
268
  country_selector = gr.Dropdown(
269
  choices=get_countries(),
270
  multiselect=True,
271
- label="Select Country/Countries",
272
- info="Start typing to search...",
273
  value=None
274
  )
275
- with gr.Column(scale=2):
276
- category_selector = gr.Dropdown(
277
- choices=get_categories(),
278
  multiselect=True,
279
- label="Select Impact Categories",
280
  info="Air Pollution, Water Pollution, Land Use, etc.",
281
  value=None
282
  )
283
- with gr.Column(scale=1):
284
- refresh_btn = gr.Button("Apply Filters", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
 
286
  # Data table as primary visualization
287
- gr.Markdown("## 📊 Data Table")
 
288
 
289
  data_table = gr.Dataframe(
290
  label="Filtered Value Factors",
291
  wrap=True,
292
  interactive=False,
293
- value=df.head(100) # Show initial data
 
294
  )
295
 
296
  with gr.Row():
297
  with gr.Column():
298
  gr.Markdown("### Summary Statistics")
299
- stats_output = gr.Markdown()
300
 
301
- # Additional visualizations below the table
302
- gr.Markdown("## 📈 Additional Visualizations")
 
303
 
304
  with gr.Tabs():
305
  with gr.Tab("Bar Chart"):
@@ -423,15 +531,14 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
423
  ## Technical Details
424
 
425
  - **Built with**: Gradio, Plotly, Pandas, Hugging Face Datasets
426
- - **Data Format**: Parquet files loaded from Hugging Face Hub
427
  - **Visualizations**: Interactive charts using Plotly for exploration and analysis
428
- - **Filtering**: Dynamic filtering by country, category, and value ranges
429
 
430
  For questions, feedback, or issues with this navigator tool, please visit the
431
  [GitHub repository](https://huggingface.co/spaces/danielrosehill/GVFD-Navigator) or contact the tool maintainer.
432
  """)
433
 
434
-
435
  gr.Markdown("""
436
  ---
437
  ### About the Data
@@ -453,21 +560,45 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
453
  """)
454
 
455
  # Event handlers
456
- def update_all(countries, categories):
457
  """Update all views when filters are applied"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  return (
459
- get_data_table(countries, categories),
460
- get_summary_stats(countries, categories),
461
- create_bar_chart(countries, categories),
462
- create_map_visualization(countries, categories),
463
- create_comparison_chart(countries, categories),
464
- create_box_plot(countries, categories)
465
  )
466
 
467
  # Wire up the unified filter button
468
  refresh_btn.click(
469
  fn=update_all,
470
- inputs=[country_selector, category_selector],
 
 
 
 
 
 
 
 
 
 
471
  outputs=[data_table, stats_output, bar_chart, map_chart, comparison_chart, box_plot]
472
  )
473
 
 
34
  return sorted(df['country'].dropna().unique().tolist())
35
  return []
36
 
37
+ def get_topics():
38
+ """Get available topics from the dataset"""
39
  if df.empty:
40
  return []
41
  # Get unique topics from the data (topic column contains the categories)
 
43
  return sorted(df['topic'].dropna().unique().tolist())
44
  return []
45
 
46
+ def get_specific_categories():
47
+ """Get unique specific categories (e.g., PM2.5, NOx, etc.)"""
48
+ if df.empty:
49
+ return []
50
+ if 'category' in df.columns:
51
+ return sorted(df['category'].dropna().unique().tolist())
52
+ return []
53
+
54
+ def get_locations():
55
+ """Get unique locations (e.g., Urban, Rural, etc.)"""
56
+ if df.empty:
57
+ return []
58
+ if 'location' in df.columns:
59
+ return sorted(df['location'].dropna().unique().tolist())
60
+ return []
61
+
62
+ def get_impacts():
63
+ """Get unique impact types (e.g., Primary Health, etc.)"""
64
+ if df.empty:
65
+ return []
66
+ if 'impact' in df.columns:
67
+ return sorted(df['impact'].dropna().unique().tolist())
68
+ return []
69
+
70
+ def get_regions():
71
+ """Get unique regions"""
72
+ if df.empty:
73
+ return []
74
+ if 'region' in df.columns:
75
+ return sorted(df['region'].dropna().unique().tolist())
76
+ return []
77
+
78
+ def filter_data(countries=None, topics=None, categories=None, locations=None, impacts=None, regions=None, min_value=None, max_value=None, search_text=None):
79
  """Filter dataset based on user selections"""
80
  if df.empty:
81
  return pd.DataFrame()
 
86
  if countries and len(countries) > 0:
87
  filtered_df = filtered_df[filtered_df['country'].isin(countries)]
88
 
89
+ # Filter by topics (Air Pollution, Water Pollution, etc.)
90
+ if topics and len(topics) > 0:
91
+ filtered_df = filtered_df[filtered_df['topic'].isin(topics)]
92
+
93
+ # Filter by specific categories (PM2.5, NOx, etc.)
94
  if categories and len(categories) > 0:
95
+ filtered_df = filtered_df[filtered_df['category'].isin(categories)]
96
+
97
+ # Filter by locations (Urban, Rural, etc.)
98
+ if locations and len(locations) > 0:
99
+ filtered_df = filtered_df[filtered_df['location'].isin(locations)]
100
+
101
+ # Filter by impacts (Primary Health, etc.)
102
+ if impacts and len(impacts) > 0:
103
+ filtered_df = filtered_df[filtered_df['impact'].isin(impacts)]
104
+
105
+ # Filter by regions
106
+ if regions and len(regions) > 0:
107
+ filtered_df = filtered_df[filtered_df['region'].isin(regions)]
108
 
109
  # Filter by value range
110
  if min_value is not None or max_value is not None:
 
113
  if max_value is not None:
114
  filtered_df = filtered_df[filtered_df['value'] <= max_value]
115
 
116
+ # Search filter - search across multiple text columns
117
+ if search_text and search_text.strip():
118
+ search_text = search_text.strip().lower()
119
+ mask = (
120
+ filtered_df['country'].str.lower().str.contains(search_text, na=False) |
121
+ filtered_df['topic'].str.lower().str.contains(search_text, na=False) |
122
+ filtered_df['category'].str.lower().str.contains(search_text, na=False) |
123
+ filtered_df['location'].str.lower().str.contains(search_text, na=False) |
124
+ filtered_df['impact'].str.lower().str.contains(search_text, na=False) |
125
+ filtered_df['region'].str.lower().str.contains(search_text, na=False)
126
+ )
127
+ filtered_df = filtered_df[mask]
128
+
129
  return filtered_df
130
 
131
+ def create_bar_chart(filtered_df):
132
  """Create a bar chart showing value factors by country and specific impact category"""
 
 
133
  if filtered_df.empty:
134
  fig = go.Figure()
135
  fig.add_annotation(
 
140
  return fig
141
 
142
  # Create a composite key for proper comparison level: category + location + impact
143
+ filtered_df = filtered_df.copy()
144
  filtered_df['impact_category'] = (
145
  filtered_df['category'].astype(str) + ' (' +
146
  filtered_df['location'].astype(str) + ', ' +
 
164
  fig.update_layout(xaxis_tickangle=-45, height=600)
165
  return fig
166
 
167
+ def create_map_visualization(filtered_df):
168
  """Create a choropleth map showing value factors by country"""
 
 
169
  if filtered_df.empty:
170
  fig = go.Figure()
171
  fig.add_annotation(
 
196
  fig.update_layout(height=600)
197
  return fig
198
 
199
+ def create_comparison_chart(filtered_df):
200
  """Create a comparison chart showing specific impact categories across selected countries"""
 
 
201
  if filtered_df.empty:
202
  fig = go.Figure()
203
  fig.add_annotation(
 
208
  return fig
209
 
210
  # Create a composite key for proper comparison level: category + location + impact
211
+ filtered_df = filtered_df.copy()
212
  filtered_df['impact_category'] = (
213
  filtered_df['category'].astype(str) + ' (' +
214
  filtered_df['location'].astype(str) + ', ' +
 
232
  fig.update_layout(xaxis_tickangle=-45, height=600)
233
  return fig
234
 
235
+ def create_box_plot(filtered_df):
236
  """Create a box plot showing distribution of value factors by specific impact categories"""
 
 
237
  if filtered_df.empty:
238
  fig = go.Figure()
239
  fig.add_annotation(
 
244
  return fig
245
 
246
  # Create a composite key for proper comparison level: category + location + impact
247
+ filtered_df = filtered_df.copy()
248
  filtered_df['impact_category'] = (
249
  filtered_df['category'].astype(str) + ' (' +
250
  filtered_df['location'].astype(str) + ', ' +
 
264
  fig.update_layout(xaxis_tickangle=-45, height=600)
265
  return fig
266
 
267
+ def get_summary_stats(filtered_df):
268
  """Generate summary statistics for filtered data"""
 
 
269
  if filtered_df.empty:
270
  return "No data available for the selected filters"
271
 
 
275
  ### Summary Statistics
276
 
277
  - **Count**: {stats['count']:.0f} data points
278
+ - **Mean**: ${stats['mean']:,.2f}
279
+ - **Median**: ${stats['50%']:,.2f}
280
+ - **Std Dev**: ${stats['std']:,.2f}
281
+ - **Min**: ${stats['min']:,.2f}
282
+ - **Max**: ${stats['max']:,.2f}
283
+ - **25th Percentile**: ${stats['25%']:,.2f}
284
+ - **75th Percentile**: ${stats['75%']:,.2f}
285
  """
286
 
287
  return summary
288
 
289
+ def get_data_table(filtered_df, max_rows=1000):
290
+ """Return filtered data as a dataframe with formatted values"""
 
 
291
  if filtered_df.empty:
292
  return pd.DataFrame({"Message": ["No data available for the selected filters"]})
293
 
294
+ # Create a copy and format the value column
295
+ display_df = filtered_df.head(max_rows).copy()
296
+
297
+ # Format the value column with dollar sign and commas
298
+ if 'value' in display_df.columns:
299
+ display_df['value'] = display_df['value'].apply(lambda x: f"${x:,.2f}" if pd.notna(x) else "")
300
+
301
+ return display_df
302
 
303
  # Create Gradio interface
304
  with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
 
308
  Explore environmental and social impact value factors by country from the IFVI Global Value Factor Database.
309
 
310
  This visualization tool allows you to:
311
+ - Filter and search data by multiple parameters (country, impact type, location, etc.)
312
+ - View filtered data in an interactive table
313
+ - Visualize patterns through charts and maps downstream of your filtered selection
 
314
 
315
  **Important**: Value factors are comparable at the **category + location + impact** level within each topic.
316
  For example, within "Air Pollution", individual measurements like "PM2.5 (Urban, Primary Health)" are comparable across countries.
 
318
  **Data Source**: [IFVI Global Value Factor Database V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
319
  """)
320
 
321
+ # Filters and Search section at the top
322
+ gr.Markdown("## Filters and Search")
323
+ gr.Markdown("Set your filter parameters below, then click 'Apply Filters' to update the table and visualizations.")
324
+
325
+ with gr.Row():
326
+ search_box = gr.Textbox(
327
+ label="Search",
328
+ placeholder="Search across all fields (country, category, location, impact, region, topic)...",
329
+ scale=3
330
+ )
331
+ refresh_btn = gr.Button("Apply Filters", variant="primary", size="sm", scale=1)
332
 
333
  with gr.Row():
334
+ with gr.Column():
335
  country_selector = gr.Dropdown(
336
  choices=get_countries(),
337
  multiselect=True,
338
+ label="Countries",
339
+ info="Select one or more countries",
340
  value=None
341
  )
342
+ with gr.Column():
343
+ topic_selector = gr.Dropdown(
344
+ choices=get_topics(),
345
  multiselect=True,
346
+ label="Topics",
347
  info="Air Pollution, Water Pollution, Land Use, etc.",
348
  value=None
349
  )
350
+ with gr.Column():
351
+ region_selector = gr.Dropdown(
352
+ choices=get_regions(),
353
+ multiselect=True,
354
+ label="Regions",
355
+ info="Geographic regions",
356
+ value=None
357
+ )
358
+
359
+ with gr.Row():
360
+ with gr.Column():
361
+ category_selector = gr.Dropdown(
362
+ choices=get_specific_categories(),
363
+ multiselect=True,
364
+ label="Specific Categories",
365
+ info="PM2.5, NOx, BOD, etc.",
366
+ value=None
367
+ )
368
+ with gr.Column():
369
+ location_selector = gr.Dropdown(
370
+ choices=get_locations(),
371
+ multiselect=True,
372
+ label="Locations",
373
+ info="Urban, Rural, etc.",
374
+ value=None
375
+ )
376
+ with gr.Column():
377
+ impact_selector = gr.Dropdown(
378
+ choices=get_impacts(),
379
+ multiselect=True,
380
+ label="Impact Types",
381
+ info="Primary Health, Secondary Health, etc.",
382
+ value=None
383
+ )
384
+
385
+ with gr.Row():
386
+ with gr.Column():
387
+ min_value = gr.Number(label="Min Value (USD)", value=None, precision=2)
388
+ with gr.Column():
389
+ max_value = gr.Number(label="Max Value (USD)", value=None, precision=2)
390
 
391
  # Data table as primary visualization
392
+ gr.Markdown("## Data Table")
393
+ gr.Markdown("Filtered data appears below. Values are formatted with dollar signs and comma separators.")
394
 
395
  data_table = gr.Dataframe(
396
  label="Filtered Value Factors",
397
  wrap=True,
398
  interactive=False,
399
+ value=get_data_table(df.head(100)), # Show initial data formatted
400
+ column_widths=["10%", "12%", "12%", "12%", "12%", "10%", "12%", "10%", "10%"]
401
  )
402
 
403
  with gr.Row():
404
  with gr.Column():
405
  gr.Markdown("### Summary Statistics")
406
+ stats_output = gr.Markdown(value=get_summary_stats(df))
407
 
408
+ # Visualizations below the table
409
+ gr.Markdown("## Visualizations")
410
+ gr.Markdown("The charts and maps below reflect your filtered data selection from above.")
411
 
412
  with gr.Tabs():
413
  with gr.Tab("Bar Chart"):
 
531
  ## Technical Details
532
 
533
  - **Built with**: Gradio, Plotly, Pandas, Hugging Face Datasets
534
+ - **Data Format**: JSON files loaded locally
535
  - **Visualizations**: Interactive charts using Plotly for exploration and analysis
536
+ - **Filtering**: Dynamic filtering by country, category, location, impact, region, and value ranges
537
 
538
  For questions, feedback, or issues with this navigator tool, please visit the
539
  [GitHub repository](https://huggingface.co/spaces/danielrosehill/GVFD-Navigator) or contact the tool maintainer.
540
  """)
541
 
 
542
  gr.Markdown("""
543
  ---
544
  ### About the Data
 
560
  """)
561
 
562
  # Event handlers
563
+ def update_all(search, countries, topics, categories, locations, impacts, regions, min_val, max_val):
564
  """Update all views when filters are applied"""
565
+ # First filter the data
566
+ filtered_df = filter_data(
567
+ countries=countries,
568
+ topics=topics,
569
+ categories=categories,
570
+ locations=locations,
571
+ impacts=impacts,
572
+ regions=regions,
573
+ min_value=min_val,
574
+ max_value=max_val,
575
+ search_text=search
576
+ )
577
+
578
+ # Then pass the filtered dataframe to all visualization functions
579
  return (
580
+ get_data_table(filtered_df),
581
+ get_summary_stats(filtered_df),
582
+ create_bar_chart(filtered_df),
583
+ create_map_visualization(filtered_df),
584
+ create_comparison_chart(filtered_df),
585
+ create_box_plot(filtered_df)
586
  )
587
 
588
  # Wire up the unified filter button
589
  refresh_btn.click(
590
  fn=update_all,
591
+ inputs=[
592
+ search_box,
593
+ country_selector,
594
+ topic_selector,
595
+ category_selector,
596
+ location_selector,
597
+ impact_selector,
598
+ region_selector,
599
+ min_value,
600
+ max_value
601
+ ],
602
  outputs=[data_table, stats_output, bar_chart, map_chart, comparison_chart, box_plot]
603
  )
604
 
app_old.py ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ import json
6
+ import os
7
+ import numpy as np
8
+
9
+ # Load the dataset
10
+ def load_data():
11
+ """Load the GVFD dataset from local JSON file"""
12
+ try:
13
+ json_path = os.path.join(os.path.dirname(__file__), 'data.json')
14
+ with open(json_path, 'r') as f:
15
+ data = json.load(f)
16
+ # Extract records from the JSON structure
17
+ records = data.get('records', [])
18
+ df = pd.DataFrame(records)
19
+ return df
20
+ except Exception as e:
21
+ print(f"Error loading dataset: {e}")
22
+ # Return empty dataframe if loading fails
23
+ return pd.DataFrame()
24
+
25
+ # Initialize data
26
+ df = load_data()
27
+
28
+ def get_countries():
29
+ """Get sorted list of unique countries from the dataset"""
30
+ if df.empty:
31
+ return []
32
+ # The column is named 'country' in the JSON data
33
+ if 'country' in df.columns:
34
+ return sorted(df['country'].dropna().unique().tolist())
35
+ return []
36
+
37
+ def get_categories():
38
+ """Get available categories from the dataset"""
39
+ if df.empty:
40
+ return []
41
+ # Get unique topics from the data (topic column contains the categories)
42
+ if 'topic' in df.columns:
43
+ return sorted(df['topic'].dropna().unique().tolist())
44
+ return []
45
+
46
+ def get_specific_categories():
47
+ """Get unique specific categories (e.g., PM2.5, NOx, etc.)"""
48
+ if df.empty:
49
+ return []
50
+ if 'category' in df.columns:
51
+ return sorted(df['category'].dropna().unique().tolist())
52
+ return []
53
+
54
+ def get_locations():
55
+ """Get unique locations (e.g., Urban, Rural, etc.)"""
56
+ if df.empty:
57
+ return []
58
+ if 'location' in df.columns:
59
+ return sorted(df['location'].dropna().unique().tolist())
60
+ return []
61
+
62
+ def get_impacts():
63
+ """Get unique impact types (e.g., Primary Health, etc.)"""
64
+ if df.empty:
65
+ return []
66
+ if 'impact' in df.columns:
67
+ return sorted(df['impact'].dropna().unique().tolist())
68
+ return []
69
+
70
+ def get_regions():
71
+ """Get unique regions"""
72
+ if df.empty:
73
+ return []
74
+ if 'region' in df.columns:
75
+ return sorted(df['region'].dropna().unique().tolist())
76
+ return []
77
+
78
+ def filter_data(countries=None, topics=None, categories=None, locations=None, impacts=None, regions=None, min_value=None, max_value=None, search_text=None):
79
+ """Filter dataset based on user selections"""
80
+ if df.empty:
81
+ return pd.DataFrame()
82
+
83
+ filtered_df = df.copy()
84
+
85
+ # Filter by countries
86
+ if countries and len(countries) > 0:
87
+ filtered_df = filtered_df[filtered_df['country'].isin(countries)]
88
+
89
+ # Filter by topics (Air Pollution, Water Pollution, etc.)
90
+ if topics and len(topics) > 0:
91
+ filtered_df = filtered_df[filtered_df['topic'].isin(topics)]
92
+
93
+ # Filter by specific categories (PM2.5, NOx, etc.)
94
+ if categories and len(categories) > 0:
95
+ filtered_df = filtered_df[filtered_df['category'].isin(categories)]
96
+
97
+ # Filter by locations (Urban, Rural, etc.)
98
+ if locations and len(locations) > 0:
99
+ filtered_df = filtered_df[filtered_df['location'].isin(locations)]
100
+
101
+ # Filter by impacts (Primary Health, etc.)
102
+ if impacts and len(impacts) > 0:
103
+ filtered_df = filtered_df[filtered_df['impact'].isin(impacts)]
104
+
105
+ # Filter by regions
106
+ if regions and len(regions) > 0:
107
+ filtered_df = filtered_df[filtered_df['region'].isin(regions)]
108
+
109
+ # Filter by value range
110
+ if min_value is not None or max_value is not None:
111
+ if min_value is not None:
112
+ filtered_df = filtered_df[filtered_df['value'] >= min_value]
113
+ if max_value is not None:
114
+ filtered_df = filtered_df[filtered_df['value'] <= max_value]
115
+
116
+ # Search filter - search across multiple text columns
117
+ if search_text and search_text.strip():
118
+ search_text = search_text.strip().lower()
119
+ mask = (
120
+ filtered_df['country'].str.lower().str.contains(search_text, na=False) |
121
+ filtered_df['topic'].str.lower().str.contains(search_text, na=False) |
122
+ filtered_df['category'].str.lower().str.contains(search_text, na=False) |
123
+ filtered_df['location'].str.lower().str.contains(search_text, na=False) |
124
+ filtered_df['impact'].str.lower().str.contains(search_text, na=False) |
125
+ filtered_df['region'].str.lower().str.contains(search_text, na=False)
126
+ )
127
+ filtered_df = filtered_df[mask]
128
+
129
+ return filtered_df
130
+
131
+ def create_bar_chart(filtered_df):
132
+ """Create a bar chart showing value factors by country and specific impact category"""
133
+
134
+ if filtered_df.empty:
135
+ fig = go.Figure()
136
+ fig.add_annotation(
137
+ text="No data available for the selected filters",
138
+ xref="paper", yref="paper",
139
+ x=0.5, y=0.5, showarrow=False
140
+ )
141
+ return fig
142
+
143
+ # Create a composite key for proper comparison level: category + location + impact
144
+ filtered_df['impact_category'] = (
145
+ filtered_df['category'].astype(str) + ' (' +
146
+ filtered_df['location'].astype(str) + ', ' +
147
+ filtered_df['impact'].astype(str) + ')'
148
+ )
149
+
150
+ # Group by country and the composite impact category
151
+ grouped = filtered_df.groupby(['country', 'impact_category', 'topic'])['value'].mean().reset_index()
152
+
153
+ fig = px.bar(
154
+ grouped,
155
+ x='country',
156
+ y='value',
157
+ color='impact_category',
158
+ title="Value Factors by Country and Specific Impact Category",
159
+ labels={'value': "Value Factor (USD)", 'country': "Country", 'impact_category': "Impact Category"},
160
+ barmode='group',
161
+ hover_data=['topic']
162
+ )
163
+
164
+ fig.update_layout(xaxis_tickangle=-45, height=600)
165
+ return fig
166
+
167
+ def create_map_visualization(filtered_df):
168
+ """Create a choropleth map showing value factors by country"""
169
+
170
+ if filtered_df.empty:
171
+ fig = go.Figure()
172
+ fig.add_annotation(
173
+ text="No data available for the selected filters",
174
+ xref="paper", yref="paper",
175
+ x=0.5, y=0.5, showarrow=False
176
+ )
177
+ return fig
178
+
179
+ # Aggregate by country
180
+ country_data = filtered_df.groupby('country')['value'].mean().reset_index()
181
+
182
+ # Get ISO codes for the map
183
+ iso_data = filtered_df.groupby('country')['iso_code'].first().reset_index()
184
+ country_data = country_data.merge(iso_data, on='country')
185
+
186
+ fig = px.choropleth(
187
+ country_data,
188
+ locations='iso_code',
189
+ locationmode='ISO-3',
190
+ color='value',
191
+ hover_name='country',
192
+ title="Global Value Factors by Country",
193
+ labels={'value': "Avg Value Factor (USD)"},
194
+ color_continuous_scale="Viridis"
195
+ )
196
+
197
+ fig.update_layout(height=600)
198
+ return fig
199
+
200
+ def create_comparison_chart(filtered_df):
201
+ """Create a comparison chart showing specific impact categories across selected countries"""
202
+
203
+ if filtered_df.empty:
204
+ fig = go.Figure()
205
+ fig.add_annotation(
206
+ text="No data available for the selected filters",
207
+ xref="paper", yref="paper",
208
+ x=0.5, y=0.5, showarrow=False
209
+ )
210
+ return fig
211
+
212
+ # Create a composite key for proper comparison level: category + location + impact
213
+ filtered_df['impact_category'] = (
214
+ filtered_df['category'].astype(str) + ' (' +
215
+ filtered_df['location'].astype(str) + ', ' +
216
+ filtered_df['impact'].astype(str) + ')'
217
+ )
218
+
219
+ # Group by the composite impact category and country
220
+ grouped = filtered_df.groupby(['impact_category', 'country', 'topic'])['value'].mean().reset_index()
221
+
222
+ fig = px.bar(
223
+ grouped,
224
+ x='impact_category',
225
+ y='value',
226
+ color='country',
227
+ title="Specific Impact Category Comparison Across Countries",
228
+ labels={'value': "Value Factor (USD)", 'impact_category': "Impact Category"},
229
+ barmode='group',
230
+ hover_data=['topic']
231
+ )
232
+
233
+ fig.update_layout(xaxis_tickangle=-45, height=600)
234
+ return fig
235
+
236
def create_box_plot(filtered_df):
    """Create a box plot of value factors per specific impact category.

    Args:
        filtered_df: DataFrame of already-filtered rows. This function reads
            the 'category', 'location', 'impact', 'country', 'topic' and
            'value' columns.

    Returns:
        A Plotly figure. When ``filtered_df`` has no rows, the figure is empty
        apart from a centred "no data" annotation; otherwise it is a box plot
        with one box per composite impact category, coloured by country.
    """
    if filtered_df.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available for the selected filters",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        return fig

    # Composite key for the comparison level: category + location + impact.
    # assign() returns a new frame, so the caller's DataFrame is not given an
    # extra column as a side effect (the same frame may be reused by other
    # views, and writing into a filtered slice can trigger pandas'
    # SettingWithCopyWarning).
    plot_df = filtered_df.assign(
        impact_category=(
            filtered_df['category'].astype(str) + ' (' +
            filtered_df['location'].astype(str) + ', ' +
            filtered_df['impact'].astype(str) + ')'
        )
    )

    fig = px.box(
        plot_df,
        x='impact_category',
        y='value',
        color='country',
        title="Distribution of Value Factors by Specific Impact Category",
        labels={'value': "Value Factor (USD)", 'impact_category': "Impact Category"},
        hover_data=['topic']
    )

    # Category labels are long; tilt them so they stay readable.
    fig.update_layout(xaxis_tickangle=-45, height=600)
    return fig
267
+
268
def get_summary_stats(filtered_df):
    """Generate a Markdown summary of the 'value' column of the filtered data.

    Args:
        filtered_df: DataFrame of already-filtered rows with a numeric
            'value' column.

    Returns:
        A Markdown string listing count, mean, median, standard deviation,
        min, max and quartiles of 'value', or a plain "no data" message when
        ``filtered_df`` has no rows. Statistics that are undefined (for
        example the standard deviation of a single data point) are shown as
        "N/A" instead of "$nan".
    """
    if filtered_df.empty:
        return "No data available for the selected filters"

    stats = filtered_df['value'].describe()

    def _usd(key):
        # describe() yields NaN for statistics it cannot compute.
        stat = stats[key]
        return f"${stat:.4f}" if pd.notna(stat) else "N/A"

    labelled_stats = [
        ("Mean", "mean"),
        ("Median", "50%"),
        ("Std Dev", "std"),
        ("Min", "min"),
        ("Max", "max"),
        ("25th Percentile", "25%"),
        ("75th Percentile", "75%"),
    ]

    # Built line by line so the Markdown does not depend on how this source
    # file happens to be indented.
    lines = [
        "",
        "### Summary Statistics",
        "",
        f"- **Count**: {stats['count']:.0f} data points",
    ]
    lines.extend(f"- **{label}**: {_usd(key)}" for label, key in labelled_stats)
    lines.append("")
    return "\n".join(lines)
290
+
291
def get_data_table(filtered_df, max_rows=1000):
    """Build the table shown in the UI from the filtered data.

    Takes the first ``max_rows`` rows of ``filtered_df`` and, if a 'value'
    column is present, renders it as currency text (dollar sign, thousands
    separators, two decimals; missing values become an empty string). The
    input frame is left untouched. When ``filtered_df`` has no rows, a
    one-cell frame carrying a "no data" message is returned instead.
    """
    if filtered_df.empty:
        return pd.DataFrame({"Message": ["No data available for the selected filters"]})

    def _as_usd(amount):
        # Missing values are displayed as blank cells.
        return "" if pd.isna(amount) else f"${amount:,.2f}"

    # Work on a truncated copy so formatting never leaks into the source data.
    table = filtered_df.head(max_rows).copy()
    if 'value' not in table.columns:
        return table

    table['value'] = table['value'].apply(_as_usd)
    return table
304
+
305
+ # Create Gradio interface
306
+ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
307
+ gr.Markdown("""
308
+ # Global Value Factor Database Navigator
309
+
310
+ Explore environmental and social impact value factors by country from the IFVI Global Value Factor Database.
311
+
312
+ This visualization tool allows you to:
313
+ - Filter by country and impact topic (Air Pollution, Water Pollution, etc.)
314
+ - Compare **specific impact categories** (e.g., PM2.5 in Urban areas for Primary Health)
315
+ - View interactive data table as primary visualization
316
+ - Explore charts, maps, and statistical distributions
317
+
318
+ **Important**: Value factors are comparable at the **category + location + impact** level within each topic.
319
+ For example, within "Air Pollution", individual measurements like "PM2.5 (Urban, Primary Health)" are comparable across countries.
320
+
321
+ **Data Source**: [IFVI Global Value Factor Database V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
322
+ """)
323
+
324
+ # Filters section at the top
325
+ gr.Markdown("## 🔍 Filters")
326
+
327
+ with gr.Row():
328
+ with gr.Column(scale=2):
329
+ country_selector = gr.Dropdown(
330
+ choices=get_countries(),
331
+ multiselect=True,
332
+ label="Select Country/Countries",
333
+ info="Start typing to search...",
334
+ value=None
335
+ )
336
+ with gr.Column(scale=2):
337
+ category_selector = gr.Dropdown(
338
+ choices=get_categories(),
339
+ multiselect=True,
340
+ label="Select Impact Categories",
341
+ info="Air Pollution, Water Pollution, Land Use, etc.",
342
+ value=None
343
+ )
344
+ with gr.Column(scale=1):
345
+ refresh_btn = gr.Button("Apply Filters", variant="primary", size="lg")
346
+
347
+ # Data table as primary visualization
348
+ gr.Markdown("## 📊 Data Table")
349
+
350
+ data_table = gr.Dataframe(
351
+ label="Filtered Value Factors",
352
+ wrap=True,
353
+ interactive=False,
354
+ value=df.head(100) # Show initial data
355
+ )
356
+
357
+ with gr.Row():
358
+ with gr.Column():
359
+ gr.Markdown("### Summary Statistics")
360
+ stats_output = gr.Markdown()
361
+
362
+ # Additional visualizations below the table
363
+ gr.Markdown("## 📈 Additional Visualizations")
364
+
365
+ with gr.Tabs():
366
+ with gr.Tab("Bar Chart"):
367
+ bar_chart = gr.Plot(label="Value Factors by Country")
368
+
369
+ with gr.Tab("World Map"):
370
+ map_chart = gr.Plot(label="Global Value Factor Distribution")
371
+
372
+ with gr.Tab("Category Comparison"):
373
+ comparison_chart = gr.Plot(label="Category Comparison")
374
+
375
+ with gr.Tab("Distribution"):
376
+ box_plot = gr.Plot(label="Value Factor Distribution")
377
+
378
+ with gr.Tab("About"):
379
+ gr.Markdown("""
380
+ # About GVFD Navigator
381
+
382
+ ## Purpose of This Tool
383
+
384
+ The **GVFD Navigator** is an interactive visualization tool designed to help researchers, analysts, policymakers,
385
+ and sustainability professionals explore the Global Value Factor Database (GVFD). This navigator enables you to:
386
+
387
+ - **Filter and explore** environmental and social impact value factors by country and category
388
+ - **Visualize patterns** in how different countries value environmental impacts
389
+ - **Compare regions** to identify global trends and outliers
390
+ - **Export and analyze** filtered data for your own research or reporting needs
391
+ - **Understand monetary valuations** of environmental impacts across 229 countries
392
+
393
+ This tool transforms the raw GVFD dataset into accessible, interactive visualizations that make it easier to
394
+ understand how environmental and social impacts translate into economic terms across different regions.
395
+
396
+ ---
397
+
398
+ ## About the Global Value Factor Database (GVFD)
399
+
400
+ ### What is the GVFD?
401
+
402
+ The **Global Value Factor Database** is a pioneering dataset developed by the [International Foundation for
403
+ Valuing Impacts (IFVI)](https://www.ifvi.org/) that converts non-financial environmental and social impacts
404
+ into standardized monetary values (US Dollars).
405
+
406
+ The database represents a groundbreaking framework for evaluating global value creation by translating
407
+ companies' environmental and social impacts into financial equivalents, enabling a more holistic assessment
408
+ of corporate and organizational performance.
409
+
410
+ ### Methodology
411
+
412
+ The GVFD uses a rigorous methodology to:
413
+
414
+ - Convert non-financial environmental and social impacts into standardized monetary values
415
+ - Provide value factors as multipliers to calculate monetary equivalents of impacts
416
+ - Standardize impact accounting across different domains and geographies
417
+ - Enable currency conversion for non-USD jurisdictions
418
+ - Support integration into financial reporting and impact accounting systems
419
+
420
+ ### Coverage
421
+
422
+ - **229 countries and territories** worldwide
423
+ - **205 countries with ISO codes** (89.5% coverage)
424
+ - **~115,000 individual measurements** across all categories
425
+ - **7 major world regions** represented
426
+ - **50 US states** included for detailed US analysis
427
+
428
+ ### Impact Categories
429
+
430
+ The GVFD covers five major environmental impact categories:
431
+
432
+ 1. **Air Pollution** - Value factors for atmospheric emissions and air quality impacts
433
+ 2. **Land Use and Conservation** - Monetary values for land use changes and conservation impacts
434
+ 3. **Waste Generation** - Economic valuations of waste production and management
435
+ 4. **Water Consumption** - Value factors for water use and depletion
436
+ 5. **Water Pollution** - Monetary values for water quality degradation and contamination
437
+
438
+ ### Unique Features
439
+
440
+ - **Standardized monetary conversion** enables comparison across impact types and geographies
441
+ - **Comprehensive global coverage** includes nearly all countries and territories
442
+ - **Detailed methodological documentation** ensures transparency and reproducibility
443
+ - **Currency flexibility** allows conversion to local currencies for regional analysis
444
+ - **Integration-ready** format supports incorporation into existing impact accounting systems
445
+
446
+ ### Use Cases
447
+
448
+ The GVFD and this navigator can support:
449
+
450
+ - **Corporate sustainability reporting** - Quantify environmental impacts in financial terms
451
+ - **ESG analysis** - Evaluate environmental performance with monetary metrics
452
+ - **Policy modeling** - Assess economic costs of environmental impacts for policy decisions
453
+ - **Impact investing** - Evaluate and compare environmental impact of investments
454
+ - **AI and machine learning** - Train models on environmental impact valuations
455
+ - **Academic research** - Study relationships between environmental impacts and economic values
456
+ - **Correlation analysis** - Identify patterns in how different countries value environmental impacts
457
+
458
+ ---
459
+
460
+ ## Data Source and Attribution
461
+
462
+ **Original Data**: [IFVI Global Value Factor Database V2](https://huggingface.co/datasets/danielrosehill/Global-Value-Factor-Database-Refactor-V2)
463
+
464
+ **Dataset Developer**: International Foundation for Valuing Impacts (IFVI)
465
+
466
+ **Official Website**: [https://www.ifvi.org/](https://www.ifvi.org/)
467
+
468
+ **Navigator Tool**: This is an **unofficial visualization tool** created to make the GVFD more accessible.
469
+ For official data, methodologies, and authoritative guidance, please consult IFVI's official resources.
470
+
471
+ ---
472
+
473
+ ## Disclaimer
474
+
475
+ This navigator is an independent visualization tool and is not officially endorsed by IFVI. While every effort
476
+ has been made to accurately represent the data, users should refer to the original GVFD dataset and IFVI's
477
+ official documentation for authoritative information and methodology details.
478
+
479
+ The monetary values provided represent economic valuations of environmental impacts based on IFVI's methodology
480
+ and should be interpreted within the context of their methodological framework.
481
+
482
+ ---
483
+
484
+ ## Technical Details
485
+
486
+ - **Built with**: Gradio, Plotly, Pandas, Hugging Face Datasets
487
+ - **Data Format**: Parquet files loaded from Hugging Face Hub
488
+ - **Visualizations**: Interactive charts using Plotly for exploration and analysis
489
+ - **Filtering**: Dynamic filtering by country, category, and value ranges
490
+
491
+ For questions, feedback, or issues with this navigator tool, please visit the
492
+ [GitHub repository](https://huggingface.co/spaces/danielrosehill/GVFD-Navigator) or contact the tool maintainer.
493
+ """)
494
+
495
+
496
+ gr.Markdown("""
497
+ ---
498
+ ### About the Data
499
+
500
+ The Global Value Factor Database (GVFD) by the International Foundation for Valuing Impacts (IFVI)
501
+ provides standardized methods to convert environmental and social impacts into monetary values.
502
+
503
+ **Categories**:
504
+ - Air Pollution
505
+ - Land Use and Conservation
506
+ - Waste Generation
507
+ - Water Consumption
508
+ - Water Pollution
509
+
510
+ **Coverage**: 229 countries and territories, 50 US states, 7 world regions
511
+
512
+ **Disclaimer**: This is an unofficial visualization tool. For official data and methodology,
513
+ please visit [IFVI's website](https://www.ifvi.org/).
514
+ """)
515
+
516
+ # Event handlers
517
+ def update_all(countries, categories):
518
+ """Update all views when filters are applied"""
519
+ return (
520
+ get_data_table(countries, categories),
521
+ get_summary_stats(countries, categories),
522
+ create_bar_chart(countries, categories),
523
+ create_map_visualization(countries, categories),
524
+ create_comparison_chart(countries, categories),
525
+ create_box_plot(countries, categories)
526
+ )
527
+
528
+ # Wire up the unified filter button
529
+ refresh_btn.click(
530
+ fn=update_all,
531
+ inputs=[country_selector, category_selector],
532
+ outputs=[data_table, stats_output, bar_chart, map_chart, comparison_chart, box_plot]
533
+ )
534
+
535
# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()