Spaces:
Sleeping
Sleeping
Commit ·
15f09c9
1
Parent(s): 711f885
commit
Browse files
app.py
CHANGED
|
@@ -5,28 +5,54 @@ import plotly.graph_objects as go
|
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
# Load the dataset
|
| 10 |
def load_data():
|
| 11 |
-
"""Load the GVFD dataset from local JSON file"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
try:
|
| 13 |
json_path = os.path.join(os.path.dirname(__file__), 'data.json')
|
|
|
|
|
|
|
| 14 |
with open(json_path, 'r') as f:
|
| 15 |
data = json.load(f)
|
|
|
|
| 16 |
# Extract records from the JSON structure
|
| 17 |
records = data.get('records', [])
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
except Exception as e:
|
| 21 |
print(f"Error loading dataset: {e}")
|
| 22 |
# Return empty dataframe if loading fails
|
| 23 |
-
|
|
|
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
|
|
|
| 27 |
|
|
|
|
| 28 |
def get_countries():
|
| 29 |
"""Get sorted list of unique countries from the dataset"""
|
|
|
|
| 30 |
if df.empty:
|
| 31 |
return []
|
| 32 |
# The column is named 'country' in the JSON data
|
|
@@ -34,8 +60,10 @@ def get_countries():
|
|
| 34 |
return sorted(df['country'].dropna().unique().tolist())
|
| 35 |
return []
|
| 36 |
|
|
|
|
| 37 |
def get_topics():
|
| 38 |
"""Get available topics from the dataset"""
|
|
|
|
| 39 |
if df.empty:
|
| 40 |
return []
|
| 41 |
# Get unique topics from the data (topic column contains the categories)
|
|
@@ -43,11 +71,17 @@ def get_topics():
|
|
| 43 |
return sorted(df['topic'].dropna().unique().tolist())
|
| 44 |
return []
|
| 45 |
|
|
|
|
| 46 |
def get_specific_categories(topics=None):
|
| 47 |
"""Get unique specific categories filtered by topics"""
|
|
|
|
| 48 |
if df.empty:
|
| 49 |
return []
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
filtered_df = df
|
| 52 |
if topics and len(topics) > 0:
|
| 53 |
filtered_df = df[df['topic'].isin(topics)]
|
|
@@ -56,11 +90,17 @@ def get_specific_categories(topics=None):
|
|
| 56 |
return sorted(filtered_df['category'].dropna().unique().tolist())
|
| 57 |
return []
|
| 58 |
|
|
|
|
| 59 |
def get_locations(topics=None):
|
| 60 |
"""Get unique locations filtered by topics"""
|
|
|
|
| 61 |
if df.empty:
|
| 62 |
return []
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
filtered_df = df
|
| 65 |
if topics and len(topics) > 0:
|
| 66 |
filtered_df = df[df['topic'].isin(topics)]
|
|
@@ -69,11 +109,17 @@ def get_locations(topics=None):
|
|
| 69 |
return sorted(filtered_df['location'].dropna().unique().tolist())
|
| 70 |
return []
|
| 71 |
|
|
|
|
| 72 |
def get_impacts(topics=None):
|
| 73 |
"""Get unique impact types filtered by topics"""
|
|
|
|
| 74 |
if df.empty:
|
| 75 |
return []
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
filtered_df = df
|
| 78 |
if topics and len(topics) > 0:
|
| 79 |
filtered_df = df[df['topic'].isin(topics)]
|
|
@@ -82,8 +128,10 @@ def get_impacts(topics=None):
|
|
| 82 |
return sorted(filtered_df['impact'].dropna().unique().tolist())
|
| 83 |
return []
|
| 84 |
|
|
|
|
| 85 |
def get_regions():
|
| 86 |
"""Get unique regions"""
|
|
|
|
| 87 |
if df.empty:
|
| 88 |
return []
|
| 89 |
if 'region' in df.columns:
|
|
@@ -92,10 +140,12 @@ def get_regions():
|
|
| 92 |
|
| 93 |
def filter_data(countries=None, topics=None, categories=None, locations=None, impacts=None, regions=None, min_value=None, max_value=None, search_text=None):
|
| 94 |
"""Filter dataset based on user selections"""
|
|
|
|
| 95 |
if df.empty:
|
| 96 |
return pd.DataFrame()
|
| 97 |
|
| 98 |
-
|
|
|
|
| 99 |
|
| 100 |
# Filter by countries
|
| 101 |
if countries and len(countries) > 0:
|
|
@@ -280,12 +330,15 @@ def create_box_plot(filtered_df):
|
|
| 280 |
return fig
|
| 281 |
|
| 282 |
|
| 283 |
-
def get_data_table(filtered_df, max_rows=
|
| 284 |
-
"""Return filtered data as a dataframe with formatted values
|
|
|
|
|
|
|
|
|
|
| 285 |
if filtered_df.empty:
|
| 286 |
return pd.DataFrame({"Message": ["No data available for the selected filters"]})
|
| 287 |
|
| 288 |
-
#
|
| 289 |
display_df = filtered_df.head(max_rows).copy()
|
| 290 |
|
| 291 |
# Format the value column with dollar sign and commas
|
|
@@ -384,13 +437,13 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
|
|
| 384 |
|
| 385 |
# Data table as primary visualization
|
| 386 |
gr.Markdown("## Data Table")
|
| 387 |
-
gr.Markdown("Filtered data appears below. Values are formatted with dollar signs and comma separators.")
|
| 388 |
|
| 389 |
data_table = gr.Dataframe(
|
| 390 |
label="Filtered Value Factors",
|
| 391 |
wrap=True,
|
| 392 |
interactive=False,
|
| 393 |
-
value=
|
| 394 |
column_widths=["10%", "12%", "12%", "12%", "12%", "10%", "12%", "10%", "10%"]
|
| 395 |
)
|
| 396 |
|
|
@@ -400,16 +453,16 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
|
|
| 400 |
|
| 401 |
with gr.Tabs():
|
| 402 |
with gr.Tab("Bar Chart"):
|
| 403 |
-
bar_chart = gr.Plot(label="Value Factors by Country", value=
|
| 404 |
|
| 405 |
with gr.Tab("World Map"):
|
| 406 |
-
map_chart = gr.Plot(label="Global Value Factor Distribution", value=
|
| 407 |
|
| 408 |
with gr.Tab("Category Comparison"):
|
| 409 |
-
comparison_chart = gr.Plot(label="Category Comparison", value=
|
| 410 |
|
| 411 |
with gr.Tab("Distribution"):
|
| 412 |
-
box_plot = gr.Plot(label="Value Factor Distribution", value=
|
| 413 |
|
| 414 |
with gr.Tab("About"):
|
| 415 |
gr.Markdown("""
|
|
@@ -551,10 +604,12 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
|
|
| 551 |
# Event handlers
|
| 552 |
def update_dropdowns_on_topic_change(topics):
|
| 553 |
"""Update category, location, and impact dropdowns based on selected topics"""
|
|
|
|
|
|
|
| 554 |
return (
|
| 555 |
-
gr.Dropdown(choices=get_specific_categories(
|
| 556 |
-
gr.Dropdown(choices=get_locations(
|
| 557 |
-
gr.Dropdown(choices=get_impacts(
|
| 558 |
)
|
| 559 |
|
| 560 |
def update_all(search, countries, topics, categories, locations, impacts, regions, min_val, max_val):
|
|
@@ -581,6 +636,19 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
|
|
| 581 |
create_box_plot(filtered_df)
|
| 582 |
)
|
| 583 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
# Wire up topic selector to update dependent dropdowns
|
| 585 |
topic_selector.change(
|
| 586 |
fn=update_dropdowns_on_topic_change,
|
|
@@ -605,5 +673,12 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
|
|
| 605 |
outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
|
| 606 |
)
|
| 607 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
if __name__ == "__main__":
|
| 609 |
demo.launch()
|
|
|
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
import numpy as np
|
| 8 |
+
from functools import lru_cache
|
| 9 |
+
|
| 10 |
+
# Global variable to hold the dataframe - lazy loaded
|
| 11 |
+
_df_cache = None
|
| 12 |
|
|
|
|
| 13 |
def load_data():
    """Load the GVFD dataset from the local ``data.json`` file, at most once.

    The parsed DataFrame is stored in the module-level ``_df_cache``; every
    later call returns that cached object without touching the disk again.

    Returns:
        pandas.DataFrame: One row per entry of the JSON ``records`` list.
        If the file cannot be read or parsed, the error is printed and an
        empty DataFrame is cached and returned (loading is not retried).
    """
    global _df_cache

    if _df_cache is not None:
        return _df_cache

    try:
        json_path = os.path.join(os.path.dirname(__file__), 'data.json')
        print(f"Loading data from {json_path}...")

        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Extract records from the JSON structure
        records = data.get('records', [])
        frame = pd.DataFrame(records)

        # Optimize data types to reduce memory usage. This is best-effort:
        # a column that cannot be analysed must not cost us the whole dataset.
        row_count = len(frame)
        for col in frame.columns:
            if row_count == 0 or frame[col].dtype != 'object':
                continue
            try:
                nunique = frame[col].nunique()
                if nunique / row_count < 0.5:  # If less than 50% unique, use categorical
                    frame[col] = frame[col].astype('category')
            except (TypeError, ValueError):
                # Unhashable cells (e.g. lists or dicts) cannot be counted or
                # made categorical; keep the column as plain objects.
                continue

        _df_cache = frame
        print(f"Data loaded: {len(_df_cache)} records, {_df_cache.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
        return _df_cache

    except Exception as e:
        print(f"Error loading dataset: {e}")
        # Return empty dataframe if loading fails
        _df_cache = pd.DataFrame()
        return _df_cache
|
| 47 |
|
| 48 |
+
def get_df():
    """Return the dataset DataFrame, delegating the (lazy) load to load_data()."""
    frame = load_data()
    return frame
|
| 51 |
|
| 52 |
+
@lru_cache(maxsize=1)
|
| 53 |
def get_countries():
|
| 54 |
"""Get sorted list of unique countries from the dataset"""
|
| 55 |
+
df = get_df()
|
| 56 |
if df.empty:
|
| 57 |
return []
|
| 58 |
# The column is named 'country' in the JSON data
|
|
|
|
| 60 |
return sorted(df['country'].dropna().unique().tolist())
|
| 61 |
return []
|
| 62 |
|
| 63 |
+
@lru_cache(maxsize=1)
|
| 64 |
def get_topics():
|
| 65 |
"""Get available topics from the dataset"""
|
| 66 |
+
df = get_df()
|
| 67 |
if df.empty:
|
| 68 |
return []
|
| 69 |
# Get unique topics from the data (topic column contains the categories)
|
|
|
|
| 71 |
return sorted(df['topic'].dropna().unique().tolist())
|
| 72 |
return []
|
| 73 |
|
| 74 |
+
@lru_cache(maxsize=128)
|
| 75 |
def get_specific_categories(topics=None):
|
| 76 |
"""Get unique specific categories filtered by topics"""
|
| 77 |
+
df = get_df()
|
| 78 |
if df.empty:
|
| 79 |
return []
|
| 80 |
|
| 81 |
+
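# NOTE: lru_cache hashes the arguments before this body runs, so callers
# must already pass a tuple (or None); a list raises TypeError before this
# line is reached. Only hashable non-tuple iterables (e.g. a frozenset)
# are normalized to a tuple here.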
|
| 82 |
+
if topics is not None and not isinstance(topics, tuple):
|
| 83 |
+
topics = tuple(topics) if topics else None
|
| 84 |
+
|
| 85 |
filtered_df = df
|
| 86 |
if topics and len(topics) > 0:
|
| 87 |
filtered_df = df[df['topic'].isin(topics)]
|
|
|
|
| 90 |
return sorted(filtered_df['category'].dropna().unique().tolist())
|
| 91 |
return []
|
| 92 |
|
| 93 |
+
@lru_cache(maxsize=128)
|
| 94 |
def get_locations(topics=None):
|
| 95 |
"""Get unique locations filtered by topics"""
|
| 96 |
+
df = get_df()
|
| 97 |
if df.empty:
|
| 98 |
return []
|
| 99 |
|
| 100 |
+
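# NOTE: lru_cache hashes the arguments before this body runs, so callers
# must already pass a tuple (or None); a list raises TypeError before this
# line is reached. Only hashable non-tuple iterables (e.g. a frozenset)
# are normalized to a tuple here.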
|
| 101 |
+
if topics is not None and not isinstance(topics, tuple):
|
| 102 |
+
topics = tuple(topics) if topics else None
|
| 103 |
+
|
| 104 |
filtered_df = df
|
| 105 |
if topics and len(topics) > 0:
|
| 106 |
filtered_df = df[df['topic'].isin(topics)]
|
|
|
|
| 109 |
return sorted(filtered_df['location'].dropna().unique().tolist())
|
| 110 |
return []
|
| 111 |
|
| 112 |
+
@lru_cache(maxsize=128)
|
| 113 |
def get_impacts(topics=None):
|
| 114 |
"""Get unique impact types filtered by topics"""
|
| 115 |
+
df = get_df()
|
| 116 |
if df.empty:
|
| 117 |
return []
|
| 118 |
|
| 119 |
+
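# NOTE: lru_cache hashes the arguments before this body runs, so callers
# must already pass a tuple (or None); a list raises TypeError before this
# line is reached. Only hashable non-tuple iterables (e.g. a frozenset)
# are normalized to a tuple here.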
|
| 120 |
+
if topics is not None and not isinstance(topics, tuple):
|
| 121 |
+
topics = tuple(topics) if topics else None
|
| 122 |
+
|
| 123 |
filtered_df = df
|
| 124 |
if topics and len(topics) > 0:
|
| 125 |
filtered_df = df[df['topic'].isin(topics)]
|
|
|
|
| 128 |
return sorted(filtered_df['impact'].dropna().unique().tolist())
|
| 129 |
return []
|
| 130 |
|
| 131 |
+
@lru_cache(maxsize=1)
|
| 132 |
def get_regions():
|
| 133 |
"""Get unique regions"""
|
| 134 |
+
df = get_df()
|
| 135 |
if df.empty:
|
| 136 |
return []
|
| 137 |
if 'region' in df.columns:
|
|
|
|
| 140 |
|
| 141 |
def filter_data(countries=None, topics=None, categories=None, locations=None, impacts=None, regions=None, min_value=None, max_value=None, search_text=None):
|
| 142 |
"""Filter dataset based on user selections"""
|
| 143 |
+
df = get_df()
|
| 144 |
if df.empty:
|
| 145 |
return pd.DataFrame()
|
| 146 |
|
| 147 |
+
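# Start from the cached frame itself rather than a copy (this is an alias, not a pandas view).
# NOTE(review): the filters below must only reassign filtered_df, never mutate it in place,
# or the shared cache would be corrupted - confirm.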
|
| 148 |
+
filtered_df = df
|
| 149 |
|
| 150 |
# Filter by countries
|
| 151 |
if countries and len(countries) > 0:
|
|
|
|
| 330 |
return fig
|
| 331 |
|
| 332 |
|
| 333 |
+
def get_data_table(filtered_df, max_rows=500):
|
| 334 |
+
"""Return filtered data as a dataframe with formatted values
|
| 335 |
+
|
| 336 |
+
Reduced max_rows to 500 for better performance with large datasets
|
| 337 |
+
"""
|
| 338 |
if filtered_df.empty:
|
| 339 |
return pd.DataFrame({"Message": ["No data available for the selected filters"]})
|
| 340 |
|
| 341 |
+
# Only take the first max_rows to avoid loading entire dataset
|
| 342 |
display_df = filtered_df.head(max_rows).copy()
|
| 343 |
|
| 344 |
# Format the value column with dollar sign and commas
|
|
|
|
| 437 |
|
| 438 |
# Data table as primary visualization
|
| 439 |
gr.Markdown("## Data Table")
|
| 440 |
+
gr.Markdown("Filtered data appears below (showing up to 500 rows). Values are formatted with dollar signs and comma separators. Use filters to narrow down the dataset.")
|
| 441 |
|
| 442 |
data_table = gr.Dataframe(
|
| 443 |
label="Filtered Value Factors",
|
| 444 |
wrap=True,
|
| 445 |
interactive=False,
|
| 446 |
+
value=None, # Don't load data initially - wait for user interaction
|
| 447 |
column_widths=["10%", "12%", "12%", "12%", "12%", "10%", "12%", "10%", "10%"]
|
| 448 |
)
|
| 449 |
|
|
|
|
| 453 |
|
| 454 |
with gr.Tabs():
|
| 455 |
with gr.Tab("Bar Chart"):
|
| 456 |
+
bar_chart = gr.Plot(label="Value Factors by Country", value=None)
|
| 457 |
|
| 458 |
with gr.Tab("World Map"):
|
| 459 |
+
map_chart = gr.Plot(label="Global Value Factor Distribution", value=None)
|
| 460 |
|
| 461 |
with gr.Tab("Category Comparison"):
|
| 462 |
+
comparison_chart = gr.Plot(label="Category Comparison", value=None)
|
| 463 |
|
| 464 |
with gr.Tab("Distribution"):
|
| 465 |
+
box_plot = gr.Plot(label="Value Factor Distribution", value=None)
|
| 466 |
|
| 467 |
with gr.Tab("About"):
|
| 468 |
gr.Markdown("""
|
|
|
|
| 604 |
# Event handlers
|
| 605 |
def update_dropdowns_on_topic_change(topics):
    """Rebuild the category, location and impact dropdowns for the chosen topics.

    Returns a 3-tuple of ``gr.Dropdown`` objects (category, location, impact),
    each with choices recomputed for ``topics`` and its value cleared.
    """
    # The choice helpers are wrapped in lru_cache, so give them a hashable key.
    selected = None if not topics else tuple(topics)

    category_dropdown = gr.Dropdown(choices=get_specific_categories(selected), value=None)
    location_dropdown = gr.Dropdown(choices=get_locations(selected), value=None)
    impact_dropdown = gr.Dropdown(choices=get_impacts(selected), value=None)
    return category_dropdown, location_dropdown, impact_dropdown
|
| 614 |
|
| 615 |
def update_all(search, countries, topics, categories, locations, impacts, regions, min_val, max_val):
|
|
|
|
| 636 |
create_box_plot(filtered_df)
|
| 637 |
)
|
| 638 |
|
| 639 |
+
def load_initial_view():
    """Build the table and charts shown when the app first opens.

    Only the first 500 rows of the dataset are passed to the table and chart
    builders. Returns a 5-tuple: data table, bar chart, map, comparison chart
    and box plot, in that order.
    """
    full_df = get_df()

    # Show a small sample initially to avoid rendering everything.
    if full_df.empty:
        preview_df = full_df
    else:
        preview_df = full_df.head(500)

    table = get_data_table(preview_df)
    bar = create_bar_chart(preview_df)
    world_map = create_map_visualization(preview_df)
    comparison = create_comparison_chart(preview_df)
    box = create_box_plot(preview_df)
    return table, bar, world_map, comparison, box
|
| 651 |
+
|
| 652 |
# Wire up topic selector to update dependent dropdowns
|
| 653 |
topic_selector.change(
|
| 654 |
fn=update_dropdowns_on_topic_change,
|
|
|
|
| 673 |
outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
|
| 674 |
)
|
| 675 |
|
| 676 |
+
# Load initial view when the app opens
|
| 677 |
+
demo.load(
|
| 678 |
+
fn=load_initial_view,
|
| 679 |
+
inputs=None,
|
| 680 |
+
outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
|
| 681 |
+
)
|
| 682 |
+
|
| 683 |
if __name__ == "__main__":
|
| 684 |
demo.launch()
|