Spaces:

danielrosehill
/

GVFD-Navigator

Sleeping

App Files Files Community

danielrosehill committed on Oct 14, 2025

Commit

15f09c9

1 Parent(s): 711f885

commit

Browse files

Files changed (1) hide show

app.py +95 -20

app.py CHANGED Viewed

@@ -5,28 +5,54 @@ import plotly.graph_objects as go
 import json
 import os
 import numpy as np
-# Load the dataset
 def load_data():
-    """Load the GVFD dataset from local JSON file"""
     try:
         json_path = os.path.join(os.path.dirname(__file__), 'data.json')
         with open(json_path, 'r') as f:
             data = json.load(f)
         # Extract records from the JSON structure
         records = data.get('records', [])
-        df = pd.DataFrame(records)
-        return df
     except Exception as e:
         print(f"Error loading dataset: {e}")
         # Return empty dataframe if loading fails
-        return pd.DataFrame()
-# Initialize data
-df = load_data()
 def get_countries():
     """Get sorted list of unique countries from the dataset"""
     if df.empty:
         return []
     # The column is named 'country' in the JSON data
@@ -34,8 +60,10 @@ def get_countries():
         return sorted(df['country'].dropna().unique().tolist())
     return []
 def get_topics():
     """Get available topics from the dataset"""
     if df.empty:
         return []
     # Get unique topics from the data (topic column contains the categories)
@@ -43,11 +71,17 @@ def get_topics():
         return sorted(df['topic'].dropna().unique().tolist())
     return []
 def get_specific_categories(topics=None):
     """Get unique specific categories filtered by topics"""
     if df.empty:
         return []
     filtered_df = df
     if topics and len(topics) > 0:
         filtered_df = df[df['topic'].isin(topics)]
@@ -56,11 +90,17 @@ def get_specific_categories(topics=None):
         return sorted(filtered_df['category'].dropna().unique().tolist())
     return []
 def get_locations(topics=None):
     """Get unique locations filtered by topics"""
     if df.empty:
         return []
     filtered_df = df
     if topics and len(topics) > 0:
         filtered_df = df[df['topic'].isin(topics)]
@@ -69,11 +109,17 @@ def get_locations(topics=None):
         return sorted(filtered_df['location'].dropna().unique().tolist())
     return []
 def get_impacts(topics=None):
     """Get unique impact types filtered by topics"""
     if df.empty:
         return []
     filtered_df = df
     if topics and len(topics) > 0:
         filtered_df = df[df['topic'].isin(topics)]
@@ -82,8 +128,10 @@ def get_impacts(topics=None):
         return sorted(filtered_df['impact'].dropna().unique().tolist())
     return []
 def get_regions():
     """Get unique regions"""
     if df.empty:
         return []
     if 'region' in df.columns:
@@ -92,10 +140,12 @@ def get_regions():
 def filter_data(countries=None, topics=None, categories=None, locations=None, impacts=None, regions=None, min_value=None, max_value=None, search_text=None):
     """Filter dataset based on user selections"""
     if df.empty:
         return pd.DataFrame()
-    filtered_df = df.copy()
     # Filter by countries
     if countries and len(countries) > 0:
@@ -280,12 +330,15 @@ def create_box_plot(filtered_df):
     return fig
-def get_data_table(filtered_df, max_rows=1000):
-    """Return filtered data as a dataframe with formatted values"""
     if filtered_df.empty:
         return pd.DataFrame({"Message": ["No data available for the selected filters"]})
-    # Create a copy and format the value column
     display_df = filtered_df.head(max_rows).copy()
     # Format the value column with dollar sign and commas
@@ -384,13 +437,13 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
     # Data table as primary visualization
     gr.Markdown("## Data Table")
-    gr.Markdown("Filtered data appears below. Values are formatted with dollar signs and comma separators.")
     data_table = gr.Dataframe(
         label="Filtered Value Factors",
         wrap=True,
         interactive=False,
-        value=get_data_table(df),  # Show all data initially (up to max_rows limit)
         column_widths=["10%", "12%", "12%", "12%", "12%", "10%", "12%", "10%", "10%"]
     )
@@ -400,16 +453,16 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
     with gr.Tabs():
         with gr.Tab("Bar Chart"):
-            bar_chart = gr.Plot(label="Value Factors by Country", value=create_bar_chart(df))
         with gr.Tab("World Map"):
-            map_chart = gr.Plot(label="Global Value Factor Distribution", value=create_map_visualization(df))
         with gr.Tab("Category Comparison"):
-            comparison_chart = gr.Plot(label="Category Comparison", value=create_comparison_chart(df))
         with gr.Tab("Distribution"):
-            box_plot = gr.Plot(label="Value Factor Distribution", value=create_box_plot(df))
         with gr.Tab("About"):
             gr.Markdown("""
@@ -551,10 +604,12 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
     # Event handlers
     def update_dropdowns_on_topic_change(topics):
         """Update category, location, and impact dropdowns based on selected topics"""
         return (
-            gr.Dropdown(choices=get_specific_categories(topics), value=None),
-            gr.Dropdown(choices=get_locations(topics), value=None),
-            gr.Dropdown(choices=get_impacts(topics), value=None)
         )
     def update_all(search, countries, topics, categories, locations, impacts, regions, min_val, max_val):
@@ -581,6 +636,19 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
             create_box_plot(filtered_df)
         )
     # Wire up topic selector to update dependent dropdowns
     topic_selector.change(
         fn=update_dropdowns_on_topic_change,
@@ -605,5 +673,12 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
         outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
     )
 if __name__ == "__main__":
     demo.launch()

 import json
 import os
 import numpy as np
+from functools import lru_cache
+# Global variable to hold the dataframe - lazy loaded
+_df_cache = None
 def load_data():
+    """Load the GVFD dataset from local JSON file with lazy initialization"""
+    global _df_cache
+    if _df_cache is not None:
+        return _df_cache
     try:
         json_path = os.path.join(os.path.dirname(__file__), 'data.json')
+        print(f"Loading data from {json_path}...")
         with open(json_path, 'r') as f:
             data = json.load(f)
         # Extract records from the JSON structure
         records = data.get('records', [])
+        _df_cache = pd.DataFrame(records)
+        # Optimize data types to reduce memory usage
+        for col in _df_cache.columns:
+            if _df_cache[col].dtype == 'object':
+                # Try to convert to categorical if reasonable number of unique values
+                nunique = _df_cache[col].nunique()
+                if nunique / len(_df_cache) < 0.5:  # If less than 50% unique, use categorical
+                    _df_cache[col] = _df_cache[col].astype('category')
+        print(f"Data loaded: {len(_df_cache)} records, {_df_cache.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
+        return _df_cache
     except Exception as e:
         print(f"Error loading dataset: {e}")
         # Return empty dataframe if loading fails
+        _df_cache = pd.DataFrame()
+        return _df_cache
+def get_df():
+    """Helper function to get the dataframe, loading it if necessary"""
+    return load_data()
+@lru_cache(maxsize=1)
 def get_countries():
     """Get sorted list of unique countries from the dataset"""
+    df = get_df()
     if df.empty:
         return []
     # The column is named 'country' in the JSON data
         return sorted(df['country'].dropna().unique().tolist())
     return []
+@lru_cache(maxsize=1)
 def get_topics():
     """Get available topics from the dataset"""
+    df = get_df()
     if df.empty:
         return []
     # Get unique topics from the data (topic column contains the categories)
         return sorted(df['topic'].dropna().unique().tolist())
     return []
+@lru_cache(maxsize=128)
 def get_specific_categories(topics=None):
     """Get unique specific categories filtered by topics"""
+    df = get_df()
     if df.empty:
         return []
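+    # Normalize topics to a tuple. Note: lru_cache hashes the arguments before this body runs,
+    # so callers must already pass a hashable value (tuple or None) rather than a list.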
+    if topics is not None and not isinstance(topics, tuple):
+        topics = tuple(topics) if topics else None
     filtered_df = df
     if topics and len(topics) > 0:
         filtered_df = df[df['topic'].isin(topics)]
         return sorted(filtered_df['category'].dropna().unique().tolist())
     return []
+@lru_cache(maxsize=128)
 def get_locations(topics=None):
     """Get unique locations filtered by topics"""
+    df = get_df()
     if df.empty:
         return []
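+    # Normalize topics to a tuple. Note: lru_cache hashes the arguments before this body runs,
+    # so callers must already pass a hashable value (tuple or None) rather than a list.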
+    if topics is not None and not isinstance(topics, tuple):
+        topics = tuple(topics) if topics else None
     filtered_df = df
     if topics and len(topics) > 0:
         filtered_df = df[df['topic'].isin(topics)]
         return sorted(filtered_df['location'].dropna().unique().tolist())
     return []
+@lru_cache(maxsize=128)
 def get_impacts(topics=None):
     """Get unique impact types filtered by topics"""
+    df = get_df()
     if df.empty:
         return []
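+    # Normalize topics to a tuple. Note: lru_cache hashes the arguments before this body runs,
+    # so callers must already pass a hashable value (tuple or None) rather than a list.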
+    if topics is not None and not isinstance(topics, tuple):
+        topics = tuple(topics) if topics else None
     filtered_df = df
     if topics and len(topics) > 0:
         filtered_df = df[df['topic'].isin(topics)]
         return sorted(filtered_df['impact'].dropna().unique().tolist())
     return []
+@lru_cache(maxsize=1)
 def get_regions():
     """Get unique regions"""
+    df = get_df()
     if df.empty:
         return []
     if 'region' in df.columns:
 def filter_data(countries=None, topics=None, categories=None, locations=None, impacts=None, regions=None, min_value=None, max_value=None, search_text=None):
     """Filter dataset based on user selections"""
+    df = get_df()
     if df.empty:
         return pd.DataFrame()
+    # Start from the cached dataframe itself (a plain alias, not a copy) for better performance - only copy at the end if needed
+    filtered_df = df
     # Filter by countries
     if countries and len(countries) > 0:
     return fig
+def get_data_table(filtered_df, max_rows=500):
+    """Return filtered data as a dataframe with formatted values
+    Reduced max_rows to 500 for better performance with large datasets
+    """
     if filtered_df.empty:
         return pd.DataFrame({"Message": ["No data available for the selected filters"]})
+    # Only take the first max_rows to avoid loading entire dataset
     display_df = filtered_df.head(max_rows).copy()
     # Format the value column with dollar sign and commas
     # Data table as primary visualization
     gr.Markdown("## Data Table")
+    gr.Markdown("Filtered data appears below (showing up to 500 rows). Values are formatted with dollar signs and comma separators. Use filters to narrow down the dataset.")
     data_table = gr.Dataframe(
         label="Filtered Value Factors",
         wrap=True,
         interactive=False,
+        value=None,  # Don't load data at build time - demo.load fills this in when the app opens
         column_widths=["10%", "12%", "12%", "12%", "12%", "10%", "12%", "10%", "10%"]
     )
     with gr.Tabs():
         with gr.Tab("Bar Chart"):
+            bar_chart = gr.Plot(label="Value Factors by Country", value=None)
         with gr.Tab("World Map"):
+            map_chart = gr.Plot(label="Global Value Factor Distribution", value=None)
         with gr.Tab("Category Comparison"):
+            comparison_chart = gr.Plot(label="Category Comparison", value=None)
         with gr.Tab("Distribution"):
+            box_plot = gr.Plot(label="Value Factor Distribution", value=None)
         with gr.Tab("About"):
             gr.Markdown("""
     # Event handlers
     def update_dropdowns_on_topic_change(topics):
         """Update category, location, and impact dropdowns based on selected topics"""
+        # Convert to tuple for caching
+        topics_tuple = tuple(topics) if topics else None
         return (
+            gr.Dropdown(choices=get_specific_categories(topics_tuple), value=None),
+            gr.Dropdown(choices=get_locations(topics_tuple), value=None),
+            gr.Dropdown(choices=get_impacts(topics_tuple), value=None)
         )
     def update_all(search, countries, topics, categories, locations, impacts, regions, min_val, max_val):
             create_box_plot(filtered_df)
         )
+    def load_initial_view():
+        """Load initial view with a small sample of data"""
+        df = get_df()
+        # Show a small sample initially to avoid loading everything
+        sample_df = df.head(500) if not df.empty else df
+        return (
+            get_data_table(sample_df),
+            create_bar_chart(sample_df),
+            create_map_visualization(sample_df),
+            create_comparison_chart(sample_df),
+            create_box_plot(sample_df)
+        )
     # Wire up topic selector to update dependent dropdowns
     topic_selector.change(
         fn=update_dropdowns_on_topic_change,
         outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
     )
+    # Load initial view when the app opens
+    demo.load(
+        fn=load_initial_view,
+        inputs=None,
+        outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
+    )
 if __name__ == "__main__":
     demo.launch()