danielrosehill committed on
Commit
15f09c9
·
1 Parent(s): 711f885
Files changed (1) hide show
  1. app.py +95 -20
app.py CHANGED
@@ -5,28 +5,54 @@ import plotly.graph_objects as go
5
  import json
6
  import os
7
  import numpy as np
 
 
 
 
8
 
9
- # Load the dataset
10
  def load_data():
11
- """Load the GVFD dataset from local JSON file"""
 
 
 
 
 
12
  try:
13
  json_path = os.path.join(os.path.dirname(__file__), 'data.json')
 
 
14
  with open(json_path, 'r') as f:
15
  data = json.load(f)
 
16
  # Extract records from the JSON structure
17
  records = data.get('records', [])
18
- df = pd.DataFrame(records)
19
- return df
 
 
 
 
 
 
 
 
 
 
 
20
  except Exception as e:
21
  print(f"Error loading dataset: {e}")
22
  # Return empty dataframe if loading fails
23
- return pd.DataFrame()
 
24
 
25
- # Initialize data
26
- df = load_data()
 
27
 
 
28
  def get_countries():
29
  """Get sorted list of unique countries from the dataset"""
 
30
  if df.empty:
31
  return []
32
  # The column is named 'country' in the JSON data
@@ -34,8 +60,10 @@ def get_countries():
34
  return sorted(df['country'].dropna().unique().tolist())
35
  return []
36
 
 
37
  def get_topics():
38
  """Get available topics from the dataset"""
 
39
  if df.empty:
40
  return []
41
  # Get unique topics from the data (topic column contains the categories)
@@ -43,11 +71,17 @@ def get_topics():
43
  return sorted(df['topic'].dropna().unique().tolist())
44
  return []
45
 
 
46
  def get_specific_categories(topics=None):
47
  """Get unique specific categories filtered by topics"""
 
48
  if df.empty:
49
  return []
50
 
 
 
 
 
51
  filtered_df = df
52
  if topics and len(topics) > 0:
53
  filtered_df = df[df['topic'].isin(topics)]
@@ -56,11 +90,17 @@ def get_specific_categories(topics=None):
56
  return sorted(filtered_df['category'].dropna().unique().tolist())
57
  return []
58
 
 
59
  def get_locations(topics=None):
60
  """Get unique locations filtered by topics"""
 
61
  if df.empty:
62
  return []
63
 
 
 
 
 
64
  filtered_df = df
65
  if topics and len(topics) > 0:
66
  filtered_df = df[df['topic'].isin(topics)]
@@ -69,11 +109,17 @@ def get_locations(topics=None):
69
  return sorted(filtered_df['location'].dropna().unique().tolist())
70
  return []
71
 
 
72
  def get_impacts(topics=None):
73
  """Get unique impact types filtered by topics"""
 
74
  if df.empty:
75
  return []
76
 
 
 
 
 
77
  filtered_df = df
78
  if topics and len(topics) > 0:
79
  filtered_df = df[df['topic'].isin(topics)]
@@ -82,8 +128,10 @@ def get_impacts(topics=None):
82
  return sorted(filtered_df['impact'].dropna().unique().tolist())
83
  return []
84
 
 
85
  def get_regions():
86
  """Get unique regions"""
 
87
  if df.empty:
88
  return []
89
  if 'region' in df.columns:
@@ -92,10 +140,12 @@ def get_regions():
92
 
93
  def filter_data(countries=None, topics=None, categories=None, locations=None, impacts=None, regions=None, min_value=None, max_value=None, search_text=None):
94
  """Filter dataset based on user selections"""
 
95
  if df.empty:
96
  return pd.DataFrame()
97
 
98
- filtered_df = df.copy()
 
99
 
100
  # Filter by countries
101
  if countries and len(countries) > 0:
@@ -280,12 +330,15 @@ def create_box_plot(filtered_df):
280
  return fig
281
 
282
 
283
- def get_data_table(filtered_df, max_rows=1000):
284
- """Return filtered data as a dataframe with formatted values"""
 
 
 
285
  if filtered_df.empty:
286
  return pd.DataFrame({"Message": ["No data available for the selected filters"]})
287
 
288
- # Create a copy and format the value column
289
  display_df = filtered_df.head(max_rows).copy()
290
 
291
  # Format the value column with dollar sign and commas
@@ -384,13 +437,13 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
384
 
385
  # Data table as primary visualization
386
  gr.Markdown("## Data Table")
387
- gr.Markdown("Filtered data appears below. Values are formatted with dollar signs and comma separators.")
388
 
389
  data_table = gr.Dataframe(
390
  label="Filtered Value Factors",
391
  wrap=True,
392
  interactive=False,
393
- value=get_data_table(df), # Show all data initially (up to max_rows limit)
394
  column_widths=["10%", "12%", "12%", "12%", "12%", "10%", "12%", "10%", "10%"]
395
  )
396
 
@@ -400,16 +453,16 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
400
 
401
  with gr.Tabs():
402
  with gr.Tab("Bar Chart"):
403
- bar_chart = gr.Plot(label="Value Factors by Country", value=create_bar_chart(df))
404
 
405
  with gr.Tab("World Map"):
406
- map_chart = gr.Plot(label="Global Value Factor Distribution", value=create_map_visualization(df))
407
 
408
  with gr.Tab("Category Comparison"):
409
- comparison_chart = gr.Plot(label="Category Comparison", value=create_comparison_chart(df))
410
 
411
  with gr.Tab("Distribution"):
412
- box_plot = gr.Plot(label="Value Factor Distribution", value=create_box_plot(df))
413
 
414
  with gr.Tab("About"):
415
  gr.Markdown("""
@@ -551,10 +604,12 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
551
  # Event handlers
552
  def update_dropdowns_on_topic_change(topics):
553
  """Update category, location, and impact dropdowns based on selected topics"""
 
 
554
  return (
555
- gr.Dropdown(choices=get_specific_categories(topics), value=None),
556
- gr.Dropdown(choices=get_locations(topics), value=None),
557
- gr.Dropdown(choices=get_impacts(topics), value=None)
558
  )
559
 
560
  def update_all(search, countries, topics, categories, locations, impacts, regions, min_val, max_val):
@@ -581,6 +636,19 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
581
  create_box_plot(filtered_df)
582
  )
583
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
  # Wire up topic selector to update dependent dropdowns
585
  topic_selector.change(
586
  fn=update_dropdowns_on_topic_change,
@@ -605,5 +673,12 @@ with gr.Blocks(title="GVFD Navigator", theme=gr.themes.Soft()) as demo:
605
  outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
606
  )
607
 
 
 
 
 
 
 
 
608
  if __name__ == "__main__":
609
  demo.launch()
 
5
  import json
6
  import os
7
  import numpy as np
8
+ from functools import lru_cache
9
+
10
# Module-level cache for the dataset; filled by the first load_data() call.
_df_cache = None


def load_data():
    """Load the GVFD dataset from the local ``data.json`` file, once.

    The resulting DataFrame is stored in the module-level ``_df_cache`` and
    handed back unchanged on every later call, so the file is read at most
    one time per process.

    After parsing, object-dtype columns whose number of distinct values is
    below 50% of the row count are converted to ``category`` to reduce
    memory. This conversion is best-effort: a column it cannot handle
    (for example one holding lists or dicts, which are unhashable) is left
    as-is instead of discarding the whole dataset.

    Returns:
        pandas.DataFrame: one row per entry of the JSON ``records`` list.
        An empty DataFrame is returned (and cached, so the load is not
        retried) when the file cannot be read or parsed.
    """
    global _df_cache

    if _df_cache is not None:
        return _df_cache

    try:
        json_path = os.path.join(os.path.dirname(__file__), 'data.json')
        print(f"Loading data from {json_path}...")

        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Extract records from the JSON structure
        records = data.get('records', [])
        frame = pd.DataFrame(records)
    except Exception as e:
        print(f"Error loading dataset: {e}")
        # Cache an empty dataframe if loading fails
        _df_cache = pd.DataFrame()
        return _df_cache

    # Optimize data types to reduce memory usage. Kept outside the loading
    # try-block so a failed optimisation never throws away loaded data.
    row_count = len(frame)
    for col in frame.columns:
        if frame[col].dtype != 'object':
            continue
        try:
            nunique = frame[col].nunique()
            # If less than 50% unique, use categorical
            if row_count and nunique / row_count < 0.5:
                frame[col] = frame[col].astype('category')
        except (TypeError, ValueError):
            # Unhashable or otherwise unsuitable cell values: keep the
            # column in its original dtype.
            continue

    _df_cache = frame
    print(f"Data loaded: {len(_df_cache)} records, {_df_cache.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
    return _df_cache
47
 
48
def get_df():
    """Return the dataset DataFrame by delegating to ``load_data()``."""
    dataset = load_data()
    return dataset
51
 
52
+ @lru_cache(maxsize=1)
53
  def get_countries():
54
  """Get sorted list of unique countries from the dataset"""
55
+ df = get_df()
56
  if df.empty:
57
  return []
58
  # The column is named 'country' in the JSON data
 
60
  return sorted(df['country'].dropna().unique().tolist())
61
  return []
62
 
63
+ @lru_cache(maxsize=1)
64
  def get_topics():
65
  """Get available topics from the dataset"""
66
+ df = get_df()
67
  if df.empty:
68
  return []
69
  # Get unique topics from the data (topic column contains the categories)
 
71
  return sorted(df['topic'].dropna().unique().tolist())
72
  return []
73
 
74
+ @lru_cache(maxsize=128)
75
  def get_specific_categories(topics=None):
76
  """Get unique specific categories filtered by topics"""
77
+ df = get_df()
78
  if df.empty:
79
  return []
80
 
81
+ # Convert topics to tuple for caching (lists aren't hashable)
82
+ if topics is not None and not isinstance(topics, tuple):
83
+ topics = tuple(topics) if topics else None
84
+
85
  filtered_df = df
86
  if topics and len(topics) > 0:
87
  filtered_df = df[df['topic'].isin(topics)]
 
90
  return sorted(filtered_df['category'].dropna().unique().tolist())
91
  return []
92
 
93
+ @lru_cache(maxsize=128)
94
  def get_locations(topics=None):
95
  """Get unique locations filtered by topics"""
96
+ df = get_df()
97
  if df.empty:
98
  return []
99
 
100
+ # Convert topics to tuple for caching (lists aren't hashable)
101
+ if topics is not None and not isinstance(topics, tuple):
102
+ topics = tuple(topics) if topics else None
103
+
104
  filtered_df = df
105
  if topics and len(topics) > 0:
106
  filtered_df = df[df['topic'].isin(topics)]
 
109
  return sorted(filtered_df['location'].dropna().unique().tolist())
110
  return []
111
 
112
+ @lru_cache(maxsize=128)
113
  def get_impacts(topics=None):
114
  """Get unique impact types filtered by topics"""
115
+ df = get_df()
116
  if df.empty:
117
  return []
118
 
119
+ # Convert topics to tuple for caching (lists aren't hashable)
120
+ if topics is not None and not isinstance(topics, tuple):
121
+ topics = tuple(topics) if topics else None
122
+
123
  filtered_df = df
124
  if topics and len(topics) > 0:
125
  filtered_df = df[df['topic'].isin(topics)]
 
128
  return sorted(filtered_df['impact'].dropna().unique().tolist())
129
  return []
130
 
131
+ @lru_cache(maxsize=1)
132
  def get_regions():
133
  """Get unique regions"""
134
+ df = get_df()
135
  if df.empty:
136
  return []
137
  if 'region' in df.columns:
 
140
 
141
  def filter_data(countries=None, topics=None, categories=None, locations=None, impacts=None, regions=None, min_value=None, max_value=None, search_text=None):
142
  """Filter dataset based on user selections"""
143
+ df = get_df()
144
  if df.empty:
145
  return pd.DataFrame()
146
 
147
+ # Use view instead of copy for better performance - only copy at the end if needed
148
+ filtered_df = df
149
 
150
  # Filter by countries
151
  if countries and len(countries) > 0:
 
330
  return fig
331
 
332
 
333
+ def get_data_table(filtered_df, max_rows=500):
334
+ """Return filtered data as a dataframe with formatted values
335
+
336
+ Reduced max_rows to 500 for better performance with large datasets
337
+ """
338
  if filtered_df.empty:
339
  return pd.DataFrame({"Message": ["No data available for the selected filters"]})
340
 
341
+ # Only take the first max_rows to avoid loading entire dataset
342
  display_df = filtered_df.head(max_rows).copy()
343
 
344
  # Format the value column with dollar sign and commas
 
437
 
438
  # Data table as primary visualization
439
  gr.Markdown("## Data Table")
440
+ gr.Markdown("Filtered data appears below (showing up to 500 rows). Values are formatted with dollar signs and comma separators. Use filters to narrow down the dataset.")
441
 
442
  data_table = gr.Dataframe(
443
  label="Filtered Value Factors",
444
  wrap=True,
445
  interactive=False,
446
+ value=None, # Don't load data initially - wait for user interaction
447
  column_widths=["10%", "12%", "12%", "12%", "12%", "10%", "12%", "10%", "10%"]
448
  )
449
 
 
453
 
454
  with gr.Tabs():
455
  with gr.Tab("Bar Chart"):
456
+ bar_chart = gr.Plot(label="Value Factors by Country", value=None)
457
 
458
  with gr.Tab("World Map"):
459
+ map_chart = gr.Plot(label="Global Value Factor Distribution", value=None)
460
 
461
  with gr.Tab("Category Comparison"):
462
+ comparison_chart = gr.Plot(label="Category Comparison", value=None)
463
 
464
  with gr.Tab("Distribution"):
465
+ box_plot = gr.Plot(label="Value Factor Distribution", value=None)
466
 
467
  with gr.Tab("About"):
468
  gr.Markdown("""
 
604
  # Event handlers
605
  def update_dropdowns_on_topic_change(topics):
606
  """Update category, location, and impact dropdowns based on selected topics"""
607
+ # Convert to tuple for caching
608
+ topics_tuple = tuple(topics) if topics else None
609
  return (
610
+ gr.Dropdown(choices=get_specific_categories(topics_tuple), value=None),
611
+ gr.Dropdown(choices=get_locations(topics_tuple), value=None),
612
+ gr.Dropdown(choices=get_impacts(topics_tuple), value=None)
613
  )
614
 
615
  def update_all(search, countries, topics, categories, locations, impacts, regions, min_val, max_val):
 
636
  create_box_plot(filtered_df)
637
  )
638
 
639
def load_initial_view():
    """Build the table and the four charts shown when the app first opens.

    NOTE: everything returned here is computed from at most the first 500
    rows of the dataset, so the initial charts describe that slice only,
    not the full data.

    Returns a 5-tuple: (data table, bar chart, map, comparison chart,
    box plot).
    """
    full_df = get_df()
    # An empty frame is passed through untouched; otherwise take the head.
    if full_df.empty:
        preview_df = full_df
    else:
        preview_df = full_df.head(500)

    table = get_data_table(preview_df)
    bar = create_bar_chart(preview_df)
    world_map = create_map_visualization(preview_df)
    comparison = create_comparison_chart(preview_df)
    distribution = create_box_plot(preview_df)
    return table, bar, world_map, comparison, distribution
651
+
652
  # Wire up topic selector to update dependent dropdowns
653
  topic_selector.change(
654
  fn=update_dropdowns_on_topic_change,
 
673
  outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
674
  )
675
 
676
+ # Load initial view when the app opens
677
+ demo.load(
678
+ fn=load_initial_view,
679
+ inputs=None,
680
+ outputs=[data_table, bar_chart, map_chart, comparison_chart, box_plot]
681
+ )
682
+
683
  if __name__ == "__main__":
684
  demo.launch()