mirix committed on
Commit
6c18cec
·
verified ·
1 Parent(s): 679efc4

Upload 3 files

Browse files
Files changed (2) hide show
  1. Russia_regions_data.parquet +2 -2
  2. app.py +93 -26
Russia_regions_data.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c87635401d364d438db96c1bcf0fbf513ab22a299b7b23c6082e6fac165b69d
3
- size 60001
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c6fbeffe32651121b34b38db76f19f6ffeac91894b97d6bc1e17181a906e33c
3
+ size 89033
app.py CHANGED
@@ -13,8 +13,8 @@ except Exception as e:
13
  print(f"Error loading files: {e}")
14
  # Fallback dummy data
15
  df = pd.DataFrame(columns=['section', 'subsection', 'indicator_name', 'comment', 'year',
16
- 'indicator_value', 'indicator_unit', 'indicator_code',
17
- 'Region', 'object_oktmo', 'ISO'])
18
  gdf = gpd.GeoDataFrame(columns=['ISO', 'geometry'])
19
 
20
  # Ensure geometries are in standard lat/lon
@@ -25,10 +25,10 @@ gdf = gdf.to_crs(epsg=4326)
25
  # Filter for unique indicator_code
26
  df_unique = df.drop_duplicates(subset=['indicator_code']).copy()
27
 
28
- # Define columns (removed indicator_value from display)
29
  display_columns = [
30
  'section', 'subsection', 'indicator_name', 'comment',
31
- 'year', 'indicator_unit'
32
  ]
33
  hidden_link_key = 'indicator_code'
34
 
@@ -80,8 +80,8 @@ def remove_outliers(series):
80
  Q1 = series.quantile(0.01)
81
  Q3 = series.quantile(0.99)
82
  IQR = Q3 - Q1
83
- lower_bound = Q1 - 1.5 * IQR
84
- upper_bound = Q3 + 1.5 * IQR
85
  return series.where((series >= lower_bound) & (series <= upper_bound), np.nan)
86
 
87
  def should_use_log_scale(values):
@@ -128,13 +128,67 @@ def format_value(value):
128
 
129
  # 4. Define App Logic
130
  # ---------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  def update_map(select_data: gr.SelectData, current_table_data):
132
  """
133
  Triggered when a cell in the table is clicked.
134
  select_data.index is a tuple (row, col) or int depending on version.
135
  """
136
  if select_data is None:
137
- return None
138
 
139
  # Handle index format (it often comes as [row, col] or just row index)
140
  # We safely extract the row index
@@ -148,7 +202,7 @@ def update_map(select_data: gr.SelectData, current_table_data):
148
  if isinstance(current_table_data, pd.DataFrame):
149
  selected_row = current_table_data.iloc[row_index]
150
  else:
151
- return None
152
 
153
  # Find the corresponding unique indicator code
154
  match = df_unique[
@@ -161,6 +215,7 @@ def update_map(select_data: gr.SelectData, current_table_data):
161
 
162
  selected_code = match.iloc[0][hidden_link_key]
163
  selected_unit = match.iloc[0]['indicator_unit']
 
164
 
165
  # Filter main data for this indicator
166
  df_filtered = df[df['indicator_code'] == selected_code].copy()
@@ -168,12 +223,15 @@ def update_map(select_data: gr.SelectData, current_table_data):
168
  # Ensure one row per region (remove duplicates, keep first occurrence)
169
  df_filtered = df_filtered.drop_duplicates(subset=['ISO'], keep='first')
170
 
171
- # Remove outliers (replace with NaN)
172
- df_filtered['indicator_value_clean'] = remove_outliers(df_filtered['indicator_value'])
 
 
 
173
 
174
- # Calculate Rankings (Desc: Higher Value = Rank 1) - excluding NaN values
175
- df_filtered_for_ranking = df_filtered.dropna(subset=['indicator_value_clean']).copy()
176
- df_filtered_for_ranking = df_filtered_for_ranking.sort_values('indicator_value_clean', ascending=False).reset_index(drop=True)
177
  df_filtered_for_ranking['Ranking'] = range(1, len(df_filtered_for_ranking) + 1)
178
 
179
  # Merge rankings back
@@ -184,14 +242,14 @@ def update_map(select_data: gr.SelectData, current_table_data):
184
  )
185
 
186
  # Decide if we should use log scale
187
- use_log = should_use_log_scale(df_filtered['indicator_value_clean'])
188
 
189
  # Create color scale values (log if needed)
190
  if use_log:
191
- df_filtered['color_value'] = np.log10(df_filtered['indicator_value_clean'])
192
  color_label = f"{selected_unit} (log scale)"
193
  else:
194
- df_filtered['color_value'] = df_filtered['indicator_value_clean']
195
  color_label = selected_unit
196
 
197
  # Merge with Geometry
@@ -200,22 +258,30 @@ def update_map(select_data: gr.SelectData, current_table_data):
200
  if merged.empty:
201
  return px.choropleth(title="No data for this indicator")
202
 
203
- # Construct Map with divergent color scale
 
 
 
 
 
 
 
 
204
  fig = px.choropleth_map(
205
  merged,
206
  geojson=merged.geometry,
207
  locations=merged.index,
208
  color='color_value',
209
- color_continuous_scale="RdYlBu_r", # Divergent color scale (red-yellow-blue reversed)
210
  map_style="satellite-streets",
211
  zoom=2,
212
  center={"lat": 60, "lon": 90},
213
  opacity=0.6,
214
- labels={'color_value': color_label}
215
  )
216
 
217
  # Format values for tooltip
218
- merged['formatted_value'] = merged['indicator_value'].apply(format_value)
219
  merged['formatted_ranking'] = merged['Ranking'].apply(lambda x: str(int(x)) if pd.notna(x) else "N/A")
220
 
221
  # Tooltip Configuration
@@ -225,14 +291,14 @@ def update_map(select_data: gr.SelectData, current_table_data):
225
  "<b>Rank:</b> %{customdata[0]}<br>"
226
  "<b>Region:</b> %{customdata[1]}<br>"
227
  "<b>Indicator Name:</b> %{customdata[2]}<br>"
228
- "<b>Indicator Value:</b> %{customdata[3]}"
229
  "<extra></extra>"
230
  )
231
  )
232
 
233
  fig.update_layout(
234
  margin={"r":0,"t":0,"l":0,"b":0},
235
- height=800 # Increased map height
236
  )
237
 
238
  return fig
@@ -243,7 +309,8 @@ with gr.Blocks(title="Russian Regions Analytics") as demo:
243
  gr.Markdown("## Russian Regional Indicators")
244
 
245
  with gr.Row():
246
- map_plot = gr.Plot(label="Regional Distribution")
 
247
 
248
  with gr.Row():
249
  # Prepare table value with styling metadata
@@ -258,10 +325,10 @@ with gr.Blocks(title="Russian Regions Analytics") as demo:
258
  table = gr.DataFrame(
259
  value=table_value,
260
  label="Select an Indicator",
261
- datatype=["str", "str", "str", "str", "number", "str"],
262
  interactive=True,
263
- max_height=700, # Increased table height
264
- column_widths=column_widths # Smart column widths based on content
265
  )
266
 
267
  # Wire the selection event
 
13
  print(f"Error loading files: {e}")
14
  # Fallback dummy data
15
  df = pd.DataFrame(columns=['section', 'subsection', 'indicator_name', 'comment', 'year',
16
+ 'indicator_value', 'rel_indicator_value', 'indicator_unit', 'positive',
17
+ 'indicator_code', 'Region', 'object_oktmo', 'ISO'])
18
  gdf = gpd.GeoDataFrame(columns=['ISO', 'geometry'])
19
 
20
  # Ensure geometries are in standard lat/lon
 
25
  # Filter for unique indicator_code
26
  df_unique = df.drop_duplicates(subset=['indicator_code']).copy()
27
 
28
+ # Define columns (added indicator_value and rel_indicator_value, hidden positive)
29
  display_columns = [
30
  'section', 'subsection', 'indicator_name', 'comment',
31
+ 'year', 'indicator_value', 'rel_indicator_value', 'indicator_unit'
32
  ]
33
  hidden_link_key = 'indicator_code'
34
 
 
80
  Q1 = series.quantile(0.01)
81
  Q3 = series.quantile(0.99)
82
  IQR = Q3 - Q1
83
+ lower_bound = Q1 - 2 * IQR
84
+ upper_bound = Q3 + 2 * IQR
85
  return series.where((series >= lower_bound) & (series <= upper_bound), np.nan)
86
 
87
  def should_use_log_scale(values):
 
128
 
129
  # 4. Define App Logic
130
  # ---------------------------------------------------------
131
def create_ranking_map():
    """Build the default choropleth that colours each region by overall ranking.

    Reads the module-level ``df`` (indicator rows) and ``gdf`` (region
    geometries). A titled, empty ``px.choropleth`` placeholder is returned
    when there are no ``OVERALL_RANKING`` rows, or when none of those rows
    match a geometry on ``ISO``.
    """
    is_overall = df['indicator_code'] == 'OVERALL_RANKING'
    ranking_rows = df[is_overall].copy()
    if ranking_rows.empty:
        return px.choropleth(title="Overall Ranking not found")

    # Keep a single row per region; the first occurrence wins.
    ranking_rows = ranking_rows.drop_duplicates(subset=['ISO'], keep='first')

    # The map is coloured by the relative indicator value.
    ranking_rows['color_value'] = ranking_rows['rel_indicator_value']

    # Inner join: only regions present in both frames end up on the map.
    regions = gdf.merge(ranking_rows, on='ISO', how='inner')
    if regions.empty:
        return px.choropleth(title="No ranking data available")

    # Reversed red-yellow-blue scale: blue for low values, red for high ones.
    map_options = {
        'geojson': regions.geometry,
        'locations': regions.index,
        'color': 'color_value',
        'color_continuous_scale': "RdYlBu_r",
        'map_style': "satellite-streets",
        'zoom': 2,
        'center': {"lat": 60, "lon": 90},
        'opacity': 0.6,
        'labels': {'color_value': 'Overall Ranking'},
    }
    fig = px.choropleth_map(regions, **map_options)

    # Pre-format the values so the tooltip shows display-ready strings.
    regions['formatted_value'] = regions['rel_indicator_value'].apply(format_value)

    tooltip = (
        "<b>Region:</b> %{customdata[0]}<br>"
        "<b>Overall Ranking:</b> %{customdata[1]}"
        "<extra></extra>"
    )
    fig.update_traces(
        customdata=regions[['Region', 'formatted_value']],
        hovertemplate=tooltip,
    )

    # Drop the outer margins and fix the map height.
    fig.update_layout(
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        height=800,
    )
    return fig
184
+
185
  def update_map(select_data: gr.SelectData, current_table_data):
186
  """
187
  Triggered when a cell in the table is clicked.
188
  select_data.index is a tuple (row, col) or int depending on version.
189
  """
190
  if select_data is None:
191
+ return create_ranking_map()
192
 
193
  # Handle index format (it often comes as [row, col] or just row index)
194
  # We safely extract the row index
 
202
  if isinstance(current_table_data, pd.DataFrame):
203
  selected_row = current_table_data.iloc[row_index]
204
  else:
205
+ return create_ranking_map()
206
 
207
  # Find the corresponding unique indicator code
208
  match = df_unique[
 
215
 
216
  selected_code = match.iloc[0][hidden_link_key]
217
  selected_unit = match.iloc[0]['indicator_unit']
218
+ selected_positive = match.iloc[0]['positive']
219
 
220
  # Filter main data for this indicator
221
  df_filtered = df[df['indicator_code'] == selected_code].copy()
 
223
  # Ensure one row per region (remove duplicates, keep first occurrence)
224
  df_filtered = df_filtered.drop_duplicates(subset=['ISO'], keep='first')
225
 
226
+ # Remove outliers (replace with NaN) - use rel_indicator_value
227
+ df_filtered['value_clean'] = remove_outliers(df_filtered['rel_indicator_value'])
228
+
229
+ # Calculate Rankings based on positive column
230
+ df_filtered_for_ranking = df_filtered.dropna(subset=['value_clean']).copy()
231
 
232
+ # If P: higher is better (ascending=False), if N or other: lower is better (ascending=True)
233
+ ascending = (selected_positive != 'P')
234
+ df_filtered_for_ranking = df_filtered_for_ranking.sort_values('value_clean', ascending=ascending).reset_index(drop=True)
235
  df_filtered_for_ranking['Ranking'] = range(1, len(df_filtered_for_ranking) + 1)
236
 
237
  # Merge rankings back
 
242
  )
243
 
244
  # Decide if we should use log scale
245
+ use_log = should_use_log_scale(df_filtered['value_clean'])
246
 
247
  # Create color scale values (log if needed)
248
  if use_log:
249
+ df_filtered['color_value'] = np.log10(df_filtered['value_clean'])
250
  color_label = f"{selected_unit} (log scale)"
251
  else:
252
+ df_filtered['color_value'] = df_filtered['value_clean']
253
  color_label = selected_unit
254
 
255
  # Merge with Geometry
 
258
  if merged.empty:
259
  return px.choropleth(title="No data for this indicator")
260
 
261
+ # Determine color scale based on positive column
262
+ # If P: red (low/bad) to blue (high/good)
263
+ # If N or other: blue (low/good) to red (high/bad)
264
+ if selected_positive == 'P':
265
+ color_scale = "RdYlBu" # Red (low/bad) to Blue (high/good)
266
+ else:
267
+ color_scale = "RdYlBu_r" # Reversed: Blue (low/good) to Red (high/bad)
268
+
269
+ # Construct Map with appropriate color scale
270
  fig = px.choropleth_map(
271
  merged,
272
  geojson=merged.geometry,
273
  locations=merged.index,
274
  color='color_value',
275
+ color_continuous_scale=color_scale, # Direction chosen above from the 'positive' flag
276
  map_style="satellite-streets",
277
  zoom=2,
278
  center={"lat": 60, "lon": 90},
279
  opacity=0.6,
280
+ labels={'color_value': 'Overall Ranking'}
281
  )
282
 
283
  # Format values for tooltip
284
+ merged['formatted_value'] = merged['rel_indicator_value'].apply(format_value)
285
  merged['formatted_ranking'] = merged['Ranking'].apply(lambda x: str(int(x)) if pd.notna(x) else "N/A")
286
 
287
  # Tooltip Configuration
 
291
  "<b>Rank:</b> %{customdata[0]}<br>"
292
  "<b>Region:</b> %{customdata[1]}<br>"
293
  "<b>Indicator Name:</b> %{customdata[2]}<br>"
294
+ "<b>Relative Value:</b> %{customdata[3]}"
295
  "<extra></extra>"
296
  )
297
  )
298
 
299
  fig.update_layout(
300
  margin={"r":0,"t":0,"l":0,"b":0},
301
+ height=800
302
  )
303
 
304
  return fig
 
309
  gr.Markdown("## Russian Regional Indicators")
310
 
311
  with gr.Row():
312
+ # Initialize with ranking map
313
+ map_plot = gr.Plot(label="Regional Distribution", value=create_ranking_map())
314
 
315
  with gr.Row():
316
  # Prepare table value with styling metadata
 
325
  table = gr.DataFrame(
326
  value=table_value,
327
  label="Select an Indicator",
328
+ datatype=["str", "str", "str", "str", "number", "number", "number", "str"],
329
  interactive=True,
330
+ max_height=700,
331
+ column_widths=column_widths
332
  )
333
 
334
  # Wire the selection event