Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files
- Russia_regions_data.parquet +2 -2
- app.py +93 -26
Russia_regions_data.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c6fbeffe32651121b34b38db76f19f6ffeac91894b97d6bc1e17181a906e33c
|
| 3 |
+
size 89033
|
app.py
CHANGED
|
@@ -13,8 +13,8 @@ except Exception as e:
|
|
| 13 |
print(f"Error loading files: {e}")
|
| 14 |
# Fallback dummy data
|
| 15 |
df = pd.DataFrame(columns=['section', 'subsection', 'indicator_name', 'comment', 'year',
|
| 16 |
-
'indicator_value', '
|
| 17 |
-
'Region', 'object_oktmo', 'ISO'])
|
| 18 |
gdf = gpd.GeoDataFrame(columns=['ISO', 'geometry'])
|
| 19 |
|
| 20 |
# Ensure geometries are in standard lat/lon
|
|
@@ -25,10 +25,10 @@ gdf = gdf.to_crs(epsg=4326)
|
|
| 25 |
# Filter for unique indicator_code
|
| 26 |
df_unique = df.drop_duplicates(subset=['indicator_code']).copy()
|
| 27 |
|
| 28 |
-
# Define columns (
|
| 29 |
display_columns = [
|
| 30 |
'section', 'subsection', 'indicator_name', 'comment',
|
| 31 |
-
'year', 'indicator_unit'
|
| 32 |
]
|
| 33 |
hidden_link_key = 'indicator_code'
|
| 34 |
|
|
@@ -80,8 +80,8 @@ def remove_outliers(series):
|
|
| 80 |
Q1 = series.quantile(0.01)
|
| 81 |
Q3 = series.quantile(0.99)
|
| 82 |
IQR = Q3 - Q1
|
| 83 |
-
lower_bound = Q1 -
|
| 84 |
-
upper_bound = Q3 +
|
| 85 |
return series.where((series >= lower_bound) & (series <= upper_bound), np.nan)
|
| 86 |
|
| 87 |
def should_use_log_scale(values):
|
|
@@ -128,13 +128,67 @@ def format_value(value):
|
|
| 128 |
|
| 129 |
# 4. Define App Logic
|
| 130 |
# ---------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
def update_map(select_data: gr.SelectData, current_table_data):
|
| 132 |
"""
|
| 133 |
Triggered when a cell in the table is clicked.
|
| 134 |
select_data.index is a tuple (row, col) or int depending on version.
|
| 135 |
"""
|
| 136 |
if select_data is None:
|
| 137 |
-
return
|
| 138 |
|
| 139 |
# Handle index format (it often comes as [row, col] or just row index)
|
| 140 |
# We safely extract the row index
|
|
@@ -148,7 +202,7 @@ def update_map(select_data: gr.SelectData, current_table_data):
|
|
| 148 |
if isinstance(current_table_data, pd.DataFrame):
|
| 149 |
selected_row = current_table_data.iloc[row_index]
|
| 150 |
else:
|
| 151 |
-
return
|
| 152 |
|
| 153 |
# Find the corresponding unique indicator code
|
| 154 |
match = df_unique[
|
|
@@ -161,6 +215,7 @@ def update_map(select_data: gr.SelectData, current_table_data):
|
|
| 161 |
|
| 162 |
selected_code = match.iloc[0][hidden_link_key]
|
| 163 |
selected_unit = match.iloc[0]['indicator_unit']
|
|
|
|
| 164 |
|
| 165 |
# Filter main data for this indicator
|
| 166 |
df_filtered = df[df['indicator_code'] == selected_code].copy()
|
|
@@ -168,12 +223,15 @@ def update_map(select_data: gr.SelectData, current_table_data):
|
|
| 168 |
# Ensure one row per region (remove duplicates, keep first occurrence)
|
| 169 |
df_filtered = df_filtered.drop_duplicates(subset=['ISO'], keep='first')
|
| 170 |
|
| 171 |
-
# Remove outliers (replace with NaN)
|
| 172 |
-
df_filtered['
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
-
#
|
| 175 |
-
|
| 176 |
-
df_filtered_for_ranking = df_filtered_for_ranking.sort_values('
|
| 177 |
df_filtered_for_ranking['Ranking'] = range(1, len(df_filtered_for_ranking) + 1)
|
| 178 |
|
| 179 |
# Merge rankings back
|
|
@@ -184,14 +242,14 @@ def update_map(select_data: gr.SelectData, current_table_data):
|
|
| 184 |
)
|
| 185 |
|
| 186 |
# Decide if we should use log scale
|
| 187 |
-
use_log = should_use_log_scale(df_filtered['
|
| 188 |
|
| 189 |
# Create color scale values (log if needed)
|
| 190 |
if use_log:
|
| 191 |
-
df_filtered['color_value'] = np.log10(df_filtered['
|
| 192 |
color_label = f"{selected_unit} (log scale)"
|
| 193 |
else:
|
| 194 |
-
df_filtered['color_value'] = df_filtered['
|
| 195 |
color_label = selected_unit
|
| 196 |
|
| 197 |
# Merge with Geometry
|
|
@@ -200,22 +258,30 @@ def update_map(select_data: gr.SelectData, current_table_data):
|
|
| 200 |
if merged.empty:
|
| 201 |
return px.choropleth(title="No data for this indicator")
|
| 202 |
|
| 203 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
fig = px.choropleth_map(
|
| 205 |
merged,
|
| 206 |
geojson=merged.geometry,
|
| 207 |
locations=merged.index,
|
| 208 |
color='color_value',
|
| 209 |
-
color_continuous_scale=
|
| 210 |
map_style="satellite-streets",
|
| 211 |
zoom=2,
|
| 212 |
center={"lat": 60, "lon": 90},
|
| 213 |
opacity=0.6,
|
| 214 |
-
labels={'color_value':
|
| 215 |
)
|
| 216 |
|
| 217 |
# Format values for tooltip
|
| 218 |
-
merged['formatted_value'] = merged['
|
| 219 |
merged['formatted_ranking'] = merged['Ranking'].apply(lambda x: str(int(x)) if pd.notna(x) else "N/A")
|
| 220 |
|
| 221 |
# Tooltip Configuration
|
|
@@ -225,14 +291,14 @@ def update_map(select_data: gr.SelectData, current_table_data):
|
|
| 225 |
"<b>Rank:</b> %{customdata[0]}<br>"
|
| 226 |
"<b>Region:</b> %{customdata[1]}<br>"
|
| 227 |
"<b>Indicator Name:</b> %{customdata[2]}<br>"
|
| 228 |
-
"<b>
|
| 229 |
"<extra></extra>"
|
| 230 |
)
|
| 231 |
)
|
| 232 |
|
| 233 |
fig.update_layout(
|
| 234 |
margin={"r":0,"t":0,"l":0,"b":0},
|
| 235 |
-
height=800
|
| 236 |
)
|
| 237 |
|
| 238 |
return fig
|
|
@@ -243,7 +309,8 @@ with gr.Blocks(title="Russian Regions Analytics") as demo:
|
|
| 243 |
gr.Markdown("## Russian Regional Indicators")
|
| 244 |
|
| 245 |
with gr.Row():
|
| 246 |
-
|
|
|
|
| 247 |
|
| 248 |
with gr.Row():
|
| 249 |
# Prepare table value with styling metadata
|
|
@@ -258,10 +325,10 @@ with gr.Blocks(title="Russian Regions Analytics") as demo:
|
|
| 258 |
table = gr.DataFrame(
|
| 259 |
value=table_value,
|
| 260 |
label="Select an Indicator",
|
| 261 |
-
datatype=["str", "str", "str", "str", "number", "str"],
|
| 262 |
interactive=True,
|
| 263 |
-
max_height=700,
|
| 264 |
-
column_widths=column_widths
|
| 265 |
)
|
| 266 |
|
| 267 |
# Wire the selection event
|
|
|
|
| 13 |
print(f"Error loading files: {e}")
|
| 14 |
# Fallback dummy data
|
| 15 |
df = pd.DataFrame(columns=['section', 'subsection', 'indicator_name', 'comment', 'year',
|
| 16 |
+
'indicator_value', 'rel_indicator_value', 'indicator_unit', 'positive',
|
| 17 |
+
'indicator_code', 'Region', 'object_oktmo', 'ISO'])
|
| 18 |
gdf = gpd.GeoDataFrame(columns=['ISO', 'geometry'])
|
| 19 |
|
| 20 |
# Ensure geometries are in standard lat/lon
|
|
|
|
| 25 |
# Filter for unique indicator_code
|
| 26 |
df_unique = df.drop_duplicates(subset=['indicator_code']).copy()
|
| 27 |
|
| 28 |
+
# Define columns (added indicator_value and rel_indicator_value, hidden positive)
|
| 29 |
display_columns = [
|
| 30 |
'section', 'subsection', 'indicator_name', 'comment',
|
| 31 |
+
'year', 'indicator_value', 'rel_indicator_value', 'indicator_unit'
|
| 32 |
]
|
| 33 |
hidden_link_key = 'indicator_code'
|
| 34 |
|
|
|
|
| 80 |
Q1 = series.quantile(0.01)
|
| 81 |
Q3 = series.quantile(0.99)
|
| 82 |
IQR = Q3 - Q1
|
| 83 |
+
lower_bound = Q1 - 2 * IQR
|
| 84 |
+
upper_bound = Q3 + 2 * IQR
|
| 85 |
return series.where((series >= lower_bound) & (series <= upper_bound), np.nan)
|
| 86 |
|
| 87 |
def should_use_log_scale(values):
|
|
|
|
| 128 |
|
| 129 |
# 4. Define App Logic
|
| 130 |
# ---------------------------------------------------------
|
| 131 |
+
def create_ranking_map():
|
| 132 |
+
"""Create the default map showing overall rankings"""
|
| 133 |
+
# Filter for the overall ranking indicator
|
| 134 |
+
df_ranking = df[df['indicator_code'] == 'OVERALL_RANKING'].copy()
|
| 135 |
+
|
| 136 |
+
if df_ranking.empty:
|
| 137 |
+
return px.choropleth(title="Overall Ranking not found")
|
| 138 |
+
|
| 139 |
+
# Ensure one row per region
|
| 140 |
+
df_ranking = df_ranking.drop_duplicates(subset=['ISO'], keep='first')
|
| 141 |
+
|
| 142 |
+
# Use rel_indicator_value (which is the same as indicator_value for ranking)
|
| 143 |
+
df_ranking['color_value'] = df_ranking['rel_indicator_value']
|
| 144 |
+
|
| 145 |
+
# Merge with Geometry
|
| 146 |
+
merged = gdf.merge(df_ranking, on='ISO', how='inner')
|
| 147 |
+
|
| 148 |
+
if merged.empty:
|
| 149 |
+
return px.choropleth(title="No ranking data available")
|
| 150 |
+
|
| 151 |
+
# Color scale: blue (lower/better) to red (higher/worse)
|
| 152 |
+
fig = px.choropleth_map(
|
| 153 |
+
merged,
|
| 154 |
+
geojson=merged.geometry,
|
| 155 |
+
locations=merged.index,
|
| 156 |
+
color='color_value',
|
| 157 |
+
color_continuous_scale="RdYlBu_r", # Blue (low/good) to Red (high/bad)
|
| 158 |
+
map_style="satellite-streets",
|
| 159 |
+
zoom=2,
|
| 160 |
+
center={"lat": 60, "lon": 90},
|
| 161 |
+
opacity=0.6,
|
| 162 |
+
labels={'color_value': 'Overall Ranking'}
|
| 163 |
+
)
|
| 164 |
+
|
| 165 |
+
# Format values for tooltip
|
| 166 |
+
merged['formatted_value'] = merged['rel_indicator_value'].apply(format_value)
|
| 167 |
+
|
| 168 |
+
# Tooltip Configuration
|
| 169 |
+
fig.update_traces(
|
| 170 |
+
customdata=merged[['Region', 'formatted_value']],
|
| 171 |
+
hovertemplate=(
|
| 172 |
+
"<b>Region:</b> %{customdata[0]}<br>"
|
| 173 |
+
"<b>Overall Ranking:</b> %{customdata[1]}"
|
| 174 |
+
"<extra></extra>"
|
| 175 |
+
)
|
| 176 |
+
)
|
| 177 |
+
|
| 178 |
+
fig.update_layout(
|
| 179 |
+
margin={"r":0,"t":0,"l":0,"b":0},
|
| 180 |
+
height=800
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
+
return fig
|
| 184 |
+
|
| 185 |
def update_map(select_data: gr.SelectData, current_table_data):
|
| 186 |
"""
|
| 187 |
Triggered when a cell in the table is clicked.
|
| 188 |
select_data.index is a tuple (row, col) or int depending on version.
|
| 189 |
"""
|
| 190 |
if select_data is None:
|
| 191 |
+
return create_ranking_map()
|
| 192 |
|
| 193 |
# Handle index format (it often comes as [row, col] or just row index)
|
| 194 |
# We safely extract the row index
|
|
|
|
| 202 |
if isinstance(current_table_data, pd.DataFrame):
|
| 203 |
selected_row = current_table_data.iloc[row_index]
|
| 204 |
else:
|
| 205 |
+
return create_ranking_map()
|
| 206 |
|
| 207 |
# Find the corresponding unique indicator code
|
| 208 |
match = df_unique[
|
|
|
|
| 215 |
|
| 216 |
selected_code = match.iloc[0][hidden_link_key]
|
| 217 |
selected_unit = match.iloc[0]['indicator_unit']
|
| 218 |
+
selected_positive = match.iloc[0]['positive']
|
| 219 |
|
| 220 |
# Filter main data for this indicator
|
| 221 |
df_filtered = df[df['indicator_code'] == selected_code].copy()
|
|
|
|
| 223 |
# Ensure one row per region (remove duplicates, keep first occurrence)
|
| 224 |
df_filtered = df_filtered.drop_duplicates(subset=['ISO'], keep='first')
|
| 225 |
|
| 226 |
+
# Remove outliers (replace with NaN) - use rel_indicator_value
|
| 227 |
+
df_filtered['value_clean'] = remove_outliers(df_filtered['rel_indicator_value'])
|
| 228 |
+
|
| 229 |
+
# Calculate Rankings based on positive column
|
| 230 |
+
df_filtered_for_ranking = df_filtered.dropna(subset=['value_clean']).copy()
|
| 231 |
|
| 232 |
+
# If P: higher is better (ascending=False), if N or other: lower is better (ascending=True)
|
| 233 |
+
ascending = (selected_positive != 'P')
|
| 234 |
+
df_filtered_for_ranking = df_filtered_for_ranking.sort_values('value_clean', ascending=ascending).reset_index(drop=True)
|
| 235 |
df_filtered_for_ranking['Ranking'] = range(1, len(df_filtered_for_ranking) + 1)
|
| 236 |
|
| 237 |
# Merge rankings back
|
|
|
|
| 242 |
)
|
| 243 |
|
| 244 |
# Decide if we should use log scale
|
| 245 |
+
use_log = should_use_log_scale(df_filtered['value_clean'])
|
| 246 |
|
| 247 |
# Create color scale values (log if needed)
|
| 248 |
if use_log:
|
| 249 |
+
df_filtered['color_value'] = np.log10(df_filtered['value_clean'])
|
| 250 |
color_label = f"{selected_unit} (log scale)"
|
| 251 |
else:
|
| 252 |
+
df_filtered['color_value'] = df_filtered['value_clean']
|
| 253 |
color_label = selected_unit
|
| 254 |
|
| 255 |
# Merge with Geometry
|
|
|
|
| 258 |
if merged.empty:
|
| 259 |
return px.choropleth(title="No data for this indicator")
|
| 260 |
|
| 261 |
+
# Determine color scale based on positive column
|
| 262 |
+
# If P: red (low/bad) to blue (high/good)
|
| 263 |
+
# If N or other: blue (low/good) to red (high/bad)
|
| 264 |
+
if selected_positive == 'P':
|
| 265 |
+
color_scale = "RdYlBu" # Red (low/bad) to Blue (high/good)
|
| 266 |
+
else:
|
| 267 |
+
color_scale = "RdYlBu_r" # Reversed scale: Blue (low/good) to Red (high/bad)
|
| 268 |
+
|
| 269 |
+
# Construct Map with appropriate color scale
|
| 270 |
fig = px.choropleth_map(
|
| 271 |
merged,
|
| 272 |
geojson=merged.geometry,
|
| 273 |
locations=merged.index,
|
| 274 |
color='color_value',
|
| 275 |
+
color_continuous_scale=color_scale, # RdYlBu when positive == 'P', otherwise RdYlBu_r
|
| 276 |
map_style="satellite-streets",
|
| 277 |
zoom=2,
|
| 278 |
center={"lat": 60, "lon": 90},
|
| 279 |
opacity=0.6,
|
| 280 |
+
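labels={'color_value': 'Overall Ranking'} # NOTE(review): color_label computed above appears unused in the visible hunks; this label looks copied from create_ranking_map — confirm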
|
| 281 |
)
|
| 282 |
|
| 283 |
# Format values for tooltip
|
| 284 |
+
merged['formatted_value'] = merged['rel_indicator_value'].apply(format_value)
|
| 285 |
merged['formatted_ranking'] = merged['Ranking'].apply(lambda x: str(int(x)) if pd.notna(x) else "N/A")
|
| 286 |
|
| 287 |
# Tooltip Configuration
|
|
|
|
| 291 |
"<b>Rank:</b> %{customdata[0]}<br>"
|
| 292 |
"<b>Region:</b> %{customdata[1]}<br>"
|
| 293 |
"<b>Indicator Name:</b> %{customdata[2]}<br>"
|
| 294 |
+
"<b>Relative Value:</b> %{customdata[3]}"
|
| 295 |
"<extra></extra>"
|
| 296 |
)
|
| 297 |
)
|
| 298 |
|
| 299 |
fig.update_layout(
|
| 300 |
margin={"r":0,"t":0,"l":0,"b":0},
|
| 301 |
+
height=800
|
| 302 |
)
|
| 303 |
|
| 304 |
return fig
|
|
|
|
| 309 |
gr.Markdown("## Russian Regional Indicators")
|
| 310 |
|
| 311 |
with gr.Row():
|
| 312 |
+
# Initialize with ranking map
|
| 313 |
+
map_plot = gr.Plot(label="Regional Distribution", value=create_ranking_map())
|
| 314 |
|
| 315 |
with gr.Row():
|
| 316 |
# Prepare table value with styling metadata
|
|
|
|
| 325 |
table = gr.DataFrame(
|
| 326 |
value=table_value,
|
| 327 |
label="Select an Indicator",
|
| 328 |
+
datatype=["str", "str", "str", "str", "number", "number", "number", "str"],
|
| 329 |
interactive=True,
|
| 330 |
+
max_height=700,
|
| 331 |
+
column_widths=column_widths
|
| 332 |
)
|
| 333 |
|
| 334 |
# Wire the selection event
|