rogergs94 committed on
Commit
2de9065
·
verified ·
1 Parent(s): 7dabfd5

app updated with filters in the map section

Browse files
Files changed (1) hide show
  1. app.py +370 -240
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import folium
2
- from folium.plugins import MarkerCluster
3
  from geopy.geocoders import Nominatim
4
  from functools import lru_cache
5
  import pandas as pd
@@ -121,7 +121,6 @@ class FeedReader:
121
  self.df = df
122
  df['last_update'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
123
  df_processed = df
124
- #df_processed = df.fillna(0).infer_objects(copy=False)
125
 
126
  summary = f"""
127
  📊 **Feed Processing Results**
@@ -238,194 +237,288 @@ class FeedReader:
238
  )
239
  return display_df
240
 
241
- def generate_map_with_job_counts(self, city_col, state_col=None, country_col=None,
242
- title_col=None, max_points=500, progress=gr.Progress()):
243
- """Generate map with job count markers per location with progress tracking"""
244
- if self.df is None or self.df.empty:
245
- return None, "⚠️ Please load a feed first"
246
-
247
- if city_col not in self.df.columns:
248
- return None, f"⚠️ Column '{city_col}' not found in dataset"
249
-
250
- progress(0, desc="Initializing map generation...")
251
-
252
- # Create map
253
- m = folium.Map(location=[20, 0], zoom_start=2)
254
-
255
- progress(0.1, desc="Processing location data...")
256
-
257
- # Prepare location data
258
- location_data = []
259
- total_rows = len(self.df)
260
-
261
- for idx, (_, row) in enumerate(self.df.iterrows()):
262
- if idx % 100 == 0: # Update progress every 100 rows
263
- progress(0.1 + (0.3 * idx / total_rows),
264
- desc=f"Processing locations... {idx}/{total_rows}")
265
 
266
- city = str(row[city_col]) if city_col else ""
267
- state = str(row[state_col]) if state_col and state_col in self.df.columns else ""
268
- country = str(row[country_col]) if country_col and country_col in self.df.columns else ""
269
 
270
- location_parts = [p for p in [city, state, country] if p and p.strip() and p != 'nan']
271
- if not location_parts:
272
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
- location_key = ", ".join(location_parts)
275
- title_id = str(row[title_col]) if title_col and title_col in self.df.columns else None
276
-
277
- location_data.append({
278
- 'location_key': location_key,
279
- 'city': city,
280
- 'state': state,
281
- 'country': country,
282
- 'title_id': title_id
283
- })
284
-
285
- if not location_data:
286
- progress(1, desc="No valid location data found")
287
- return None, "⚠️ No valid location data found"
288
-
289
- progress(0.4, desc="Aggregating location statistics...")
290
-
291
- # Group by location
292
- locations_df = pd.DataFrame(location_data)
293
-
294
- if title_col and title_col in self.df.columns:
295
- location_stats = locations_df.groupby('location_key').agg({
296
- 'title_id': ['count', 'nunique'],
297
- 'city': 'first',
298
- 'state': 'first',
299
- 'country': 'first'
300
- }).reset_index()
301
- location_stats.columns = ['location_key', 'total_postings', 'unique_titles', 'city', 'state', 'country']
302
- else:
303
- location_stats = locations_df.groupby('location_key').agg({
304
- 'city': 'first',
305
- 'state': 'first',
306
- 'country': 'first'
307
- }).reset_index()
308
- location_stats['total_postings'] = locations_df.groupby('location_key').size().values
309
- location_stats['unique_titles'] = location_stats['total_postings']
310
-
311
- progress(0.5, desc="Starting geocoding process...")
312
-
313
- # Geocoding with progress tracking
314
- successful_mappings = 0
315
- failed_geocoding = 0
316
- total_locations = len(location_stats)
317
-
318
- for idx, (_, row) in enumerate(location_stats.iterrows()):
319
- if successful_mappings >= max_points:
320
- break
321
-
322
- # Update progress during geocoding
323
- progress(0.5 + (0.4 * idx / total_locations),
324
- desc=f"Geocoding locations... {successful_mappings} mapped, {failed_geocoding} failed")
325
-
326
- location_key = row['location_key']
327
- total_postings = row['total_postings']
328
- unique_titles = row['unique_titles']
329
-
330
- location = geocode_cached(location_key)
331
- if location:
332
- # Calculate marker properties
333
- max_titles = location_stats['unique_titles'].max()
334
- min_size = 10
335
- max_size = 50
336
 
337
- if max_titles > 0:
338
- marker_size = min_size + (max_size - min_size) * (unique_titles / max_titles)
339
- else:
340
- marker_size = min_size
 
 
 
 
 
 
 
 
 
 
 
 
 
341
 
342
- # Color coding
343
- if unique_titles >= max_titles * 0.8:
344
- color = 'red'
345
- elif unique_titles >= max_titles * 0.5:
346
- color = 'orange'
347
- elif unique_titles >= max_titles * 0.2:
348
- color = 'yellow'
 
 
 
 
349
  else:
350
- color = 'green'
 
 
 
 
 
 
 
 
351
 
352
- # Create popup
353
- popup_text = f"""
354
- <div style='font-family: Arial, sans-serif; min-width: 200px;'>
355
- <h4 style='color: #2E86AB; margin-bottom: 10px;'>πŸ“ {location_key}</h4>
356
- <hr style='margin: 5px 0;'>
357
- <p><strong>🎯 Unique Titles:</strong> {unique_titles}</p>
358
- <p><strong>πŸ“Š Total Postings:</strong> {total_postings}</p>
359
- <p><strong>πŸ“ˆ Avg Postings/Title:</strong> {round(total_postings/unique_titles, 1) if unique_titles > 0 else 0}</p>
360
  </div>
361
  """
362
 
363
- folium.CircleMarker(
364
- location=[location.latitude, location.longitude],
365
- radius=marker_size,
366
- popup=folium.Popup(popup_text, max_width=300),
367
- color='black',
368
- weight=2,
369
- fillColor=color,
370
- fillOpacity=0.7,
371
- tooltip=f"{location_key}: {unique_titles} titles"
372
- ).add_to(m)
373
 
374
- successful_mappings += 1
 
 
 
 
 
 
 
 
 
 
 
 
375
  else:
376
- failed_geocoding += 1
 
 
377
 
378
- # Small delay to prevent overwhelming the geocoding service
379
- time.sleep(0.1)
380
-
381
- progress(0.9, desc="Finalizing map...")
382
-
383
- # Add legend
384
- legend_html = f"""
385
- <div style='position: fixed;
386
- bottom: 50px; left: 50px; width: 200px; height: 120px;
387
- background-color: white; border:2px solid grey; z-index:9999;
388
- font-size:14px; padding: 10px'>
389
- <h4 style='margin:0; color: #2E86AB;'>πŸ“Š Job Count Legend</h4>
390
- <p style='margin:5px 0;'><i style='color:red'>●</i> High (80%+ of max)</p>
391
- <p style='margin:5px 0;'><i style='color:orange'>●</i> Medium-High (50-80%)</p>
392
- <p style='margin:5px 0;'><i style='color:yellow'>●</i> Medium (20-50%)</p>
393
- <p style='margin:5px 0;'><i style='color:green'>●</i> Low (&lt;20%)</p>
394
- <small>Marker size = Job count</small>
395
- </div>
396
- """
397
-
398
- m.get_root().html.add_child(folium.Element(legend_html))
399
-
400
- progress(1, desc="Map generation complete!")
401
-
402
- # Generate status message
403
- status_msg = f"""
404
- ✅ **Map Generated Successfully**
405
 
406
- πŸ—ΊοΈ **Mapped Locations:** {successful_mappings}
407
- ❌ **Failed to Geocode:** {failed_geocoding}
408
- πŸ“Š **Total Unique Locations:** {len(location_stats)}
409
- 🎯 **Columns Used:**
410
  • City: {city_col}
411
- β€’ State: {state_col if state_col else 'Not selected'}
412
- β€’ Country: {country_col if country_col else 'Not selected'}
413
- β€’ Title/ID: {title_col if title_col else 'Not selected'}
414
 
415
- πŸ’‘ **Map Features:**
416
- β€’ Marker size represents job count
417
- β€’ Colors show relative job density
418
- β€’ Click markers for detailed info
419
- β€’ Hover for quick stats
420
- """
421
-
422
- if title_col:
423
- top_location_idx = location_stats['unique_titles'].idxmax()
424
- top_location = location_stats.loc[top_location_idx, 'location_key']
425
- top_count = location_stats['unique_titles'].max()
426
- status_msg += f"\nπŸ† **Top Location:** {top_location} ({top_count} titles)"
427
-
428
- return m._repr_html_(), status_msg
429
 
430
  def generate_csv(self, df, filename_prefix="feed"):
431
  """Generate CSV file for download"""
@@ -464,7 +557,7 @@ def create_enhanced_gradio_app():
464
  gr.Markdown("""
465
  # 📡 Enhanced Feed Reader & Analyzer
466
 
467
- Load and analyze XML or JSON feeds with advanced multi-filtering and interactive mapping.
468
  """)
469
 
470
  with gr.Tab("πŸ“₯ Load Feed"):
@@ -916,11 +1009,11 @@ def create_enhanced_gradio_app():
916
  outputs=[weighted_stats_summary, weighted_stats_output, weighted_stats_csv]
917
  )
918
 
919
- with gr.Tab("🌍 Interactive Job Map"):
920
  with gr.Row():
921
  with gr.Column():
922
- gr.Markdown("### πŸ“ Map Configuration")
923
- gr.Markdown("Select columns for geographic visualization:")
924
 
925
  city_col = gr.Dropdown(
926
  label="πŸ™οΈ City Column (Required)",
@@ -940,31 +1033,45 @@ def create_enhanced_gradio_app():
940
  value=None,
941
  info="Column containing country names"
942
  )
943
- title_col = gr.Dropdown(
944
- label="🎯 Title/Job ID Column (Optional)",
945
- choices=[],
 
 
 
 
 
 
 
 
 
 
 
946
  value=None,
947
- info="Column containing job titles or reference IDs"
 
 
 
 
 
 
 
948
  )
949
 
950
- with gr.Row():
951
- map_btn = gr.Button("πŸ—ΊοΈ Generate Interactive Map", variant="primary", size="lg")
952
- clear_map_btn = gr.Button("🧹 Clear Map", variant="secondary")
953
 
954
- with gr.Column():
955
- map_status = gr.Markdown()
956
 
957
  with gr.Row():
958
- map_output = gr.HTML(label="Interactive Job Distribution Map")
959
 
960
- def update_map_choices(column_choices):
961
  if not column_choices:
962
- return (
963
- gr.Dropdown(choices=[]),
964
- gr.Dropdown(choices=[]),
965
- gr.Dropdown(choices=[]),
966
- gr.Dropdown(choices=[])
967
- )
968
 
969
  optional_choices = ["None"] + column_choices
970
 
@@ -972,7 +1079,8 @@ def create_enhanced_gradio_app():
972
  city_default = None
973
  state_default = "None"
974
  country_default = "None"
975
- title_default = "None"
 
976
 
977
  for col in column_choices:
978
  col_lower = col.lower()
@@ -983,88 +1091,110 @@ def create_enhanced_gradio_app():
983
  state_default = col
984
  elif any(term in col_lower for term in ['country', 'nation', 'pais', 'pays']):
985
  country_default = col
986
- elif any(term in col_lower for term in ['title', 'job', 'position', 'req', 'reference', 'id', 'titulo']):
987
- title_default = col
 
 
988
 
989
  return (
990
  gr.Dropdown(choices=column_choices, value=city_default),
991
  gr.Dropdown(choices=optional_choices, value=state_default),
992
  gr.Dropdown(choices=optional_choices, value=country_default),
993
- gr.Dropdown(choices=optional_choices, value=title_default)
 
 
994
  )
995
 
 
 
 
 
 
 
 
996
  column_choices_state.change(
997
- update_map_choices,
998
  inputs=[column_choices_state],
999
- outputs=[city_col, state_col, country_col, title_col]
 
 
 
 
 
 
1000
  )
1001
 
1002
- def generate_job_count_map(city_col, state_col, country_col, title_col, progress=gr.Progress()):
1003
  if not city_col:
1004
  return "❌ Please select a city column", None
1005
 
1006
  # Handle "None" selections
1007
  state_col = None if state_col == "None" else state_col
1008
  country_col = None if country_col == "None" else country_col
1009
- title_col = None if title_col == "None" else title_col
 
 
1010
 
1011
- map_html, msg = feed_reader.generate_map_with_job_counts(
1012
- city_col, state_col, country_col, title_col, progress=progress
 
1013
  )
1014
- return msg, map_html
1015
 
1016
- def clear_map():
1017
- return "🧹 Map cleared", ""
1018
 
1019
- map_btn.click(
1020
- generate_job_count_map,
1021
- inputs=[city_col, state_col, country_col, title_col],
1022
- outputs=[map_status, map_output]
1023
  )
1024
 
1025
- clear_map_btn.click(
1026
- clear_map,
1027
- outputs=[map_status, map_output]
1028
  )
1029
 
1030
  gr.Markdown("""
1031
  ---
1032
  ### πŸ“ Enhanced Features:
1033
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1034
  **πŸ” Advanced Multi-Filtering:**
1035
  - Apply up to 4 simultaneous filters on different columns
1036
  - Real-time progress tracking during filter operations
1037
  - Smart dropdown population with available values
1038
  - Clear filter functionality
1039
 
1040
- **🌍 Interactive Map with Progress:**
1041
- - Real-time progress bar during map generation
1042
- - Geocoding progress tracking
1043
- - Location data processing updates
1044
- - Performance optimizations with delays to prevent API limits
1045
-
1046
  **πŸ“Š Enhanced Data Processing:**
1047
- - Improved error handling
1048
- - Better memory management
1049
- - Optimized for large datasets
1050
- - Smart column auto-detection
1051
-
1052
- **πŸ’‘ Usage Tips:**
1053
- - **Multi-Filtering**: Select "None" to skip a filter, "All" to show all values for that column
1054
- - **Map Generation**: Progress bar shows geocoding status and success/failure rates
1055
- - **Performance**: Large datasets may take longer to process - progress bars keep you informed
1056
- - **Column Detection**: Common column names are automatically detected and pre-selected
1057
 
1058
- **🎯 Common Filter Combinations:**
1059
- - Filter 1: Company/Client + Filter 2: City
1060
- - Filter 1: Job Title + Filter 2: State + Filter 3: Country
1061
- - Filter 1: Category + Filter 2: Experience Level + Filter 3: Salary Range
 
1062
 
1063
- **πŸ—ΊοΈ Map Features:**
1064
- - Marker size = Job count per location
1065
- - Color coding = Job density (red=high, green=low)
1066
- - Interactive popups with detailed statistics
1067
- - Automatic legend and geocoding status
1068
  """)
1069
 
1070
  return app
 
1
  import folium
2
+ from folium.plugins import HeatMap
3
  from geopy.geocoders import Nominatim
4
  from functools import lru_cache
5
  import pandas as pd
 
121
  self.df = df
122
  df['last_update'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
123
  df_processed = df
 
124
 
125
  summary = f"""
126
  📊 **Feed Processing Results**
 
237
  )
238
  return display_df
239
 
240
+ def generate_heatmap(self, city_col, state_col=None, country_col=None,
241
+ metric_col=None, filter_col=None, filter_value=None,
242
+ max_points=500, progress=gr.Progress()):
243
+ """Generate heatmap based on selected metric with optional filtering"""
244
+ try:
245
+ if self.df is None or self.df.empty:
246
+ return None, "⚠️ Please load a feed first"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
+ if city_col not in self.df.columns:
249
+ available_cols = ', '.join(self.df.columns.tolist()[:10])
250
+ return None, f"⚠️ Column '{city_col}' not found. Available columns: {available_cols}..."
251
 
252
+ progress(0, desc="Initializing heatmap generation...")
253
+
254
+ # Apply filter if specified
255
+ working_df = self.df.copy()
256
+ original_rows = len(working_df)
257
+
258
+ if filter_col and filter_value and filter_col != "None" and filter_value != "All":
259
+ if filter_col in working_df.columns:
260
+ working_df = working_df[working_df[filter_col].astype(str) == str(filter_value)]
261
+ if working_df.empty:
262
+ return None, f"⚠️ No data found for filter: {filter_col} = {filter_value}"
263
+ else:
264
+ return None, f"⚠️ Filter column '{filter_col}' not found in dataset"
265
+
266
+ progress(0.1, desc=f"Processing {len(working_df)} rows...")
267
+
268
+ # Prepare location data with better error handling
269
+ location_data = []
270
+ skipped_rows = 0
271
+
272
+ for idx, (_, row) in enumerate(working_df.iterrows()):
273
+ try:
274
+ city = str(row[city_col]).strip() if pd.notna(row[city_col]) else ""
275
+ state = ""
276
+ country = ""
277
+
278
+ if state_col and state_col in working_df.columns and state_col != "None":
279
+ state = str(row[state_col]).strip() if pd.notna(row[state_col]) else ""
280
+
281
+ if country_col and country_col in working_df.columns and country_col != "None":
282
+ country = str(row[country_col]).strip() if pd.notna(row[country_col]) else ""
283
+
284
+ # Filter out invalid location data
285
+ location_parts = []
286
+ if city and city.lower() not in ['nan', 'none', 'null', '']:
287
+ location_parts.append(city)
288
+ if state and state.lower() not in ['nan', 'none', 'null', '']:
289
+ location_parts.append(state)
290
+ if country and country.lower() not in ['nan', 'none', 'null', '']:
291
+ location_parts.append(country)
292
+
293
+ if not location_parts:
294
+ skipped_rows += 1
295
+ continue
296
+
297
+ location_key = ", ".join(location_parts)
298
+
299
+ # Get metric value with better error handling
300
+ metric_value = 1.0 # Default weight for count-based heatmap
301
+ if metric_col and metric_col in working_df.columns and metric_col != "None":
302
+ try:
303
+ val = row[metric_col]
304
+ if pd.notna(val):
305
+ metric_value = float(val)
306
+ if metric_value <= 0: # Handle zero or negative values
307
+ metric_value = 0.1 # Small positive value
308
+ else:
309
+ metric_value = 1.0
310
+ except (ValueError, TypeError):
311
+ metric_value = 1.0
312
+
313
+ location_data.append({
314
+ 'location_key': location_key,
315
+ 'city': city,
316
+ 'state': state,
317
+ 'country': country,
318
+ 'metric_value': metric_value
319
+ })
320
+
321
+ except Exception as e:
322
+ skipped_rows += 1
323
+ continue
324
+
325
+ if not location_data:
326
+ return None, f"⚠️ No valid location data found. Processed {len(working_df)} rows, skipped {skipped_rows} rows with invalid location data."
327
+
328
+ progress(0.3, desc=f"Found {len(location_data)} valid locations, aggregating...")
329
+
330
+ # Group by location and calculate metrics
331
+ locations_df = pd.DataFrame(location_data)
332
+
333
+ try:
334
+ if metric_col and metric_col != "None":
335
+ # For numeric metrics
336
+ location_stats = locations_df.groupby('location_key').agg({
337
+ 'metric_value': ['sum', 'count', 'mean'],
338
+ 'city': 'first',
339
+ 'state': 'first',
340
+ 'country': 'first'
341
+ }).reset_index()
342
+ location_stats.columns = ['location_key', 'total_metric', 'job_count', 'avg_metric', 'city', 'state', 'country']
343
+ location_stats['heatmap_weight'] = location_stats['avg_metric']
344
+ else:
345
+ # For count-based heatmap
346
+ location_stats = locations_df.groupby('location_key').agg({
347
+ 'city': 'first',
348
+ 'state': 'first',
349
+ 'country': 'first'
350
+ }).reset_index()
351
+ location_stats['job_count'] = locations_df.groupby('location_key').size().values
352
+ location_stats['heatmap_weight'] = location_stats['job_count']
353
+ except Exception as e:
354
+ return None, f"⚠️ Error aggregating location data: {str(e)}"
355
+
356
+ progress(0.4, desc=f"Starting geocoding for {len(location_stats)} unique locations...")
357
+
358
+ # Geocoding with enhanced error handling
359
+ heat_data = []
360
+ successful_mappings = 0
361
+ failed_geocoding = 0
362
+ geocoding_errors = []
363
+
364
+ for idx, (_, row) in enumerate(location_stats.iterrows()):
365
+ if successful_mappings >= max_points:
366
+ break
367
 
368
+ try:
369
+ # Update progress during geocoding
370
+ progress_val = 0.4 + (0.5 * idx / len(location_stats))
371
+ progress(progress_val, desc=f"Geocoding {idx+1}/{len(location_stats)}: {successful_mappings} successful")
372
+
373
+ location_key = row['location_key']
374
+ weight = row['heatmap_weight']
375
+
376
+ if weight <= 0:
377
+ failed_geocoding += 1
378
+ continue
379
+
380
+ # Try geocoding with timeout and error handling
381
+ location = None
382
+ try:
383
+ location = geocode_cached(location_key)
384
+ except Exception as geocode_error:
385
+ geocoding_errors.append(f"{location_key}: {str(geocode_error)}")
386
+ failed_geocoding += 1
387
+ continue
388
+
389
+ if location and hasattr(location, 'latitude') and hasattr(location, 'longitude'):
390
+ if location.latitude and location.longitude:
391
+ heat_data.append([float(location.latitude), float(location.longitude), float(weight)])
392
+ successful_mappings += 1
393
+ else:
394
+ failed_geocoding += 1
395
+ else:
396
+ failed_geocoding += 1
397
+
398
+ # Small delay to prevent overwhelming the geocoding service
399
+ time.sleep(0.05) # Reduced delay for small datasets
400
+
401
+ except Exception as e:
402
+ geocoding_errors.append(f"{location_key}: {str(e)}")
403
+ failed_geocoding += 1
404
+ continue
405
+
406
+ if not heat_data:
407
+ error_details = f"No valid coordinates found. Geocoding errors: {geocoding_errors[:3]}" if geocoding_errors else "No valid coordinates found"
408
+ return None, f"⚠️ {error_details}"
409
+
410
+ progress(0.9, desc="Generating heatmap visualization...")
411
+
412
+ try:
413
+ # Create map with heatmap
414
+ # Calculate center point from successful geocodes
415
+ lats = [point[0] for point in heat_data]
416
+ lons = [point[1] for point in heat_data]
417
+ center_lat = sum(lats) / len(lats)
418
+ center_lon = sum(lons) / len(lons)
 
 
 
 
 
 
 
 
 
 
 
419
 
420
+ m = folium.Map(location=[center_lat, center_lon], zoom_start=6)
421
+
422
+ # Add heatmap layer with error handling
423
+ HeatMap(
424
+ heat_data,
425
+ min_opacity=0.3,
426
+ max_zoom=18,
427
+ radius=25,
428
+ blur=20,
429
+ gradient={0.2: 'blue', 0.5: 'lime', 0.7: 'orange', 1.0: 'red'}
430
+ ).add_to(m)
431
+
432
+ # Generate statistics for legend
433
+ weights = [point[2] for point in heat_data]
434
+ min_weight = min(weights)
435
+ max_weight = max(weights)
436
+ avg_weight = sum(weights) / len(weights)
437
 
438
+ # Create legend based on metric type
439
+ if metric_col and metric_col != "None":
440
+ legend_title = f"Heatmap: {metric_col}"
441
+ legend_content = f"""
442
+ <h4 style='margin:0; color: #2E86AB;'>{legend_title}</h4>
443
+ <p style='margin:3px 0;'><span style='color:red'>β– </span> High ({max_weight:.2f})</p>
444
+ <p style='margin:3px 0;'><span style='color:orange'>β– </span> Med-High</p>
445
+ <p style='margin:3px 0;'><span style='color:lime'>β– </span> Medium</p>
446
+ <p style='margin:3px 0;'><span style='color:blue'>β– </span> Low ({min_weight:.2f})</p>
447
+ <small>Avg: {avg_weight:.2f} | Locations: {len(heat_data)}</small>
448
+ """
449
  else:
450
+ legend_title = "Job Count Heatmap"
451
+ legend_content = f"""
452
+ <h4 style='margin:0; color: #2E86AB;'>{legend_title}</h4>
453
+ <p style='margin:3px 0;'><span style='color:red'>β– </span> High ({int(max_weight)} jobs)</p>
454
+ <p style='margin:3px 0;'><span style='color:orange'>β– </span> Med-High</p>
455
+ <p style='margin:3px 0;'><span style='color:lime'>β– </span> Medium</p>
456
+ <p style='margin:3px 0;'><span style='color:blue'>β– </span> Low ({int(min_weight)} jobs)</p>
457
+ <small>Avg: {avg_weight:.1f} jobs | Locations: {len(heat_data)}</small>
458
+ """
459
 
460
+ legend_html = f"""
461
+ <div style='position: fixed;
462
+ bottom: 50px; left: 50px; width: 220px; height: 120px;
463
+ background-color: white; border:2px solid grey; z-index:9999;
464
+ font-size:12px; padding: 8px; border-radius: 5px;'>
465
+ {legend_content}
 
 
466
  </div>
467
  """
468
 
469
+ m.get_root().html.add_child(folium.Element(legend_html))
 
 
 
 
 
 
 
 
 
470
 
471
+ except Exception as e:
472
+ return None, f"⚠️ Error creating map visualization: {str(e)}"
473
+
474
+ progress(1, desc="Heatmap generation complete!")
475
+
476
+ # Generate detailed status message
477
+ filter_info = f" (Filtered by {filter_col}: {filter_value})" if filter_col and filter_value and filter_col != "None" and filter_value != "All" else ""
478
+
479
+ # Format values based on metric type
480
+ if metric_col and metric_col != "None":
481
+ min_val_str = f"{min_weight:.2f}"
482
+ max_val_str = f"{max_weight:.2f}"
483
+ avg_val_str = f"{avg_weight:.2f}"
484
  else:
485
+ min_val_str = f"{int(min_weight)}"
486
+ max_val_str = f"{int(max_weight)}"
487
+ avg_val_str = f"{avg_weight:.1f}"
488
 
489
+ status_msg = f"""
490
+ ✅ **Heatmap Generated Successfully**
491
+
492
+ πŸ“Š **Data Processing:**
493
+ β€’ Original Rows: {original_rows}
494
+ β€’ Valid Locations: {len(location_data)}
495
+ β€’ Unique Locations: {len(location_stats)}
496
+ β€’ Skipped Rows: {skipped_rows}
497
+ {filter_info}
498
+
499
+ 🌍 **Geocoding Results:**
500
+ β€’ Successfully Mapped: {successful_mappings}
501
+ β€’ Failed to Geocode: {failed_geocoding}
502
+ β€’ Success Rate: {(successful_mappings/(successful_mappings+failed_geocoding)*100):.1f}%
 
 
 
 
 
 
 
 
 
 
 
 
 
503
 
504
+ 🎯 **Heatmap Configuration:**
505
+ β€’ Metric Used: {metric_col if metric_col and metric_col != "None" else "Job Count"}
 
 
506
  • City: {city_col}
507
+ β€’ State: {state_col if state_col and state_col != "None" else 'Not used'}
508
+ β€’ Country: {country_col if country_col and country_col != "None" else 'Not used'}
 
509
 
510
+ πŸ“ˆ **Value Statistics:**
511
+ β€’ Min Value: {min_val_str}
512
+ β€’ Max Value: {max_val_str}
513
+ β€’ Average: {avg_val_str}
514
+
515
+ 🌈 **Color Mapping:** Red=High, Orange=Med-High, Green=Medium, Blue=Low
516
+ """
517
+
518
+ return m._repr_html_(), status_msg
519
+
520
+ except Exception as e:
521
+ return None, f"⚠️ Unexpected error in heatmap generation: {str(e)}. Please check your data and try again."
 
 
522
 
523
  def generate_csv(self, df, filename_prefix="feed"):
524
  """Generate CSV file for download"""
 
557
  gr.Markdown("""
558
  # 📡 Enhanced Feed Reader & Analyzer
559
 
560
+ Load and analyze XML or JSON feeds with advanced multi-filtering and interactive heatmap visualization.
561
  """)
562
 
563
  with gr.Tab("πŸ“₯ Load Feed"):
 
1009
  outputs=[weighted_stats_summary, weighted_stats_output, weighted_stats_csv]
1010
  )
1011
 
1012
+ with gr.Tab("🌍 Interactive Heatmap"):
1013
  with gr.Row():
1014
  with gr.Column():
1015
+ gr.Markdown("### πŸ“ Heatmap Configuration")
1016
+ gr.Markdown("Create heatmaps based on job metrics and locations:")
1017
 
1018
  city_col = gr.Dropdown(
1019
  label="πŸ™οΈ City Column (Required)",
 
1033
  value=None,
1034
  info="Column containing country names"
1035
  )
1036
+
1037
+ with gr.Column():
1038
+ gr.Markdown("### 🎯 Heatmap Metrics & Filters")
1039
+
1040
+ metric_col = gr.Dropdown(
1041
+ label="πŸ“Š Metric Column (Optional)",
1042
+ choices=[],
1043
+ value=None,
1044
+ info="Column to use for heatmap intensity (CPC, CPA, etc.). Leave empty for job count."
1045
+ )
1046
+
1047
+ filter_col = gr.Dropdown(
1048
+ label="πŸ” Filter Column (Optional)",
1049
+ choices=[],
1050
  value=None,
1051
+ info="Column to filter data before creating heatmap (Company, Client, etc.)"
1052
+ )
1053
+
1054
+ filter_val = gr.Dropdown(
1055
+ label="🎯 Filter Value",
1056
+ choices=[],
1057
+ value=None,
1058
+ info="Specific value to filter by"
1059
  )
1060
 
1061
+ with gr.Row():
1062
+ heatmap_btn = gr.Button("πŸ”₯ Generate Heatmap", variant="primary", size="lg")
1063
+ clear_heatmap_btn = gr.Button("🧹 Clear Heatmap", variant="secondary")
1064
 
1065
+ with gr.Row():
1066
+ heatmap_status = gr.Markdown()
1067
 
1068
  with gr.Row():
1069
+ heatmap_output = gr.HTML(label="Interactive Job Heatmap")
1070
 
1071
+ def update_heatmap_choices(column_choices):
1072
  if not column_choices:
1073
+ empty_choices = gr.Dropdown(choices=[])
1074
+ return (empty_choices, empty_choices, empty_choices, empty_choices, empty_choices, empty_choices)
 
 
 
 
1075
 
1076
  optional_choices = ["None"] + column_choices
1077
 
 
1079
  city_default = None
1080
  state_default = "None"
1081
  country_default = "None"
1082
+ metric_default = "None"
1083
+ filter_default = "None"
1084
 
1085
  for col in column_choices:
1086
  col_lower = col.lower()
 
1091
  state_default = col
1092
  elif any(term in col_lower for term in ['country', 'nation', 'pais', 'pays']):
1093
  country_default = col
1094
+ elif any(term in col_lower for term in ['cpc', 'cpa', 'cost', 'payout', 'bid', 'sponsored']):
1095
+ metric_default = col
1096
+ elif any(term in col_lower for term in ['company', 'client', 'advertiser', 'brand']):
1097
+ filter_default = col
1098
 
1099
  return (
1100
  gr.Dropdown(choices=column_choices, value=city_default),
1101
  gr.Dropdown(choices=optional_choices, value=state_default),
1102
  gr.Dropdown(choices=optional_choices, value=country_default),
1103
+ gr.Dropdown(choices=optional_choices, value=metric_default),
1104
+ gr.Dropdown(choices=optional_choices, value=filter_default),
1105
+ gr.Dropdown(choices=["All"], value="All")
1106
  )
1107
 
1108
+ def update_filter_values_heatmap(selected_column):
1109
+ if not selected_column or selected_column == "None" or feed_reader.df is None:
1110
+ return gr.Dropdown(choices=["All"], value="All")
1111
+
1112
+ unique_values = feed_reader.get_column_unique_values(selected_column)
1113
+ return gr.Dropdown(choices=unique_values, value="All" if unique_values else "All")
1114
+
1115
  column_choices_state.change(
1116
+ update_heatmap_choices,
1117
  inputs=[column_choices_state],
1118
+ outputs=[city_col, state_col, country_col, metric_col, filter_col, filter_val]
1119
+ )
1120
+
1121
+ filter_col.change(
1122
+ update_filter_values_heatmap,
1123
+ inputs=[filter_col],
1124
+ outputs=[filter_val]
1125
  )
1126
 
1127
+ def generate_heatmap(city_col, state_col, country_col, metric_col, filter_col, filter_val, progress=gr.Progress()):
1128
  if not city_col:
1129
  return "❌ Please select a city column", None
1130
 
1131
  # Handle "None" selections
1132
  state_col = None if state_col == "None" else state_col
1133
  country_col = None if country_col == "None" else country_col
1134
+ metric_col = None if metric_col == "None" else metric_col
1135
+ filter_col = None if filter_col == "None" else filter_col
1136
+ filter_val = None if filter_val == "All" else filter_val
1137
 
1138
+ heatmap_html, msg = feed_reader.generate_heatmap(
1139
+ city_col, state_col, country_col, metric_col,
1140
+ filter_col, filter_val, progress=progress
1141
  )
1142
+ return msg, heatmap_html
1143
 
1144
+ def clear_heatmap():
1145
+ return "🧹 Heatmap cleared", ""
1146
 
1147
+ heatmap_btn.click(
1148
+ generate_heatmap,
1149
+ inputs=[city_col, state_col, country_col, metric_col, filter_col, filter_val],
1150
+ outputs=[heatmap_status, heatmap_output]
1151
  )
1152
 
1153
+ clear_heatmap_btn.click(
1154
+ clear_heatmap,
1155
+ outputs=[heatmap_status, heatmap_output]
1156
  )
1157
 
1158
  gr.Markdown("""
1159
  ---
1160
  ### πŸ“ Enhanced Features:
1161
 
1162
+ **πŸ”₯ Interactive Heatmap Visualization:**
1163
+ - Heat intensity based on selected metrics (CPC, CPA, job count, etc.)
1164
+ - Real-time filtering by company, client, or any column
1165
+ - Color-coded intensity: Red (high) to Blue (low)
1166
+ - Progress tracking during geocoding and map generation
1167
+ - Dynamic legend with actual metric ranges
1168
+
1169
+ **🎯 Heatmap Configuration Options:**
1170
+ - **Metric Column**: Choose CPC, CPA, or any numeric column for intensity
1171
+ - **Filter Options**: Pre-filter data by company, client, etc.
1172
+ - **Location Mapping**: City (required), State, Country (optional)
1173
+ - **Automatic Detection**: Smart column name detection
1174
+
1175
  **πŸ” Advanced Multi-Filtering:**
1176
  - Apply up to 4 simultaneous filters on different columns
1177
  - Real-time progress tracking during filter operations
1178
  - Smart dropdown population with available values
1179
  - Clear filter functionality
1180
 
 
 
 
 
 
 
1181
  **πŸ“Š Enhanced Data Processing:**
1182
+ - Improved error handling and memory management
1183
+ - Optimized for large datasets with progress indicators
1184
+ - Smart column auto-detection for common field names
1185
+ - Geocoding with rate limiting to prevent API issues
 
 
 
 
 
 
1186
 
1187
+ **πŸ’‘ Heatmap Usage Examples:**
1188
+ - **CPC Heatmap**: See where highest-paying jobs are located
1189
+ - **Job Count Heatmap**: Visualize job density by location
1190
+ - **Filtered Views**: Show only specific company/client job distributions
1191
+ - **Performance Analysis**: Compare metrics across geographic regions
1192
 
1193
+ **🌈 Heatmap Color Legend:**
1194
+ - **Red**: Highest values (top 20% of metric range)
1195
+ - **Orange**: High values (60-80% of range)
1196
+ - **Lime/Green**: Medium values (40-60% of range)
1197
+ - **Blue**: Lower values (bottom 40% of range)
1198
  """)
1199
 
1200
  return app