|
|
import folium |
|
|
from folium.plugins import HeatMap |
|
|
from geopy.geocoders import Nominatim |
|
|
from functools import lru_cache |
|
|
import pandas as pd |
|
|
import requests |
|
|
import xml.etree.ElementTree as ET |
|
|
import numpy as np |
|
|
from io import BytesIO, StringIO |
|
|
import gzip |
|
|
import datetime |
|
|
import gradio as gr |
|
|
import os |
|
|
import tempfile |
|
|
import pytz |
|
|
import time |
|
|
|
|
|
# Shared Nominatim client; the user agent identifies this app to the service.
geolocator = Nominatim(user_agent="feed_reader_app")


@lru_cache(maxsize=10000)
def _geocode_lookup(query):
    """Geocode *query* via Nominatim, letting errors propagate.

    Exceptions are deliberately NOT swallowed here: ``lru_cache`` only stores
    return values, so a raised error is never cached and the same query can be
    retried later.
    """
    return geolocator.geocode(query, timeout=10)


def geocode_cached(query):
    """Return the geocoded location for *query*, or ``None`` when it fails.

    Successful lookups (including a "no match" ``None`` returned by the
    geocoder itself) are memoised. Lookups that raise (timeouts, rate limiting,
    network errors) return ``None`` for this call only, so a transient failure
    no longer poisons the cache for that query.

    Args:
        query: Free-form location string, e.g. ``"Austin, TX, US"``.

    Returns:
        The object returned by ``geolocator.geocode`` or ``None``.
    """
    try:
        return _geocode_lookup(query)
    except Exception:
        # Best effort: callers treat None as "could not geocode".
        return None


# Keep the cache-introspection helpers that the lru_cache-decorated original exposed.
geocode_cached.cache_info = _geocode_lookup.cache_info
geocode_cached.cache_clear = _geocode_lookup.cache_clear
|
|
|
|
|
class FeedReader:
    """Fetch a job feed (XML, gzipped XML or JSON) and derive tables, CSV exports and heatmaps.

    The DataFrame produced by the last successful ``process_feed`` call is kept
    on ``self.df``; the filter, statistics and heatmap methods all read from it.
    """

    # Characters replaced with "_" when a download file name is built from user data.
    _UNSAFE_FILENAME_CHARS = '<>:"/\\|?*'
    # Cell values (lower-cased) that mean "no location" when building a geocoding query.
    _EMPTY_LOCATION_TOKENS = ('nan', 'none', 'null', '')
    # First two bytes of every gzip stream.
    _GZIP_MAGIC = b"\x1f\x8b"

    def __init__(self):
        # Last loaded feed; None until process_feed succeeds.
        self.df = None

    @staticmethod
    def truncate(value, max_length=49000):
        """Return *value* cut to *max_length* characters if it is a longer string.

        Non-string values and short strings are returned unchanged.
        """
        if isinstance(value, str) and len(value) > max_length:
            return value[:max_length]
        return value

    @staticmethod
    def clean_invalid_numbers(df):
        """Return a copy of *df* in which every float NaN/±inf cell is NaN."""
        def _scrub(cell):
            if isinstance(cell, float) and not np.isfinite(cell):
                return np.nan
            return cell

        return df.apply(lambda col: col.map(_scrub))

    @staticmethod
    def _safe_nunique(series):
        """Count distinct values, tolerating unhashable cells (lists/dicts from JSON)."""
        try:
            return series.nunique()
        except TypeError:
            return series.dropna().astype(str).nunique()

    @staticmethod
    def _ellipsize(series, limit):
        """Stringify *series* and shorten entries longer than *limit* to ``limit`` chars + '...'."""
        return series.astype(str).map(
            lambda text: text if len(text) <= limit else text[:limit] + '...'
        )

    @classmethod
    def _location_part(cls, value):
        """Return a stripped location component, or '' for missing/placeholder values."""
        if not pd.notna(value):
            return ""
        text = str(value).strip()
        return "" if text.lower() in cls._EMPTY_LOCATION_TOKENS else text

    def _json_to_dataframe(self, data):
        """Build a cleaned DataFrame from a decoded JSON payload.

        A list becomes one row per element, a dict with a ``"jobs"`` key uses
        that value, and anything else becomes a single-row frame.
        """
        if isinstance(data, list):
            df = pd.DataFrame(data)
        elif isinstance(data, dict) and "jobs" in data:
            df = pd.DataFrame(data["jobs"])
        else:
            df = pd.DataFrame([data])

        # Column-wise map instead of the deprecated DataFrame.applymap;
        # truncate() already ignores non-strings.
        df = df.apply(lambda col: col.map(self.truncate))
        return self.clean_invalid_numbers(df)

    def load_feed_to_dataframe(self, url, job_tag="job"):
        """Load an XML feed (.xml or .xml.gz) or JSON from a URL and convert to DataFrame.

        Args:
            url: Feed location. Treated as JSON when the response Content-Type
                contains ``application/json``, the URL ends with ``.json`` or
                the URL contains ``rest-api``; otherwise parsed as XML.
            job_tag: XML element name of one record. When no such element is
                found, ``item``, ``entry``, ``record`` and ``row`` are tried.

        Returns:
            For JSON feeds: the DataFrame alone.
            For XML feeds: ``(DataFrame, "Success")``.
            On any failure: ``(empty DataFrame, message)``.
            The mixed return shape is kept for backward compatibility;
            ``process_feed`` handles both.
        """
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            content_type = response.headers.get("Content-Type", "").lower()
            is_json = (
                "application/json" in content_type
                or url.endswith(".json")
                or "rest-api" in url.lower()
            )
            if is_json:
                return self._json_to_dataframe(response.json())

            payload = response.content
            # Detect gzip by magic bytes rather than by URL suffix so that
            # "feed.xml.gz?token=..." works and an already-decoded body is not
            # decompressed twice.
            if payload[:2] == self._GZIP_MAGIC:
                payload = gzip.decompress(payload)

            # NOTE(review): ElementTree does not protect against malicious XML
            # (entity expansion); consider a hardened parser for untrusted feeds.
            root = ET.fromstring(payload)

            # An empty tag would produce the invalid path ".//"; go straight to fallbacks.
            items = root.findall(f".//{job_tag}") if job_tag else []
            if not items:
                for fallback_tag in ("item", "entry", "record", "row"):
                    items = root.findall(f".//{fallback_tag}")
                    if items:
                        break

            if not items:
                return pd.DataFrame(), f"No <{job_tag}> elements found in the XML."

            records = [
                {child.tag: self.truncate(child.text) for child in item}
                for item in items
            ]
            df = self.clean_invalid_numbers(pd.DataFrame(records))
            return df, "Success"

        except Exception as e:
            return pd.DataFrame(), f"Error: {str(e)}"

    def process_feed(self, url, job_tag="job"):
        """Load a feed, remember it on ``self.df`` and build everything the UI shows.

        Args:
            url: Feed URL; surrounding whitespace is ignored.
            job_tag: XML record element name (ignored for JSON feeds).

        Returns:
            Always a 6-tuple ``(summary, dataframe, csv_path, preview_df,
            column_choices, metadata_df)``. On failure ``summary`` carries the
            error text, ``column_choices`` is ``[]`` and the other slots are
            ``None``. (The error paths used to return only five values, which
            broke callers unpacking six.)
        """
        if not url or not url.strip():
            return "Please enter a valid URL", None, None, None, [], None

        result = self.load_feed_to_dataframe(url.strip(), (job_tag or "").strip())

        if isinstance(result, tuple):
            df, message = result
            if df.empty:
                # Loader messages for exceptions already start with "Error: ".
                error_text = message if message.startswith("Error") else f"Error: {message}"
                return error_text, None, None, None, [], None
        else:
            df = result
            message = "Success"

        self.df = df
        # Same object as self.df, so the stored frame carries the timestamp too.
        df['last_update'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        summary = "\n".join([
            "",
            "📊 **Feed Processing Results**",
            "",
            f"✅ **Status:** {message}",
            f"📋 **Rows:** {df.shape[0]:,}",
            f"📝 **Columns:** {df.shape[1]}",
            "",
        ])

        columns = df.columns.tolist()
        metadata_df = pd.DataFrame({
            'Column Name': columns,
            'Data Type': [str(df[col].dtype) for col in columns],
            # nunique() raises on nested JSON values; fall back to string comparison.
            'Unique Values': [self._safe_nunique(df[col]) for col in columns],
            'Null Values': [df[col].isnull().sum() for col in columns],
        })

        return (
            summary,
            df,
            self.generate_csv(df, "feed"),
            self.get_preview(df),
            columns,
            metadata_df,
        )

    def get_column_unique_values(self, column_name):
        """Return ``["All", <sorted distinct non-null values as strings>]`` for a column.

        Returns ``[]`` when no feed is loaded or the column does not exist.
        """
        if self.df is None or column_name not in self.df.columns:
            return []

        distinct = self.df[column_name].dropna().astype(str).unique()
        return ["All"] + sorted(text for text in distinct if text != 'nan')

    def apply_multiple_filters(self, filters_dict, progress=gr.Progress()):
        """Apply equality filters (ANDed together) to the loaded feed.

        Args:
            filters_dict: Mapping of column name -> wanted value. Entries whose
                value is empty, ``"All"`` or ``"None"`` are ignored, as are
                columns that do not exist in the feed.
            progress: Gradio progress callback.

        Returns:
            ``(display_df, summary_markdown, csv_path)``. ``display_df`` has long
            strings shortened; the CSV holds the untruncated rows. When nothing
            matches or no feed is loaded, an empty DataFrame, a message and
            ``None`` are returned.
        """
        if self.df is None:
            return pd.DataFrame(), "Please load a feed first", None

        progress(0, desc="Starting filter process...")

        filtered_df = self.df.copy()
        active_filters = {
            column: value
            for column, value in (filters_dict or {}).items()
            if value and value != "All" and value != "None"
        }

        if not active_filters:
            progress(1, desc="No filters applied - showing all data")
            filtered_df = filtered_df.fillna(0).infer_objects(copy=False)
            summary = "\n".join([
                "",
                "🔍 **Filter Results**",
                f"📋 **Total Rows:** {filtered_df.shape[0]:,}",
                "🎯 **Filters Applied:** None (showing all data)",
                "",
            ])
            return (
                self.truncate_display_columns(filtered_df),
                summary,
                self.generate_csv(filtered_df, "all_data"),
            )

        progress(0.2, desc="Applying filters...")

        filter_descriptions = []
        for position, (column, value) in enumerate(active_filters.items()):
            if column not in self.df.columns:
                continue

            progress(0.2 + (0.6 * position / len(active_filters)),
                     desc=f"Filtering by {column}: {value}")

            as_text_match = filtered_df[column].astype(str) == str(value)
            if self.df[column].dtype == 'object':
                mask = as_text_match
            else:
                # Dropdown values are strings; compare numerically when possible.
                try:
                    mask = filtered_df[column] == float(value)
                except (ValueError, TypeError):
                    mask = as_text_match
            filtered_df = filtered_df[mask]
            filter_descriptions.append(f"{column} = '{value}'")

        progress(0.8, desc="Processing results...")

        if filtered_df.empty:
            progress(1, desc="Filter complete - no results found")
            return pd.DataFrame(), "No records found matching the specified filters", None

        filtered_df = filtered_df.fillna(0).infer_objects(copy=False)
        display_df = self.truncate_display_columns(filtered_df)

        progress(1, desc="Filter complete")

        summary = "\n".join([
            "",
            "🔍 **Multi-Filter Results**",
            "",
            f"📋 **Matching Rows:** {filtered_df.shape[0]:,}",
            # Count only the filters that were really applied (missing columns are skipped).
            f"🎯 **Filters Applied:** {len(filter_descriptions)}",
            "📝 **Filter Details:**",
            "\n".join(f" • {desc}" for desc in filter_descriptions),
            "",
        ])

        filename_suffix = "_".join(f"{k}_{v}" for k, v in active_filters.items())[:50]
        return display_df, summary, self.generate_csv(filtered_df, f"filtered_{filename_suffix}")

    def truncate_display_columns(self, df):
        """Return a copy of *df* with object columns stringified and shortened for display.

        Columns whose name contains url/description/link/content/summary/text
        are cut to 30 characters, all other object columns to 50.
        """
        display_df = df.copy()
        long_content_markers = ('url', 'description', 'link', 'content', 'summary', 'text')

        for col in display_df.select_dtypes(include=['object']).columns:
            # str() because JSON feeds can yield non-string column labels.
            col_name = str(col).lower()
            limit = 30 if any(marker in col_name for marker in long_content_markers) else 50
            display_df[col] = self._ellipsize(display_df[col], limit)
        return display_df

    def generate_heatmap(self, city_col, state_col=None, country_col=None,
                         metric_col=None, filter_col=None, filter_value=None,
                         max_points=500, progress=gr.Progress()):
        """Generate heatmap based on selected metric with optional filtering.

        Each row contributes a "city, state, country" query (empty parts are
        dropped). Rows are grouped by that query, each group is geocoded once,
        and the group weight is the mean of ``metric_col`` or, without a
        metric, the number of rows.

        Args:
            city_col: Column holding the city; must exist in the feed.
            state_col, country_col: Optional columns; ``None``/``"None"`` disables.
            metric_col: Optional numeric column used as weight. Missing or
                non-numeric cells count as 1.0; values <= 0 are raised to 0.1.
            filter_col, filter_value: Optional equality filter applied first;
                ``"None"`` / ``"All"`` disable it.
            max_points: Stop after this many successfully geocoded locations.
            progress: Gradio progress callback.

        Returns:
            ``(map_html, status_markdown)``; ``map_html`` is ``None`` whenever
            no map could be produced and the status text explains why.
        """
        import html  # function-scope: only needed to escape the legend title

        try:
            if self.df is None or self.df.empty:
                return None, "⚠️ Please load a feed first"

            if city_col not in self.df.columns:
                available_cols = ', '.join(str(col) for col in self.df.columns.tolist()[:10])
                return None, f"⚠️ Column '{city_col}' not found. Available columns: {available_cols}..."

            progress(0, desc="Initializing heatmap generation...")

            working_df = self.df.copy()
            original_rows = len(working_df)

            filter_active = bool(
                filter_col and filter_value and filter_col != "None" and filter_value != "All"
            )
            if filter_active:
                if filter_col not in working_df.columns:
                    return None, f"⚠️ Filter column '{filter_col}' not found in dataset"
                working_df = working_df[working_df[filter_col].astype(str) == str(filter_value)]
                if working_df.empty:
                    return None, f"⚠️ No data found for filter: {filter_col} = {filter_value}"

            progress(0.1, desc=f"Processing {len(working_df)} rows...")

            # Resolve the optional columns once instead of per row.
            use_state = bool(state_col) and state_col != "None" and state_col in working_df.columns
            use_country = (bool(country_col) and country_col != "None"
                           and country_col in working_df.columns)
            uses_metric = bool(metric_col) and metric_col != "None"
            metric_available = uses_metric and metric_col in working_df.columns

            location_data = []
            skipped_rows = 0

            for _, row in working_df.iterrows():
                try:
                    city = self._location_part(row[city_col])
                    state = self._location_part(row[state_col]) if use_state else ""
                    country = self._location_part(row[country_col]) if use_country else ""

                    location_parts = [part for part in (city, state, country) if part]
                    if not location_parts:
                        skipped_rows += 1
                        continue

                    metric_value = 1.0
                    if metric_available:
                        try:
                            raw_metric = row[metric_col]
                            if pd.notna(raw_metric):
                                metric_value = float(raw_metric)
                                if not np.isfinite(metric_value):
                                    # "nan"/"inf" strings parse as floats but are unusable weights.
                                    metric_value = 1.0
                                elif metric_value <= 0:
                                    metric_value = 0.1
                        except (ValueError, TypeError):
                            metric_value = 1.0

                    location_data.append({
                        'location_key': ", ".join(location_parts),
                        'city': city,
                        'state': state,
                        'country': country,
                        'metric_value': metric_value,
                    })
                except Exception:
                    # A malformed row must not abort the whole map.
                    skipped_rows += 1
                    continue

            if not location_data:
                return None, f"⚠️ No valid location data found. Processed {len(working_df)} rows, skipped {skipped_rows} rows with invalid location data."

            progress(0.3, desc=f"Found {len(location_data)} valid locations, aggregating...")

            locations_df = pd.DataFrame(location_data)

            try:
                grouped = locations_df.groupby('location_key')
                if uses_metric:
                    location_stats = grouped.agg(
                        total_metric=('metric_value', 'sum'),
                        job_count=('metric_value', 'count'),
                        avg_metric=('metric_value', 'mean'),
                        city=('city', 'first'),
                        state=('state', 'first'),
                        country=('country', 'first'),
                    ).reset_index()
                    location_stats['heatmap_weight'] = location_stats['avg_metric']
                else:
                    location_stats = grouped.agg(
                        city=('city', 'first'),
                        state=('state', 'first'),
                        country=('country', 'first'),
                        job_count=('metric_value', 'size'),
                    ).reset_index()
                    location_stats['heatmap_weight'] = location_stats['job_count']
            except Exception as e:
                return None, f"⚠️ Error aggregating location data: {str(e)}"

            total_locations = len(location_stats)
            progress(0.4, desc=f"Starting geocoding for {total_locations} unique locations...")

            heat_data = []
            successful_mappings = 0
            failed_geocoding = 0
            geocoding_errors = []

            for idx, stats_row in enumerate(location_stats.itertuples(index=False)):
                if successful_mappings >= max_points:
                    break

                # Bound before the try so the except clause can always name the location.
                location_key = stats_row.location_key
                weight = stats_row.heatmap_weight

                try:
                    progress(0.4 + (0.5 * idx / total_locations),
                             desc=f"Geocoding {idx+1}/{total_locations}: {successful_mappings} successful")

                    if weight <= 0:
                        failed_geocoding += 1
                        continue

                    location = geocode_cached(location_key)
                    latitude = getattr(location, 'latitude', None)
                    longitude = getattr(location, 'longitude', None)

                    # "is not None" rather than truthiness: 0.0 is a valid coordinate.
                    if latitude is not None and longitude is not None:
                        heat_data.append([float(latitude), float(longitude), float(weight)])
                        successful_mappings += 1
                    else:
                        failed_geocoding += 1

                    # NOTE(review): the public Nominatim service asks for at most
                    # one request per second; this pause is much shorter. Confirm
                    # the geocoding backend before raising max_points.
                    time.sleep(0.05)

                except Exception as e:
                    geocoding_errors.append(f"{location_key}: {str(e)}")
                    failed_geocoding += 1
                    continue

            if not heat_data:
                error_details = (
                    f"No valid coordinates found. Geocoding errors: {geocoding_errors[:3]}"
                    if geocoding_errors else "No valid coordinates found"
                )
                return None, f"⚠️ {error_details}"

            progress(0.9, desc="Generating heatmap visualization...")

            weights = [point[2] for point in heat_data]
            min_weight = min(weights)
            max_weight = max(weights)
            avg_weight = sum(weights) / len(weights)

            if uses_metric:
                min_val_str = f"{min_weight:.2f}"
                max_val_str = f"{max_weight:.2f}"
                avg_val_str = f"{avg_weight:.2f}"
                unit_suffix = ""
                # Column names come from the feed, so escape them before embedding in HTML.
                legend_title = html.escape(f"Heatmap: {metric_col}")
            else:
                min_val_str = f"{int(min_weight)}"
                max_val_str = f"{int(max_weight)}"
                avg_val_str = f"{avg_weight:.1f}"
                unit_suffix = " jobs"
                legend_title = "Job Count Heatmap"

            try:
                center_lat = sum(point[0] for point in heat_data) / len(heat_data)
                center_lon = sum(point[1] for point in heat_data) / len(heat_data)

                m = folium.Map(location=[center_lat, center_lon], zoom_start=6)

                HeatMap(
                    heat_data,
                    min_opacity=0.3,
                    max_zoom=18,
                    radius=25,
                    blur=20,
                    gradient={0.2: 'blue', 0.5: 'lime', 0.7: 'orange', 1.0: 'red'},
                ).add_to(m)

                legend_html = (
                    "\n<div style='position: fixed;\n"
                    "bottom: 50px; left: 50px; width: 220px; height: 120px;\n"
                    "background-color: white; border:2px solid grey; z-index:9999;\n"
                    "font-size:12px; padding: 8px; border-radius: 5px;'>\n"
                    f"<h4 style='margin:0; color: #2E86AB;'>{legend_title}</h4>\n"
                    f"<p style='margin:3px 0;'><span style='color:red'>■</span> High ({max_val_str}{unit_suffix})</p>\n"
                    "<p style='margin:3px 0;'><span style='color:orange'>■</span> Med-High</p>\n"
                    "<p style='margin:3px 0;'><span style='color:lime'>■</span> Medium</p>\n"
                    f"<p style='margin:3px 0;'><span style='color:blue'>■</span> Low ({min_val_str}{unit_suffix})</p>\n"
                    f"<small>Avg: {avg_val_str}{unit_suffix} | Locations: {len(heat_data)}</small>\n"
                    "</div>\n"
                )
                m.get_root().html.add_child(folium.Element(legend_html))

            except Exception as e:
                return None, f"⚠️ Error creating map visualization: {str(e)}"

            progress(1, desc="Heatmap generation complete!")

            filter_info = f" (Filtered by {filter_col}: {filter_value})" if filter_active else ""
            success_rate = successful_mappings / (successful_mappings + failed_geocoding) * 100
            state_label = state_col if state_col and state_col != "None" else 'Not used'
            country_label = country_col if country_col and country_col != "None" else 'Not used'

            status_msg = "\n".join([
                "",
                "✅ **Heatmap Generated Successfully**",
                "",
                "📊 **Data Processing:**",
                f"• Original Rows: {original_rows}",
                f"• Valid Locations: {len(location_data)}",
                f"• Unique Locations: {total_locations}",
                f"• Skipped Rows: {skipped_rows}",
                filter_info,
                "",
                "🌍 **Geocoding Results:**",
                f"• Successfully Mapped: {successful_mappings}",
                f"• Failed to Geocode: {failed_geocoding}",
                f"• Success Rate: {success_rate:.1f}%",
                "",
                "🎯 **Heatmap Configuration:**",
                f"• Metric Used: {metric_col if uses_metric else 'Job Count'}",
                f"• City: {city_col}",
                f"• State: {state_label}",
                f"• Country: {country_label}",
                "",
                "📈 **Value Statistics:**",
                f"• Min Value: {min_val_str}",
                f"• Max Value: {max_val_str}",
                f"• Average: {avg_val_str}",
                "",
                "🌈 **Color Mapping:** Red=High, Orange=Med-High, Green=Medium, Blue=Low",
                "",
            ])

            return m._repr_html_(), status_msg

        except Exception as e:
            return None, f"⚠️ Unexpected error in heatmap generation: {str(e)}. Please check your data and try again."

    def generate_csv(self, df, filename_prefix="feed"):
        """Write *df* to ``<filename_prefix>.csv`` in a fresh temp directory.

        A new directory is created per call so concurrent exports with the same
        prefix cannot overwrite each other and no stray placeholder file is
        left behind. Characters that are illegal in file names (for example a
        "/" coming from a filter value) are replaced with "_".

        Returns:
            The path of the written file, or ``None`` when *df* is ``None`` or empty.
        """
        if df is None or df.empty:
            return None

        safe_prefix = "".join(
            "_" if (ch in self._UNSAFE_FILENAME_CHARS or ord(ch) < 32) else ch
            for ch in str(filename_prefix)
        ) or "feed"

        export_dir = tempfile.mkdtemp()
        final_filename = os.path.join(export_dir, f"{safe_prefix}.csv")
        df.to_csv(final_filename, index=False)
        return final_filename

    def get_preview(self, df, max_rows=10):
        """Return the first *max_rows* rows with object columns cut to 50 characters.

        Returns ``None`` when *df* is ``None`` or empty.
        """
        if df is None or df.empty:
            return None

        preview_df = df.head(max_rows).copy()
        for col in preview_df.select_dtypes(include=['object']).columns:
            preview_df[col] = self._ellipsize(preview_df[col], 50)
        return preview_df


# Module-level reader shared by all Gradio callbacks.
feed_reader = FeedReader()
|
|
|
|
|
def create_enhanced_gradio_app(): |
|
|
with gr.Blocks(title="Enhanced Feed Reader & Analyzer", theme=gr.themes.Soft()) as app: |
|
|
with gr.Row(): |
|
|
with gr.Column(scale=4): |
|
|
gr.Markdown(""" |
|
|
# 📡 Enhanced Feed Reader & Analyzer |
|
|
|
|
|
Load and analyze XML or JSON feeds with advanced multi-filtering and interactive heatmap visualization. |
|
|
""") |
|
|
|
|
|
with gr.Tab("📥 Load Feed"): |
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
url_input = gr.Textbox( |
|
|
label="Feed URL", |
|
|
placeholder="https://example.com/feed.xml", |
|
|
lines=1 |
|
|
) |
|
|
job_tag_input = gr.Textbox( |
|
|
label="XML Job Tag (for XML feeds only)", |
|
|
value="job", |
|
|
placeholder="job, item, entry, etc." |
|
|
) |
|
|
load_btn = gr.Button("🔄 Load Feed", variant="primary") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
summary_output = gr.Markdown(label="Summary") |
|
|
with gr.Column(): |
|
|
metadata_output = gr.Dataframe( |
|
|
label="📊 Columns Metadata", |
|
|
visible=True, |
|
|
interactive=False, |
|
|
wrap=False |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
preview_dataframe = gr.Dataframe( |
|
|
label="Data Preview", |
|
|
visible=True, |
|
|
interactive=False, |
|
|
wrap=False, |
|
|
row_count=(1, "dynamic") |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
csv_download = gr.File(label="📥 Download Full Dataset (CSV)", visible=True) |
|
|
|
|
|
column_choices_state = gr.State([]) |
|
|
|
|
|
def process_and_download(url, job_tag):
    """Run ``feed_reader.process_feed`` and reorder its result for the Load tab outputs.

    Returns ``(summary, metadata_df, preview_df, csv_file, column_choices)``.

    ``process_feed`` yields six values on success but only five (no metadata)
    on its error paths, so the result is unpacked defensively instead of with
    a fixed six-way unpack that would raise ``ValueError`` on every failure.
    """
    result = tuple(feed_reader.process_feed(url, job_tag))
    summary, _full_df, csv_file, preview_df, column_choices = result[:5]
    metadata_df = result[5] if len(result) > 5 else None

    # Error paths use "" as a placeholder; the File/Dataframe outputs expect None.
    if isinstance(csv_file, str) and not csv_file:
        csv_file = None
    if isinstance(preview_df, str):
        preview_df = None

    return summary, metadata_df, preview_df, csv_file, column_choices
|
|
|
|
|
load_btn.click( |
|
|
process_and_download, |
|
|
inputs=[url_input, job_tag_input], |
|
|
outputs=[summary_output, metadata_output, preview_dataframe, csv_download, column_choices_state] |
|
|
) |
|
|
|
|
|
with gr.Tab("🔍 Advanced Filter Data"): |
|
|
gr.Markdown("### 🎯 Multi-Column Filtering") |
|
|
gr.Markdown("Apply multiple filters simultaneously to narrow down your dataset:") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
gr.Markdown("**Primary Filters:**") |
|
|
with gr.Column(): |
|
|
filter1_col = gr.Dropdown( |
|
|
label="Filter 1 - Column", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
filter1_val = gr.Dropdown( |
|
|
label="Filter 1 - Value", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
with gr.Column(): |
|
|
filter2_col = gr.Dropdown( |
|
|
label="Filter 2 - Column", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
filter2_val = gr.Dropdown( |
|
|
label="Filter 2 - Value", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
|
|
|
with gr.Column(): |
|
|
gr.Markdown("**Additional Filters:**") |
|
|
with gr.Column(): |
|
|
filter3_col = gr.Dropdown( |
|
|
label="Filter 3 - Column", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
filter3_val = gr.Dropdown( |
|
|
label="Filter 3 - Value", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
with gr.Column(): |
|
|
filter4_col = gr.Dropdown( |
|
|
label="Filter 4 - Column", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
filter4_val = gr.Dropdown( |
|
|
label="Filter 4 - Value", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
multi_filter_btn = gr.Button("🔍 Apply Multi-Filter", variant="primary", size="lg") |
|
|
clear_filters_btn = gr.Button("🧹 Clear All Filters", variant="secondary") |
|
|
|
|
|
with gr.Row(): |
|
|
multi_filter_summary = gr.Markdown(label="Multi-Filter Results") |
|
|
|
|
|
with gr.Row(): |
|
|
multi_filtered_dataframe = gr.Dataframe( |
|
|
label="Filtered Data", |
|
|
visible=True, |
|
|
interactive=False, |
|
|
wrap=False, |
|
|
row_count=(1, "dynamic") |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
multi_filtered_csv = gr.File(label="📥 Download Filtered Data (CSV)", visible=True) |
|
|
|
|
|
|
|
|
def update_all_filter_columns(column_choices):
    """Build the four filter-column dropdown updates for a newly loaded feed.

    Every dropdown offers "None" followed by the feed's columns and is reset
    to "None".
    """
    if column_choices:
        choices_with_none = ["None"] + column_choices
    else:
        choices_with_none = ["None"]

    return tuple(
        gr.Dropdown(choices=choices_with_none, value="None")
        for _ in range(4)
    )
|
|
|
|
|
def update_filter_values(selected_column):
    """Return the value-dropdown update that matches the chosen filter column.

    With no usable column (nothing selected, "None", or no feed loaded) the
    dropdown is reduced to the single choice "None". Otherwise it lists the
    column's unique values and preselects "All".
    """
    nothing_selected = not selected_column or selected_column == "None"
    if nothing_selected or feed_reader.df is None:
        return gr.Dropdown(choices=["None"], value="None")

    options = feed_reader.get_column_unique_values(selected_column)
    if options:
        preselected = "All"
    else:
        preselected = "None"
    return gr.Dropdown(choices=options, value=preselected)
|
|
|
|
|
|
|
|
column_choices_state.change( |
|
|
update_all_filter_columns, |
|
|
inputs=[column_choices_state], |
|
|
outputs=[filter1_col, filter2_col, filter3_col, filter4_col] |
|
|
) |
|
|
|
|
|
|
|
|
filter1_col.change(update_filter_values, inputs=[filter1_col], outputs=[filter1_val]) |
|
|
filter2_col.change(update_filter_values, inputs=[filter2_col], outputs=[filter2_val]) |
|
|
filter3_col.change(update_filter_values, inputs=[filter3_col], outputs=[filter3_val]) |
|
|
filter4_col.change(update_filter_values, inputs=[filter4_col], outputs=[filter4_val]) |
|
|
|
|
|
|
|
|
def apply_multi_filters(col1, val1, col2, val2, col3, val3, col4, val4, progress=gr.Progress()):
    """Collect the four (column, value) selections and filter the loaded feed.

    A pair is used only when both its column and its value are set and neither
    is the "None" placeholder. Pairs are stored in a dict keyed by column, so
    a later pair on the same column replaces an earlier one.
    """
    def _is_set(choice):
        return bool(choice) and choice != "None"

    selections = ((col1, val1), (col2, val2), (col3, val3), (col4, val4))
    filters = {}
    for column, wanted in selections:
        if _is_set(column) and _is_set(wanted):
            filters[column] = wanted

    return feed_reader.apply_multiple_filters(filters, progress)
|
|
|
|
|
def clear_all_filters():
    """Reset the filter tab: message, empty table, no CSV, and all eight dropdowns to "None"."""
    reset_dropdowns = [gr.Dropdown(value="None") for _ in range(8)]
    return (
        "Filters cleared - select columns and values to filter data",
        pd.DataFrame(),
        None,
        *reset_dropdowns,
    )
|
|
|
|
|
multi_filter_btn.click( |
|
|
apply_multi_filters, |
|
|
inputs=[filter1_col, filter1_val, filter2_col, filter2_val, |
|
|
filter3_col, filter3_val, filter4_col, filter4_val], |
|
|
outputs=[multi_filtered_dataframe, multi_filter_summary, multi_filtered_csv] |
|
|
) |
|
|
|
|
|
clear_filters_btn.click( |
|
|
clear_all_filters, |
|
|
outputs=[multi_filter_summary, multi_filtered_dataframe, multi_filtered_csv, |
|
|
filter1_col, filter1_val, filter2_col, filter2_val, |
|
|
filter3_col, filter3_val, filter4_col, filter4_val] |
|
|
) |
|
|
|
|
|
with gr.Tab("📊 Statistics"): |
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
gr.Markdown("### 📋 Basic Column Statistics") |
|
|
basic_stats_btn = gr.Button("📊 Generate Column Statistics", variant="primary") |
|
|
basic_stats_output = gr.Dataframe(label="Column Statistics") |
|
|
|
|
|
with gr.Column(): |
|
|
gr.Markdown("### 🎯 Weighted Statistics by Group") |
|
|
|
|
|
|
|
|
stats_group_column = gr.Radio( |
|
|
label="Group By Column (company, client, etc.)", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
|
|
|
|
|
|
with gr.Row(): |
|
|
reference_column = gr.Dropdown( |
|
|
label="Reference ID Column", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
cpa_column = gr.Dropdown( |
|
|
label="CPA Goal Column", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
cpc_column = gr.Dropdown( |
|
|
label="Payouts: CPC/CPA Columns", |
|
|
choices=[], |
|
|
value=None |
|
|
) |
|
|
|
|
|
weighted_stats_btn = gr.Button("🧮 Calculate Weighted Statistics", variant="secondary") |
|
|
weighted_stats_summary = gr.Markdown(label="Weighted Stats Summary") |
|
|
|
|
|
with gr.Row(): |
|
|
weighted_stats_output = gr.Dataframe( |
|
|
label="📈 Weighted Statistics by Group", |
|
|
visible=True, |
|
|
interactive=False, |
|
|
wrap=False |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
weighted_stats_csv = gr.File(label="📥 Download Weighted Statistics (CSV)", visible=True) |
|
|
|
|
|
|
|
|
def update_all_stats_choices(column_choices):
    """Refresh the statistics-tab selectors after a feed load.

    The group-by radio lists every column except ``last_update`` and selects
    the first one. The three metric dropdowns list "None" plus all columns and
    are preselected by keyword match on the lower-cased column name; when
    several columns match the same slot, the last one wins.
    """
    grouping_choices = [name for name in column_choices if name != 'last_update']
    metric_choices = ["None"] + column_choices

    # Order matters: a column is assigned to the first slot whose keywords match.
    keyword_slots = (
        ("reference", ("reference", "req")),
        ("cpa", ("cpa", "goal")),
        ("cpc", ("cpc", "sponsored", "cost", "payout")),
    )
    defaults = {"reference": "None", "cpa": "None", "cpc": "None"}

    for name in column_choices:
        lowered = name.lower()
        for slot, keywords in keyword_slots:
            if any(keyword in lowered for keyword in keywords):
                defaults[slot] = name
                break

    first_group = grouping_choices[0] if grouping_choices else None
    return (
        gr.Radio(choices=grouping_choices, value=first_group),
        gr.Dropdown(choices=metric_choices, value=defaults["reference"]),
        gr.Dropdown(choices=metric_choices, value=defaults["cpa"]),
        gr.Dropdown(choices=metric_choices, value=defaults["cpc"]),
    )
|
|
|
|
|
|
|
|
column_choices_state.change( |
|
|
update_all_stats_choices, |
|
|
inputs=[column_choices_state], |
|
|
outputs=[stats_group_column, reference_column, cpa_column, cpc_column] |
|
|
) |
|
|
|
|
|
|
|
|
def get_column_stats():
    """Get statistics for each column of the loaded feed.

    Returns a DataFrame with one row per column: name, number of unique
    values, number of nulls, dtype, and either the five most frequent values
    (object columns) or the min/max range (other dtypes). An empty DataFrame
    is returned when no feed is loaded or the statistics cannot be computed.

    Columns holding unhashable cells (lists/dicts from JSON feeds) are counted
    on their string form, so one such column no longer blanks the whole table.
    """
    df = feed_reader.df
    if df is None:
        return pd.DataFrame()

    try:
        stats = []
        for column in df.columns:
            series = df[column]

            countable = series
            try:
                unique_values = countable.nunique()
            except TypeError:
                # Unhashable cells: compare by string representation instead.
                countable = series.dropna().astype(str)
                unique_values = countable.nunique()

            if series.dtype == 'object':
                top_values = countable.value_counts().head(5)
                top_values_str = ", ".join(
                    f"{val} ({count})" for val, count in top_values.items()
                )
            else:
                top_values_str = f"Min: {series.min()}, Max: {series.max()}"

            stats.append({
                'Column': column,
                'Unique Values': unique_values,
                'Null Values': series.isnull().sum(),
                'Data Type': str(series.dtype),
                'Top Values/Range': top_values_str,
            })

        return pd.DataFrame(stats)

    except Exception:
        # Best effort for the UI: show an empty table rather than crash the callback.
        return pd.DataFrame()
|
|
|
|
|
basic_stats_btn.click( |
|
|
get_column_stats, |
|
|
outputs=[basic_stats_output] |
|
|
) |
|
|
|
|
|
|
|
|
def get_weighted_stats_by_group(group_column, reference_col=None, cpa_col=None, cpc_col=None):
    """Get per-group statistics for the loaded feed with flexible column selection.

    Args:
        group_column: Column to group by (rows with a null key are dropped by groupby).
        reference_col: Optional ID column; adds ``unique_references``.
        cpa_col: Optional CPA column; adds ``mean_cpa_goal``, ``min_cpa``, ``max_cpa``.
        cpc_col: Optional payout column; adds ``mean_payouts``, ``min_payouts``,
            ``max_payouts``.

    When both CPA and payout columns are given, ``target_cvr`` =
    mean payouts / mean CPA * 100 is added (0 unless both means are positive).
    Numeric columns are coerced with ``pd.to_numeric``; a group without any
    numeric value reports 0 for that column's statistics.

    Returns:
        ``(stats_df, "Success")`` sorted descending by ``unique_references``
        (or ``total_postings`` when no reference column is used), or
        ``(empty DataFrame, message)`` on failure.
    """
    df = feed_reader.df
    if df is None:
        return pd.DataFrame(), "Please load a feed first"

    if group_column not in df.columns:
        available_columns = [col for col in df.columns if col != 'last_update']
        return pd.DataFrame(), f"Column '{group_column}' not found. Available columns: {', '.join(available_columns)}"

    selected_columns = [col for col in (reference_col, cpa_col, cpc_col) if col is not None]
    missing_columns = [col for col in selected_columns if col not in df.columns]
    if missing_columns:
        available_columns = list(df.columns)
        return pd.DataFrame(), f"Missing selected columns: {', '.join(missing_columns)}. Available columns: {', '.join(available_columns)}"

    def _numeric_summary(series, mean_key, min_key, max_key):
        """Mean/min/max of *series* after numeric coercion; zeros when nothing is numeric."""
        numeric = pd.to_numeric(series, errors='coerce')
        if numeric.isna().all():
            return {mean_key: 0, min_key: 0, max_key: 0}
        return {
            mean_key: round(float(numeric.mean()), 2),
            min_key: round(float(numeric.min()), 2),
            max_key: round(float(numeric.max()), 2),
        }

    try:
        # One timestamp for the whole report instead of one per group.
        pacific_tz = pytz.timezone("America/Los_Angeles")
        now_pst = datetime.datetime.now(pytz.utc).astimezone(pacific_tz)
        last_update = now_pst.strftime("%Y-%m-%d %H:%M:%S %Z")

        # Explicit loop instead of groupby.apply: keeps numeric dtypes per column
        # and still works when a metric column is also the grouping column.
        rows = []
        for group_key, group_df in df.groupby(group_column):
            results = {group_column: group_key, "total_postings": int(len(group_df))}

            if reference_col:
                results["unique_references"] = int(group_df[reference_col].nunique())

            if cpa_col:
                results.update(_numeric_summary(
                    group_df[cpa_col], "mean_cpa_goal", "min_cpa", "max_cpa"))

            if cpc_col:
                results.update(_numeric_summary(
                    group_df[cpc_col], "mean_payouts", "min_payouts", "max_payouts"))

            if cpa_col and cpc_col:
                mean_cpa = results.get("mean_cpa_goal", 0)
                mean_payouts = results.get("mean_payouts", 0)
                if mean_cpa > 0 and mean_payouts > 0:
                    results["target_cvr"] = round((mean_payouts / mean_cpa) * 100, 2)
                else:
                    results["target_cvr"] = 0

            results["last_update"] = last_update
            rows.append(results)

        if not rows:
            return pd.DataFrame(), f"No groups found for column '{group_column}'"

        grouped_stats = pd.DataFrame(rows)
        sort_key = "unique_references" if "unique_references" in grouped_stats.columns else "total_postings"
        grouped_stats = grouped_stats.sort_values(sort_key, ascending=False)

        return grouped_stats, "Success"

    except Exception as e:
        return pd.DataFrame(), f"Error calculating weighted statistics: {str(e)}"
|
|
|
|
|
|
|
|
def calculate_weighted_stats(group_column, reference_col, cpa_col, cpc_col):
    """Gradio callback that builds the per-group weighted statistics report.

    Args:
        group_column: Column to group by; a falsy value aborts with a prompt.
        reference_col: Reference-ID column, or the string "None" when unset.
        cpa_col: CPA column, or the string "None" when unset.
        cpc_col: Payouts column, or the string "None" when unset.

    Returns:
        A 3-tuple ``(summary, dataframe, csv_file)``. On validation failure or
        when ``get_weighted_stats_by_group`` yields an empty frame, the last
        two items are ``None`` and the first carries the message to display.
    """
    if not group_column:
        return "Please select a grouping column", None, None

    # The dropdowns use the literal string "None" to mean "no column chosen".
    reference_col, cpa_col, cpc_col = (
        None if choice == "None" else choice
        for choice in (reference_col, cpa_col, cpc_col)
    )

    if not (reference_col or cpa_col or cpc_col):
        return "Please select at least one metric column (Reference ID, CPA Goal, or Payouts)", None, None

    weighted_df, message = get_weighted_stats_by_group(group_column, reference_col, cpa_col, cpc_col)

    # An empty frame is how the helper signals failure; its message explains why.
    if weighted_df.empty:
        return f"❌ **Error:** {message}", None, None

    labelled_columns = (
        ("Reference", reference_col),
        ("CPA", cpa_col),
        ("Payouts", cpc_col),
    )
    metrics_line = " | ".join(
        f"{label}: {column}" for label, column in labelled_columns if column
    )

    summary = f"""
🎯 **Weighted Statistics Results**

✅ **Status:** {message}
📊 **Groups:** {len(weighted_df)}
🔢 **Grouped by:** {group_column}
📈 **Metrics Used:** {metrics_line}

📊 **Available Metrics:**
• **Unique References**: Count of unique IDs per group (if Reference ID selected)
• **Total Postings**: Total rows/postings per group
• **Mean CPA/Payouts**: Average values across all postings (if columns selected)
• **Target CVR**: (Mean Payouts / Mean CPA) × 100 (if both selected)
• **Min/Max Ranges**: Minimum and maximum values per group

💡 **Note:** Only metrics with selected columns will be calculated and displayed.
"""
    csv_file = feed_reader.generate_csv(weighted_df, f"weighted_stats_{group_column}")
    return summary, weighted_df, csv_file
|
|
|
|
|
# Pressing the button feeds the four column selectors to
# calculate_weighted_stats and routes its (summary, table, csv) result
# to the three output components.
weighted_stats_btn.click(
    fn=calculate_weighted_stats,
    inputs=[stats_group_column, reference_column, cpa_column, cpc_column],
    outputs=[weighted_stats_summary, weighted_stats_output, weighted_stats_csv],
)
|
|
|
|
|
with gr.Tab("🌍 Interactive Heatmap"):

    def _blank_dropdown(label, info):
        """Create a dropdown with no choices and no selection yet."""
        return gr.Dropdown(label=label, choices=[], value=None, info=info)

    with gr.Row():
        # Left column: which columns describe the location of each row.
        with gr.Column():
            gr.Markdown("### 📍 Heatmap Configuration")
            gr.Markdown("Create heatmaps based on job metrics and locations:")

            city_col = _blank_dropdown(
                "🏙️ City Column (Required)",
                "Column containing city names",
            )
            state_col = _blank_dropdown(
                "🗺️ State/Province Column (Optional)",
                "Column containing state or province names",
            )
            country_col = _blank_dropdown(
                "🌍 Country Column (Optional)",
                "Column containing country names",
            )

        # Right column: intensity metric and optional pre-filter.
        with gr.Column():
            gr.Markdown("### 🎯 Heatmap Metrics & Filters")

            metric_col = _blank_dropdown(
                "📊 Metric Column (Optional)",
                "Column to use for heatmap intensity (CPC, CPA, etc.). Leave empty for job count.",
            )

            filter_col = _blank_dropdown(
                "🔍 Filter Column (Optional)",
                "Column to filter data before creating heatmap (Company, Client, etc.)",
            )

            filter_val = _blank_dropdown(
                "🎯 Filter Value",
                "Specific value to filter by",
            )

    with gr.Row():
        heatmap_btn = gr.Button("🔥 Generate Heatmap", variant="primary", size="lg")
        clear_heatmap_btn = gr.Button("🧹 Clear Heatmap", variant="secondary")

    with gr.Row():
        heatmap_status = gr.Markdown()

    with gr.Row():
        heatmap_output = gr.HTML(label="Interactive Job Heatmap")
|
|
|
|
|
def update_heatmap_choices(column_choices):
    """Rebuild the six heatmap dropdowns from the available column names.

    Column names are matched case-insensitively against keyword lists to
    pre-select sensible defaults. Each column is assigned to the first
    category whose keywords it contains, and when several columns match the
    same category the last one in ``column_choices`` wins.

    Args:
        column_choices: List of column names; falsy means no data is loaded.

    Returns:
        A 6-tuple of ``gr.Dropdown`` updates in the order city, state,
        country, metric, filter column, filter value.
    """
    if not column_choices:
        blank = gr.Dropdown(choices=[])
        return (blank,) * 6

    # Optional selectors get an explicit "None" entry meaning "not used".
    with_none_option = ["None"] + column_choices

    # Categories are tested in this order for every column.
    keyword_table = (
        ("city", ('city', 'ciudad', 'ville', 'location')),
        ("state", ('state', 'province', 'region', 'estado')),
        ("country", ('country', 'nation', 'pais', 'pays')),
        ("metric", ('cpc', 'cpa', 'cost', 'payout', 'bid', 'sponsored')),
        ("filter", ('company', 'client', 'advertiser', 'brand')),
    )
    defaults = {
        "city": None,
        "state": "None",
        "country": "None",
        "metric": "None",
        "filter": "None",
    }

    for column in column_choices:
        lowered = column.lower()
        for slot, keywords in keyword_table:
            if any(keyword in lowered for keyword in keywords):
                defaults[slot] = column
                break  # one category per column

    return (
        gr.Dropdown(choices=column_choices, value=defaults["city"]),
        gr.Dropdown(choices=with_none_option, value=defaults["state"]),
        gr.Dropdown(choices=with_none_option, value=defaults["country"]),
        gr.Dropdown(choices=with_none_option, value=defaults["metric"]),
        gr.Dropdown(choices=with_none_option, value=defaults["filter"]),
        gr.Dropdown(choices=["All"], value="All"),
    )
|
|
|
|
|
def update_filter_values_heatmap(selected_column):
    """Populate the filter-value dropdown for the chosen filter column.

    Args:
        selected_column: Name of the filter column, or a falsy value / the
            string "None" when no filter column is selected.

    Returns:
        A ``gr.Dropdown`` update whose selected value is always "All". When no
        column is selected or no data is loaded, "All" is the only choice.
    """
    if not selected_column or selected_column == "None" or feed_reader.df is None:
        return gr.Dropdown(choices=["All"], value="All")

    values = feed_reader.get_column_unique_values(selected_column)
    choices = list(values) if values is not None else []

    # "All" is the sentinel generate_heatmap maps to "no filter value". It is
    # preselected below, so it has to be one of the offered choices. The
    # isinstance guard avoids comparing the string against non-string values.
    if not any(isinstance(item, str) and item == "All" for item in choices):
        choices.insert(0, "All")

    return gr.Dropdown(choices=choices, value="All")
|
|
|
|
|
# Whenever the list of available columns changes, refresh every heatmap
# dropdown (choices and auto-detected defaults).
column_choices_state.change(
    fn=update_heatmap_choices,
    inputs=[column_choices_state],
    outputs=[city_col, state_col, country_col, metric_col, filter_col, filter_val],
)

# Picking a different filter column reloads the values offered for filtering.
filter_col.change(
    fn=update_filter_values_heatmap,
    inputs=[filter_col],
    outputs=[filter_val],
)
|
|
|
|
|
def generate_heatmap(city_col, state_col, country_col, metric_col, filter_col, filter_val, progress=gr.Progress()):
    """Gradio callback that delegates heatmap creation to ``feed_reader``.

    Args:
        city_col: City column name; required, a falsy value aborts.
        state_col, country_col, metric_col, filter_col: Column names, or the
            string "None" when the corresponding dropdown is unused.
        filter_val: Value to filter by, or "All" for no value filter.
        progress: Progress tracker forwarded to ``feed_reader.generate_heatmap``.

    Returns:
        ``(status_message, heatmap_html)``; the HTML is ``None`` when no city
        column was chosen.
    """
    if not city_col:
        return "❌ Please select a city column", None

    # Translate the dropdown sentinels into None before calling the backend:
    # "None" marks an unused column, "All" marks an absent filter value.
    state_col, country_col, metric_col, filter_col = (
        None if picked == "None" else picked
        for picked in (state_col, country_col, metric_col, filter_col)
    )
    if filter_val == "All":
        filter_val = None

    heatmap_html, msg = feed_reader.generate_heatmap(
        city_col,
        state_col,
        country_col,
        metric_col,
        filter_col,
        filter_val,
        progress=progress,
    )
    # The outputs are wired as (status, html), the reverse of the backend order.
    return msg, heatmap_html
|
|
|
|
|
def clear_heatmap():
    """Return the status text and the empty HTML that reset the heatmap view."""
    status_message = "🧹 Heatmap cleared"
    empty_html = ""
    return status_message, empty_html
|
|
|
|
|
# "Generate" reads all six selectors and writes the status line and the map.
heatmap_btn.click(
    fn=generate_heatmap,
    inputs=[city_col, state_col, country_col, metric_col, filter_col, filter_val],
    outputs=[heatmap_status, heatmap_output],
)

# "Clear" takes no inputs; it only overwrites the status line and the map.
clear_heatmap_btn.click(
    fn=clear_heatmap,
    outputs=[heatmap_status, heatmap_output],
)
|
|
|
|
|
# Static help text rendered below the tabs; kept in a local so the call that
# creates the component stays short.
features_markdown = """
---
### 📝 Enhanced Features:

**🔥 Interactive Heatmap Visualization:**
- Heat intensity based on selected metrics (CPC, CPA, job count, etc.)
- Real-time filtering by company, client, or any column
- Color-coded intensity: Red (high) to Blue (low)
- Progress tracking during geocoding and map generation
- Dynamic legend with actual metric ranges

**🎯 Heatmap Configuration Options:**
- **Metric Column**: Choose CPC, CPA, or any numeric column for intensity
- **Filter Options**: Pre-filter data by company, client, etc.
- **Location Mapping**: City (required), State, Country (optional)
- **Automatic Detection**: Smart column name detection

**🔍 Advanced Multi-Filtering:**
- Apply up to 4 simultaneous filters on different columns
- Real-time progress tracking during filter operations
- Smart dropdown population with available values
- Clear filter functionality

**📊 Enhanced Data Processing:**
- Improved error handling and memory management
- Optimized for large datasets with progress indicators
- Smart column auto-detection for common field names
- Geocoding with rate limiting to prevent API issues

**💡 Heatmap Usage Examples:**
- **CPC Heatmap**: See where highest-paying jobs are located
- **Job Count Heatmap**: Visualize job density by location
- **Filtered Views**: Show only specific company/client job distributions
- **Performance Analysis**: Compare metrics across geographic regions

**🌈 Heatmap Color Legend:**
- **Red**: Highest values (top 20% of metric range)
- **Orange**: High values (60-80% of range)
- **Lime/Green**: Medium values (40-60% of range)
- **Blue**: Lower values (bottom 40% of range)
"""
gr.Markdown(features_markdown)
|
|
|
|
|
return app |
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it with Gradio's share and
    # debug launch options switched on.
    app = create_enhanced_gradio_app()
    app.launch(debug=True, share=True)