Spaces:

rogergs94
/

feed

Running

App Files Files Community

rogergs94 committed on Sep 19, 2025

Commit

7dabfd5

verified ·

1 Parent(s): d3a234f

feed_map updated

Browse files

Added the map tool plus the different filter options in the Filter Data tab

Files changed (1) hide show

app.py +598 -394

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ import gradio as gr
 import os
 import tempfile
 import pytz
 geolocator = Nominatim(user_agent="feed_reader_app")
@@ -44,13 +45,6 @@ class FeedReader:
     def load_feed_to_dataframe(self, url, job_tag="job"):
         """
         Load an XML feed (.xml or .xml.gz) or JSON from a URL and convert to DataFrame.
-        Args:
-            url (str): URL of the feed
-            job_tag (str): Name of the XML tag representing each job (only for XML feeds)
-        Returns:
-            pd.DataFrame: DataFrame containing the feed data
         """
         try:
             response = requests.get(url, timeout=30)
@@ -71,10 +65,8 @@ class FeedReader:
                 elif isinstance(data, dict) and "jobs" in data:
                     df = pd.DataFrame(data["jobs"])
                 else:
-                    # Try to convert any other dict structure to DataFrame
                     df = pd.DataFrame([data] if not isinstance(data, list) else data)
-                # Truncate and clean
                 df = df.applymap(lambda x: self.truncate(x) if isinstance(x, str) else x)
                 df = self.clean_invalid_numbers(df)
                 return df
@@ -90,7 +82,6 @@ class FeedReader:
             items = root.findall(f".//{job_tag}")
             if not items:
-                # Try common alternative tag names
                 common_tags = ["item", "entry", "record", "row"]
                 for tag in common_tags:
                     items = root.findall(f".//{tag}")
@@ -98,7 +89,7 @@ class FeedReader:
                         break
             if not items:
-                return pd.DataFrame(), f"No <{job_tag}> elements found in the XML. Tried common alternatives too."
             jobs_data = []
             for job in items:
@@ -109,21 +100,14 @@ class FeedReader:
             df = self.clean_invalid_numbers(df)
             return df, "Success"
-        except requests.exceptions.RequestException as e:
-            return pd.DataFrame(), f"Request error: {str(e)}"
-        except ET.ParseError as e:
-            return pd.DataFrame(), f"XML parsing error: {str(e)}"
-        except ValueError as e:
-            return pd.DataFrame(), f"JSON parsing error: {str(e)}"
         except Exception as e:
-            return pd.DataFrame(), f"Unexpected error: {str(e)}"
     def process_feed(self, url, job_tag="job"):
         """Main function to process feed and return results"""
         if not url.strip():
             return "Please enter a valid URL", None, "", "", []
-        # Load the feed
         result = self.load_feed_to_dataframe(url.strip(), job_tag.strip())
         if isinstance(result, tuple):
@@ -134,27 +118,19 @@ class FeedReader:
             df = result
             message = "Success"
-        # Store the dataframe
         self.df = df
-        # Add timestamp
         df['last_update'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-        # Fill NaN values with 0 (with future-proof pandas handling)
-        df_processed = df.fillna(0).infer_objects(copy=False)
-        # Generate summary
         summary = f"""
 📊 **Feed Processing Results**
 ✅ **Status:** {message}
 📋 **Rows:** {df_processed.shape[0]:,}
 📝 **Columns:** {df_processed.shape[1]}
         """
-        # Create metadata dataframe
         metadata_df = pd.DataFrame({
             'Column Name': df_processed.columns.tolist(),
             'Data Type': [str(df_processed[col].dtype) for col in df_processed.columns],
@@ -162,271 +138,315 @@ class FeedReader:
             'Null Values': [df_processed[col].isnull().sum() for col in df_processed.columns]
         })
-        # Get column choices for filter tab
         column_choices = df_processed.columns.tolist()
         return summary, df_processed, self.generate_csv(df_processed, "feed"), self.get_preview(df_processed), column_choices, metadata_df
     def get_column_unique_values(self, column_name):
         """Get unique values for a specific column"""
-        if self.df is None:
             return []
-        if column_name not in self.df.columns:
-            return []
-        # Get unique values and convert to string, sort them
         unique_values = self.df[column_name].dropna().astype(str).unique()
         unique_values = sorted([str(val) for val in unique_values if str(val) != 'nan'])
-        # Add "All" option at the beginning
         return ["All"] + unique_values
-    def filter_by_column(self, column_name, filter_value):
-        """Filter dataframe by column value"""
         if self.df is None:
-            return "Please load a feed first", None, ""
-        if not column_name or not filter_value:
-            return "Please specify both column name and filter value", None, ""
-        try:
-            # Check if column exists (case insensitive)
-            available_columns = self.df.columns.tolist()
-            matching_columns = [col for col in available_columns if col.lower() == column_name.lower()]
-            if not matching_columns:
-                return f"Column '{column_name}' not found. Available columns: {', '.join(available_columns)}", None, ""
-            actual_column = matching_columns[0]
-            # If "All" is selected, return the entire dataframe
-            if filter_value == "All":
-                filtered_df = self.df.copy()
-                filtered_df = filtered_df.fillna(0).infer_objects(copy=False)
-                # Truncate long columns for display only
-                display_df = self.truncate_display_columns(filtered_df.copy())
-                summary = f"""
-🔍 **Filtered Results**
 📋 **Total Rows:** {filtered_df.shape[0]:,}
-🎯 **Filter:** Showing all records from column '{actual_column}'
-                """
-                return summary, display_df, self.generate_csv(filtered_df, f"all_{actual_column}")
-            # Filter the dataframe for specific value
-            if self.df[actual_column].dtype == 'object':  # String column
-                # Exact match for dropdown selection
-                filtered_df = self.df[self.df[actual_column].astype(str) == str(filter_value)]
-            else:  # Numeric column
                 try:
-                    filter_val_numeric = float(filter_value)
-                    filtered_df = self.df[self.df[actual_column] == filter_val_numeric]
                 except ValueError:
-                    filtered_df = self.df[self.df[actual_column].astype(str) == str(filter_value)]
-            if filtered_df.empty:
-                return f"No records found matching '{filter_value}' in column '{actual_column}'", pd.DataFrame(), ""
-            filtered_df = filtered_df.fillna(0).infer_objects(copy=False)
-            # Truncate long columns for display only
-            display_df = self.truncate_display_columns(filtered_df.copy())
-            summary = f"""
-🔍 **Filtered Results**
 📋 **Matching Rows:** {filtered_df.shape[0]:,}
-🎯 **Filter:** {actual_column} = '{filter_value}'
-            """
-            return summary, display_df, self.generate_csv(filtered_df, f"filtered_{filter_value}")
-        except Exception as e:
-            return f"Error filtering data: {str(e)}", pd.DataFrame(), ""
     def truncate_display_columns(self, df):
-        """Truncate long columns for better display in DataFrames"""
         display_df = df.copy()
-        # Define columns that typically have long content
         long_content_columns = ['url', 'description', 'link', 'content', 'summary', 'text']
         for col in display_df.select_dtypes(include=['object']).columns:
-            # Apply more aggressive truncation to known long columns
             if any(long_col in col.lower() for long_col in long_content_columns):
                 display_df[col] = display_df[col].astype(str).apply(
                     lambda x: x[:30] + '...' if len(str(x)) > 30 else x
                 )
             else:
-                # Standard truncation for other text columns
                 display_df[col] = display_df[col].astype(str).apply(
                     lambda x: x[:50] + '...' if len(str(x)) > 50 else x
                 )
         return display_df
-    def get_column_stats(self):
-        """Get statistics for each column"""
-        if self.df is None:
-            return "Please load a feed first"
-        try:
-            stats = []
-            for column in self.df.columns:
-                unique_values = self.df[column].nunique()
-                null_count = self.df[column].isnull().sum()
-                total_count = len(self.df)
-                # Get top 5 most common values
-                if self.df[column].dtype == 'object':
-                    top_values = self.df[column].value_counts().head(5)
-                    top_values_str = ", ".join([f"{val} ({count})" for val, count in top_values.items()])
-                else:
-                    top_values_str = f"Min: {self.df[column].min()}, Max: {self.df[column].max()}"
-                stats.append({
-                    'Column': column,
-                    'Unique Values': unique_values,
-                    'Null Values': null_count,
-                    'Data Type': str(self.df[column].dtype),
-                    'Top Values/Range': top_values_str
-                })
-            stats_df = pd.DataFrame(stats)
-            return stats_df
-        except Exception as e:
-            return f"Error generating statistics: {str(e)}"
-    def calcular_ponderados(self, df):
-        """Función para calcular medias ponderadas"""
-        total_count = df["count"].sum()
-        mean_cpa = (df["cpa_goal"] * df["count"]).sum() / total_count if total_count > 0 else 0
-        mean_sponsored = (df["sponsored"] * df["count"]).sum() / total_count if total_count > 0 else 0
-        min_cpc = (df["sponsored"]).min()
-        max_cpc = (df["sponsored"]).max()
-        min_cpa = (df["cpa_goal"]).min()
-        max_cpa = (df["cpa_goal"]).max()
-        # Obtener la hora actual en PST
-        pacific_tz = pytz.timezone("America/Los_Angeles")
-        now_pst = datetime.datetime.now(pytz.utc).astimezone(pacific_tz)
-        return pd.Series({
-            "total_jobs": int(total_count),
-            "mean_cpa_goal": round(mean_cpa,2),
-            "mean_cpc": round(mean_sponsored,2),
-            "target_cvr": round((mean_sponsored/mean_cpa)*100,2) if mean_cpa > 0 else 0,
-            "min_cpc": round(min_cpc,2),
-            "max_cpc": round(max_cpc,2),
-            "min_cpa": round(min_cpa,2),
-            "max_cpa": round(max_cpa,2),
-            "last_update": now_pst.strftime("%Y-%m-%d %H:%M:%S %Z")
-        })
-    def get_weighted_stats_by_group(self, group_column, reference_col=None, cpa_col=None, cpc_col=None):
-        """Get weighted statistics grouped by specified column with flexible column selection"""
-        if self.df is None:
-            return pd.DataFrame(), "Please load a feed first"
-        # Check if group column exists
-        if group_column not in self.df.columns:
-            available_columns = [col for col in self.df.columns if col != 'last_update']
-            return pd.DataFrame(), f"Column '{group_column}' not found. Available columns: {', '.join(available_columns)}"
-        # Check if selected columns exist
-        selected_columns = [col for col in [reference_col, cpa_col, cpc_col] if col is not None]
-        missing_columns = [col for col in selected_columns if col not in self.df.columns]
-        if missing_columns:
-            available_columns = list(self.df.columns)
-            return pd.DataFrame(), f"Missing selected columns: {', '.join(missing_columns)}. Available columns: {', '.join(available_columns)}"
-        try:
-            def calculate_group_stats(group_df):
-                results = {}
-                # Always calculate total postings
-                results["total_postings"] = int(len(group_df))
-                # Calculate unique references if reference column is provided
-                if reference_col:
-                    results["unique_references"] = int(group_df[reference_col].nunique())
-                # Calculate CPA statistics if CPA column is provided
-                if cpa_col:
-                    cpa_series = pd.to_numeric(group_df[cpa_col], errors='coerce')
-                    results["mean_cpa_goal"] = round(cpa_series.mean(), 2) if not cpa_series.isna().all() else 0
-                    results["min_cpa"] = round(cpa_series.min(), 2) if not cpa_series.isna().all() else 0
-                    results["max_cpa"] = round(cpa_series.max(), 2) if not cpa_series.isna().all() else 0
-                # Calculate CPC/Payout statistics if CPC column is provided
-                if cpc_col:
-                    cpc_series = pd.to_numeric(group_df[cpc_col], errors='coerce')
-                    results["mean_payouts"] = round(cpc_series.mean(), 2) if not cpc_series.isna().all() else 0
-                    results["min_payouts"] = round(cpc_series.min(), 2) if not cpc_series.isna().all() else 0
-                    results["max_payouts"] = round(cpc_series.max(), 2) if not cpc_series.isna().all() else 0
-                # Calculate Target CVR if both CPA and CPC columns are provided
-                if cpa_col and cpc_col:
-                    mean_cpa = results.get("mean_cpa_goal", 0)
-                    mean_payouts = results.get("mean_payouts", 0)
-                    if mean_cpa > 0 and mean_payouts > 0:
-                        results["target_cvr"] = round((mean_payouts/mean_cpa)*100, 2)
-                    else:
-                        results["target_cvr"] = 0
-                # Get current time in PST
-                pacific_tz = pytz.timezone("America/Los_Angeles")
-                now_pst = datetime.datetime.now(pytz.utc).astimezone(pacific_tz)
-                results["last_update"] = now_pst.strftime("%Y-%m-%d %H:%M:%S %Z")
-                return pd.Series(results)
-            # Group by selected column and apply calculations
-            grouped_stats = self.df.groupby(group_column).apply(calculate_group_stats).reset_index()
-            # Sort by most relevant metric
-            if "unique_references" in grouped_stats.columns:
-                grouped_stats = grouped_stats.sort_values('unique_references', ascending=False)
-            else:
-                grouped_stats = grouped_stats.sort_values('total_postings', ascending=False)
-            return grouped_stats, "Success"
-        except Exception as e:
-            return pd.DataFrame(), f"Error calculating weighted statistics: {str(e)}"
     def generate_csv(self, df, filename_prefix="feed"):
-        """Generate CSV file for download with fixed filename"""
         if df is None or df.empty:
             return None
-        # Create a temporary file with the exact name we want
         temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False, prefix='')
-        temp_file.close()  # Close to get the filename
-        # Rename the file to what we want
-        import shutil
         final_filename = temp_file.name.replace(os.path.basename(temp_file.name), f"{filename_prefix}.csv")
-        # Save CSV with the desired name
         df.to_csv(final_filename, index=False)
         return final_filename
     def get_preview(self, df, max_rows=10):
-        """Get a preview of the dataframe as a Gradio DataFrame component with truncated long columns"""
         if df is None or df.empty:
             return None
-        # Limit the preview to avoid overwhelming display
         preview_df = df.head(max_rows).copy()
-        # Truncate long string values for better display
         for col in preview_df.select_dtypes(include=['object']).columns:
             preview_df[col] = preview_df[col].astype(str).apply(
                 lambda x: x[:50] + '...' if len(str(x)) > 50 else x
@@ -434,61 +454,19 @@ class FeedReader:
         return preview_df
-    def generate_map(self, city_col, state_col=None, country_col=None, max_points=500):
-        if self.df is None or self.df.empty:
-            return None, "⚠️ Please load a feed first"
-        if city_col not in self.df.columns:
-            return None, f"⚠️ Column '{city_col}' not found in dataset"
-        m = folium.Map(location=[20, 0], zoom_start=2)
-        marker_cluster = MarkerCluster().add_to(m)
-        count = 0
-        for _, row in self.df.iterrows():
-            if count >= max_points:
-                break
-            city = str(row[city_col]) if city_col else ""
-            state = str(row[state_col]) if state_col and state_col in self.df.columns else ""
-            country = str(row[country_col]) if country_col and country_col in self.df.columns else ""
-            query = ", ".join([p for p in [city, state, country] if p])
-            if not query.strip():
-                continue
-            location = geocode_cached(query)
-            if location:
-                folium.Marker(
-                    location=[location.latitude, location.longitude],
-                    popup=query
-                ).add_to(marker_cluster)
-                count += 1
-        return m._repr_html_(), f"✅ Mapped {count} locations"
 # Initialize the feed reader
 feed_reader = FeedReader()
-# Create Gradio interface
-def create_gradio_app():
-    with gr.Blocks(title="Feed Reader & Analyzer", theme=gr.themes.Soft()) as app:
-        # Header with theme toggle
         with gr.Row():
             with gr.Column(scale=4):
                 gr.Markdown("""
-                # 📡 Feed Reader & Analyzer
-                Load and analyze XML or JSON feeds from URLs. Supports compressed files (.gz) and various data formats.
                 """)
-        # Theme state
-        is_dark_theme = gr.State(False)
-        # CSS output for theme switching
-        theme_css = gr.HTML()
         with gr.Tab("📥 Load Feed"):
             with gr.Row():
                 with gr.Column():
@@ -520,17 +498,15 @@ def create_gradio_app():
                     label="Data Preview",
                     visible=True,
                     interactive=False,
-                    wrap=False,  # Keep rows small
-                    row_count=(1, "dynamic")  # Dynamic row configuration
                 )
             with gr.Row():
                 csv_download = gr.File(label="📥 Download Full Dataset (CSV)", visible=True)
-            # Variable para almacenar las opciones de columnas
             column_choices_state = gr.State([])
-            # Load feed functionality
             def process_and_download(url, job_tag):
                 summary, df_processed, csv_file, preview_df, column_choices, metadata_df = feed_reader.process_feed(url, job_tag)
                 return summary, metadata_df, preview_df, csv_file, column_choices
@@ -541,82 +517,152 @@ def create_gradio_app():
                 outputs=[summary_output, metadata_output, preview_dataframe, csv_download, column_choices_state]
             )
-        with gr.Tab("🔍 Filter Data"):
             with gr.Row():
                 with gr.Column():
-                    # Botones de columnas (inicialmente vacío)
-                    columns_radio = gr.Dropdown(
-                        label="Select Column",
-                        choices=[],
-                        value=None
-                    )
-                    # Dropdown para los valores de filtro
-                    filter_value_dropdown = gr.Dropdown(
-                        label="Filter Value",
-                        choices=[],
-                        value=None,
-                        interactive=True
-                    )
-                    filter_btn = gr.Button("🔍 Filter", variant="primary")
                 with gr.Column():
-                    filter_summary = gr.Markdown(label="Filter Results")
             with gr.Row():
-                filtered_dataframe = gr.Dataframe(
                     label="Filtered Data",
                     visible=True,
                     interactive=False,
-                    wrap=False,  # Disable text wrapping to keep rows small
-                    row_count=(1, "dynamic")  # Allow dynamic rows
                 )
             with gr.Row():
-                filtered_csv = gr.File(label="📥 Download Filtered Data (CSV)", visible=True)
-            # Función para actualizar las opciones de columnas
-            def update_column_choices(column_choices):
-                return gr.Radio(choices=column_choices, value=None if not column_choices else column_choices[0])
-            # Función para actualizar los valores del dropdown cuando se selecciona una columna
             def update_filter_values(selected_column):
-                if not selected_column or feed_reader.df is None:
-                    return gr.Dropdown(choices=[], value=None)
                 unique_values = feed_reader.get_column_unique_values(selected_column)
-                return gr.Dropdown(
-                    choices=unique_values,
-                    value="All" if unique_values else None
-                )
-            # Actualizar las opciones cuando se carga un feed
             column_choices_state.change(
-                update_column_choices,
                 inputs=[column_choices_state],
-                outputs=[columns_radio]
             )
-            # Actualizar los valores del dropdown cuando se selecciona una columna
-            columns_radio.change(
-                update_filter_values,
-                inputs=[columns_radio],
-                outputs=[filter_value_dropdown]
-            )
-            # Filter functionality
-            def filter_and_download(column_name, filter_value):
-                summary, df_filtered, csv_file = feed_reader.filter_by_column(column_name, filter_value)
-                if df_filtered is not None:
-                    # Show both summary and dataframe
-                    return summary, df_filtered, csv_file
-                else:
-                    # Show error and empty dataframe
-                    return summary, pd.DataFrame(), None
-            filter_btn.click(
-                filter_and_download,
-                inputs=[columns_radio, filter_value_dropdown],
-                outputs=[filter_summary, filtered_dataframe, filtered_csv]
             )
         with gr.Tab("📊 Statistics"):
@@ -708,11 +754,118 @@ def create_gradio_app():
             )
             # Basic statistics functionality
             basic_stats_btn.click(
-                feed_reader.get_column_stats,
                 outputs=[basic_stats_output]
             )
             # Weighted statistics functionality
             def calculate_weighted_stats(group_column, reference_col, cpa_col, cpc_col):
                 if not group_column:
@@ -727,7 +880,7 @@ def create_gradio_app():
                 if not reference_col and not cpa_col and not cpc_col:
                     return "Please select at least one metric column (Reference ID, CPA Goal, or Payouts)", None, None
-                weighted_df, message = feed_reader.get_weighted_stats_by_group(group_column, reference_col, cpa_col, cpc_col)
                 if not weighted_df.empty:
                     metrics_used = []
@@ -763,108 +916,159 @@ def create_gradio_app():
                 outputs=[weighted_stats_summary, weighted_stats_output, weighted_stats_csv]
             )
-        with gr.Tab("🌍 Map"):
             with gr.Row():
                 with gr.Column():
-                    gr.Markdown("### Select Columns for Mapping")
-                    city_col = gr.Dropdown(label="City Column", choices=[], value=None)
-                    state_col = gr.Dropdown(label="State Column (optional)", choices=[], value=None)
-                    country_col = gr.Dropdown(label="Country Column (optional)", choices=[], value=None)
-                    map_btn = gr.Button("🗺️ Generate Map", variant="primary")
                 with gr.Column():
                     map_status = gr.Markdown()
-                    map_output = gr.HTML()
-            # Actualizar dropdowns cuando se cargue un feed
             def update_map_choices(column_choices):
                 if not column_choices:
                     return (
-                        gr.Dropdown.update(choices=[]),
-                        gr.Dropdown.update(choices=[]),
-                        gr.Dropdown.update(choices=[])
                     )
                 return (
-                    gr.Dropdown.update(choices=column_choices, value=column_choices[0]),
-                    gr.Dropdown.update(choices=["None"] + column_choices, value="None"),
-                    gr.Dropdown.update(choices=["None"] + column_choices, value="None")
                 )
             column_choices_state.change(
                 update_map_choices,
                 inputs=[column_choices_state],
-                outputs=[city_col, state_col, country_col]
             )
-            # Generar mapa desde feed_reader
-            def generate_map_handler(city_col, state_col, country_col):
                 state_col = None if state_col == "None" else state_col
                 country_col = None if country_col == "None" else country_col
-                map_html, msg = feed_reader.generate_map(city_col, state_col, country_col)
                 return msg, map_html
             map_btn.click(
-                generate_map_handler,
-                inputs=[city_col, state_col, country_col],
                 outputs=[map_status, map_output]
             )
-            # Actualizar dropdowns cuando se cargue un feed
-            def update_map_choices(column_choices):
-                return (
-                    gr.Dropdown(choices=column_choices, value=None),
-                    gr.Dropdown(choices=["None"] + column_choices, value="None"),
-                    gr.Dropdown(choices=["None"] + column_choices, value="None")
-                )
-            column_choices_state.change(
-                update_map_choices,
-                inputs=[column_choices_state],
-                outputs=[city_col, state_col, country_col]
-            )
-            # Función para generar mapa
-            def generate_map(city_col, state_col, country_col):
-                state_col = None if state_col == "None" else state_col
-                country_col = None if country_col == "None" else country_col
-                map_html, msg = feed_reader.generate_map(city_col, state_col, country_col)
-                return msg, map_html
-            map_btn.click(
-                generate_map,
-                inputs=[city_col, state_col, country_col],
                 outputs=[map_status, map_output]
             )
         gr.Markdown("""
         ---
-        ### 📝 Instructions:
-        1. **Load Feed**: Enter a URL pointing to an XML or JSON feed and click "Load Feed"
-        2. **Filter Data**: Select a column from the radio buttons and enter a filter value
-        3. **Statistics**: View detailed statistics about each column in your dataset
-        4. **Download**: CSV files are automatically generated for download
-        **Supported Formats:**
-        - XML files (.xml, .xml.gz)
-        - JSON files (.json)
-        - REST APIs returning JSON
-        **Features:**
-        - Automatic format detection
-        - Data cleaning and validation
-        - Dynamic column-based filtering with dropdown values
-        - Statistical analysis
-        - CSV export functionality
-        - Resizable dataframe columns (drag column borders to resize)
         """)
     return app
-# Launch the app
 if __name__ == "__main__":
-    app = create_gradio_app()
     app.launch(share=True, debug=True)

 import os
 import tempfile
 import pytz
+import time
 geolocator = Nominatim(user_agent="feed_reader_app")
     def load_feed_to_dataframe(self, url, job_tag="job"):
         """
         Load an XML feed (.xml or .xml.gz) or JSON from a URL and convert to DataFrame.
         """
         try:
             response = requests.get(url, timeout=30)
                 elif isinstance(data, dict) and "jobs" in data:
                     df = pd.DataFrame(data["jobs"])
                 else:
                     df = pd.DataFrame([data] if not isinstance(data, list) else data)
                 df = df.applymap(lambda x: self.truncate(x) if isinstance(x, str) else x)
                 df = self.clean_invalid_numbers(df)
                 return df
             items = root.findall(f".//{job_tag}")
             if not items:
                 common_tags = ["item", "entry", "record", "row"]
                 for tag in common_tags:
                     items = root.findall(f".//{tag}")
                         break
             if not items:
+                return pd.DataFrame(), f"No <{job_tag}> elements found in the XML."
             jobs_data = []
             for job in items:
             df = self.clean_invalid_numbers(df)
             return df, "Success"
         except Exception as e:
+            return pd.DataFrame(), f"Error: {str(e)}"
     def process_feed(self, url, job_tag="job"):
         """Main function to process feed and return results"""
         if not url.strip():
             return "Please enter a valid URL", None, "", "", []
         result = self.load_feed_to_dataframe(url.strip(), job_tag.strip())
         if isinstance(result, tuple):
             df = result
             message = "Success"
         self.df = df
         df['last_update'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        df_processed = df
+        #df_processed = df.fillna(0).infer_objects(copy=False)
         summary = f"""
 📊 **Feed Processing Results**
 ✅ **Status:** {message}
 📋 **Rows:** {df_processed.shape[0]:,}
 📝 **Columns:** {df_processed.shape[1]}
         """
         metadata_df = pd.DataFrame({
             'Column Name': df_processed.columns.tolist(),
             'Data Type': [str(df_processed[col].dtype) for col in df_processed.columns],
             'Null Values': [df_processed[col].isnull().sum() for col in df_processed.columns]
         })
         column_choices = df_processed.columns.tolist()
         return summary, df_processed, self.generate_csv(df_processed, "feed"), self.get_preview(df_processed), column_choices, metadata_df
     def get_column_unique_values(self, column_name):
         """Get unique values for a specific column"""
+        if self.df is None or column_name not in self.df.columns:
             return []
         unique_values = self.df[column_name].dropna().astype(str).unique()
         unique_values = sorted([str(val) for val in unique_values if str(val) != 'nan'])
         return ["All"] + unique_values
+    def apply_multiple_filters(self, filters_dict, progress=gr.Progress()):
+        """Apply multiple filters to the dataframe"""
         if self.df is None:
+            return pd.DataFrame(), "Please load a feed first", ""
+        progress(0, desc="Starting filter process...")
+        # Start with the full dataframe
+        filtered_df = self.df.copy()
+        filter_descriptions = []
+        # Apply each filter
+        active_filters = {k: v for k, v in filters_dict.items()
+                         if v and v != "All" and v != "None"}
+        if not active_filters:
+            progress(1, desc="No filters applied - showing all data")
+            filtered_df = filtered_df.fillna(0).infer_objects(copy=False)
+            display_df = self.truncate_display_columns(filtered_df.copy())
+            summary = f"""
+🔍 **Filter Results**
 📋 **Total Rows:** {filtered_df.shape[0]:,}
+🎯 **Filters Applied:** None (showing all data)
+            """
+            return display_df, summary, self.generate_csv(filtered_df, "all_data")
+        progress(0.2, desc="Applying filters...")
+        for i, (column, value) in enumerate(active_filters.items()):
+            if column not in self.df.columns:
+                continue
+            progress(0.2 + (0.6 * i / len(active_filters)),
+                    desc=f"Filtering by {column}: {value}")
+            # Apply filter based on data type
+            if self.df[column].dtype == 'object':
+                filtered_df = filtered_df[filtered_df[column].astype(str) == str(value)]
+            else:
                 try:
+                    filter_val_numeric = float(value)
+                    filtered_df = filtered_df[filtered_df[column] == filter_val_numeric]
                 except ValueError:
+                    filtered_df = filtered_df[filtered_df[column].astype(str) == str(value)]
+            filter_descriptions.append(f"{column} = '{value}'")
+        progress(0.8, desc="Processing results...")
+        if filtered_df.empty:
+            progress(1, desc="Filter complete - no results found")
+            return pd.DataFrame(), "No records found matching the specified filters", ""
+        filtered_df = filtered_df.fillna(0).infer_objects(copy=False)
+        display_df = self.truncate_display_columns(filtered_df.copy())
+        progress(1, desc="Filter complete")
+        summary = f"""
+🔍 **Multi-Filter Results**
 📋 **Matching Rows:** {filtered_df.shape[0]:,}
+🎯 **Filters Applied:** {len(active_filters)}
+📝 **Filter Details:**
+{chr(10).join(f"   • {desc}" for desc in filter_descriptions)}
+        """
+        filename_suffix = "_".join([f"{k}_{v}" for k, v in active_filters.items()])[:50]
+        return display_df, summary, self.generate_csv(filtered_df, f"filtered_{filename_suffix}")
     def truncate_display_columns(self, df):
         """Return a copy of ``df`` with object columns shortened for display.

         Each object column is converted to strings. Columns whose name
         contains one of the long-content keywords are cut to 30 characters,
         all other object columns to 50; cut values get a trailing '...'.
         The input frame is not modified.
         """
         keywords = ('url', 'description', 'link', 'content', 'summary', 'text')
         shortened = df.copy()

         def clipper(limit):
             # Build the per-cell function for a given maximum length.
             return lambda cell: cell[:limit] + '...' if len(str(cell)) > limit else cell

         for name in shortened.select_dtypes(include=['object']).columns:
             lowered = name.lower()
             limit = 30 if any(key in lowered for key in keywords) else 50
             shortened[name] = shortened[name].astype(str).apply(clipper(limit))
         return shortened
+    def generate_map_with_job_counts(self, city_col, state_col=None, country_col=None,
+                                   title_col=None, max_points=500, progress=gr.Progress()):
+        """Generate map with job count markers per location with progress tracking"""
+        if self.df is None or self.df.empty:
+            return None, "⚠️ Please load a feed first"
+        if city_col not in self.df.columns:
+            return None, f"⚠️ Column '{city_col}' not found in dataset"
+        progress(0, desc="Initializing map generation...")
+        # Create map
+        m = folium.Map(location=[20, 0], zoom_start=2)
+        progress(0.1, desc="Processing location data...")
+        # Prepare location data
+        location_data = []
+        total_rows = len(self.df)
+        for idx, (_, row) in enumerate(self.df.iterrows()):
+            if idx % 100 == 0:  # Update progress every 100 rows
+                progress(0.1 + (0.3 * idx / total_rows),
+                        desc=f"Processing locations... {idx}/{total_rows}")
+            city = str(row[city_col]) if city_col else ""
+            state = str(row[state_col]) if state_col and state_col in self.df.columns else ""
+            country = str(row[country_col]) if country_col and country_col in self.df.columns else ""
+            location_parts = [p for p in [city, state, country] if p and p.strip() and p != 'nan']
+            if not location_parts:
+                continue
+            location_key = ", ".join(location_parts)
+            title_id = str(row[title_col]) if title_col and title_col in self.df.columns else None
+            location_data.append({
+                'location_key': location_key,
+                'city': city,
+                'state': state,
+                'country': country,
+                'title_id': title_id
+            })
+        if not location_data:
+            progress(1, desc="No valid location data found")
+            return None, "⚠️ No valid location data found"
+        progress(0.4, desc="Aggregating location statistics...")
+        # Group by location
+        locations_df = pd.DataFrame(location_data)
+        if title_col and title_col in self.df.columns:
+            location_stats = locations_df.groupby('location_key').agg({
+                'title_id': ['count', 'nunique'],
+                'city': 'first',
+                'state': 'first',
+                'country': 'first'
+            }).reset_index()
+            location_stats.columns = ['location_key', 'total_postings', 'unique_titles', 'city', 'state', 'country']
+        else:
+            location_stats = locations_df.groupby('location_key').agg({
+                'city': 'first',
+                'state': 'first',
+                'country': 'first'
+            }).reset_index()
+            location_stats['total_postings'] = locations_df.groupby('location_key').size().values
+            location_stats['unique_titles'] = location_stats['total_postings']
+        progress(0.5, desc="Starting geocoding process...")
+        # Geocoding with progress tracking
+        successful_mappings = 0
+        failed_geocoding = 0
+        total_locations = len(location_stats)
+        for idx, (_, row) in enumerate(location_stats.iterrows()):
+            if successful_mappings >= max_points:
+                break
+            # Update progress during geocoding
+            progress(0.5 + (0.4 * idx / total_locations),
+                    desc=f"Geocoding locations... {successful_mappings} mapped, {failed_geocoding} failed")
+            location_key = row['location_key']
+            total_postings = row['total_postings']
+            unique_titles = row['unique_titles']
+            location = geocode_cached(location_key)
+            if location:
+                # Calculate marker properties
+                max_titles = location_stats['unique_titles'].max()
+                min_size = 10
+                max_size = 50
+                if max_titles > 0:
+                    marker_size = min_size + (max_size - min_size) * (unique_titles / max_titles)
+                else:
+                    marker_size = min_size
+                # Color coding
+                if unique_titles >= max_titles * 0.8:
+                    color = 'red'
+                elif unique_titles >= max_titles * 0.5:
+                    color = 'orange'
+                elif unique_titles >= max_titles * 0.2:
+                    color = 'yellow'
+                else:
+                    color = 'green'
+                # Create popup
+                popup_text = f"""
+                <div style='font-family: Arial, sans-serif; min-width: 200px;'>
+                    <h4 style='color: #2E86AB; margin-bottom: 10px;'>📍 {location_key}</h4>
+                    <hr style='margin: 5px 0;'>
+                    <p><strong>🎯 Unique Titles:</strong> {unique_titles}</p>
+                    <p><strong>📊 Total Postings:</strong> {total_postings}</p>
+                    <p><strong>📈 Avg Postings/Title:</strong> {round(total_postings/unique_titles, 1) if unique_titles > 0 else 0}</p>
+                </div>
+                """
+                folium.CircleMarker(
+                    location=[location.latitude, location.longitude],
+                    radius=marker_size,
+                    popup=folium.Popup(popup_text, max_width=300),
+                    color='black',
+                    weight=2,
+                    fillColor=color,
+                    fillOpacity=0.7,
+                    tooltip=f"{location_key}: {unique_titles} titles"
+                ).add_to(m)
+                successful_mappings += 1
+            else:
+                failed_geocoding += 1
+            # Small delay to prevent overwhelming the geocoding service
+            time.sleep(0.1)
+        progress(0.9, desc="Finalizing map...")
+        # Add legend
+        legend_html = f"""
+        <div style='position: fixed;
+                    bottom: 50px; left: 50px; width: 200px; height: 120px;
+                    background-color: white; border:2px solid grey; z-index:9999;
+                    font-size:14px; padding: 10px'>
+        <h4 style='margin:0; color: #2E86AB;'>📊 Job Count Legend</h4>
+        <p style='margin:5px 0;'><i style='color:red'>●</i> High (80%+ of max)</p>
+        <p style='margin:5px 0;'><i style='color:orange'>●</i> Medium-High (50-80%)</p>
+        <p style='margin:5px 0;'><i style='color:yellow'>●</i> Medium (20-50%)</p>
+        <p style='margin:5px 0;'><i style='color:green'>●</i> Low (&lt;20%)</p>
+        <small>Marker size = Job count</small>
+        </div>
+        """
+        m.get_root().html.add_child(folium.Element(legend_html))
+        progress(1, desc="Map generation complete!")
+        # Generate status message
+        status_msg = f"""
+✅ **Map Generated Successfully**
+🗺️ **Mapped Locations:** {successful_mappings}
+❌ **Failed to Geocode:** {failed_geocoding}
+📊 **Total Unique Locations:** {len(location_stats)}
+🎯 **Columns Used:**
+   • City: {city_col}
+   • State: {state_col if state_col else 'Not selected'}
+   • Country: {country_col if country_col else 'Not selected'}
+   • Title/ID: {title_col if title_col else 'Not selected'}
+💡 **Map Features:**
+   • Marker size represents job count
+   • Colors show relative job density
+   • Click markers for detailed info
+   • Hover for quick stats
+        """
+        if title_col:
+            top_location_idx = location_stats['unique_titles'].idxmax()
+            top_location = location_stats.loc[top_location_idx, 'location_key']
+            top_count = location_stats['unique_titles'].max()
+            status_msg += f"\n🏆 **Top Location:** {top_location} ({top_count} titles)"
+        return m._repr_html_(), status_msg
     def generate_csv(self, df, filename_prefix="feed"):
         """Write ``df`` to a CSV file and return its path for download.

         The file is named ``<filename_prefix>.csv`` and placed in a freshly
         created temporary directory, so the download keeps a readable name
         while separate exports never overwrite each other.

         Args:
             df: DataFrame to export.
             filename_prefix: Base name of the CSV file. Characters other than
                 letters, digits, '-' and '_' are replaced with '_'.

         Returns:
             Path of the written file, or ``None`` when ``df`` is ``None`` or empty.
         """
         if df is None or df.empty:
             return None
         # The prefix can be derived from feed values; strip anything that
         # could act as a path separator or be invalid in a filename.
         safe_prefix = "".join(
             ch if ch.isalnum() or ch in "-_" else "_" for ch in str(filename_prefix)
         ) or "feed"
         export_dir = tempfile.mkdtemp(prefix="feed_export_")
         final_filename = os.path.join(export_dir, f"{safe_prefix}.csv")
         df.to_csv(final_filename, index=False)
         return final_filename
     def get_preview(self, df, max_rows=10):
         """Return the first ``max_rows`` rows of ``df`` prepared for display.

         Object columns are converted to strings and values longer than 50
         characters are cut to 50 with a trailing '...'. Returns ``None`` when
         ``df`` is ``None`` or empty. The input frame is not modified.
         """
         if df is None or df.empty:
             return None
         preview_df = df.head(max_rows).copy()
         for col in preview_df.select_dtypes(include=['object']).columns:
             preview_df[col] = preview_df[col].astype(str).apply(
                 lambda x: x[:50] + '...' if len(str(x)) > 50 else x
             )
         return preview_df
 # Module-level FeedReader instance; the Gradio callbacks defined below read
 # and query the loaded feed through this shared object.
 feed_reader = FeedReader()
+def create_enhanced_gradio_app():
+    with gr.Blocks(title="Enhanced Feed Reader & Analyzer", theme=gr.themes.Soft()) as app:
         with gr.Row():
             with gr.Column(scale=4):
                 gr.Markdown("""
+                # 📡 Enhanced Feed Reader & Analyzer
+                Load and analyze XML or JSON feeds with advanced multi-filtering and interactive mapping.
                 """)
         with gr.Tab("📥 Load Feed"):
             with gr.Row():
                 with gr.Column():
                     label="Data Preview",
                     visible=True,
                     interactive=False,
+                    wrap=False,
+                    row_count=(1, "dynamic")
                 )
             with gr.Row():
                 csv_download = gr.File(label="📥 Download Full Dataset (CSV)", visible=True)
             column_choices_state = gr.State([])
             def process_and_download(url, job_tag):
                 """Load the feed and reorder the results for the Load Feed tab outputs."""
                 result = feed_reader.process_feed(url, job_tag)
                 # process_feed yields: summary, full frame, csv path, preview, columns, metadata
                 summary, _full_frame, csv_file, preview_df, column_choices, metadata_df = result
                 return summary, metadata_df, preview_df, csv_file, column_choices
                 outputs=[summary_output, metadata_output, preview_dataframe, csv_download, column_choices_state]
             )
+        with gr.Tab("🔍 Advanced Filter Data"):
+            gr.Markdown("### 🎯 Multi-Column Filtering")
+            gr.Markdown("Apply multiple filters simultaneously to narrow down your dataset:")
             with gr.Row():
                 with gr.Column():
+                    gr.Markdown("**Primary Filters:**")
+                    with gr.Column():
+                        filter1_col = gr.Dropdown(
+                            label="Filter 1 - Column",
+                            choices=[],
+                            value=None
+                        )
+                        filter1_val = gr.Dropdown(
+                            label="Filter 1 - Value",
+                            choices=[],
+                            value=None
+                        )
+                    with gr.Column():
+                        filter2_col = gr.Dropdown(
+                            label="Filter 2 - Column",
+                            choices=[],
+                            value=None
+                        )
+                        filter2_val = gr.Dropdown(
+                            label="Filter 2 - Value",
+                            choices=[],
+                            value=None
+                        )
                 with gr.Column():
+                    gr.Markdown("**Additional Filters:**")
+                    with gr.Column():
+                        filter3_col = gr.Dropdown(
+                            label="Filter 3 - Column",
+                            choices=[],
+                            value=None
+                        )
+                        filter3_val = gr.Dropdown(
+                            label="Filter 3 - Value",
+                            choices=[],
+                            value=None
+                        )
+                    with gr.Column():
+                        filter4_col = gr.Dropdown(
+                            label="Filter 4 - Column",
+                            choices=[],
+                            value=None
+                        )
+                        filter4_val = gr.Dropdown(
+                            label="Filter 4 - Value",
+                            choices=[],
+                            value=None
+                        )
+            with gr.Row():
+                multi_filter_btn = gr.Button("🔍 Apply Multi-Filter", variant="primary", size="lg")
+                clear_filters_btn = gr.Button("🧹 Clear All Filters", variant="secondary")
             with gr.Row():
+                multi_filter_summary = gr.Markdown(label="Multi-Filter Results")
+            with gr.Row():
+                multi_filtered_dataframe = gr.Dataframe(
                     label="Filtered Data",
                     visible=True,
                     interactive=False,
+                    wrap=False,
+                    row_count=(1, "dynamic")
                 )
             with gr.Row():
+                multi_filtered_csv = gr.File(label="📥 Download Filtered Data (CSV)", visible=True)
+            # Helper functions for updating dropdowns
+            def update_all_filter_columns(column_choices):
+                choices_with_none = ["None"] + column_choices if column_choices else ["None"]
+                return (
+                    gr.Dropdown(choices=choices_with_none, value="None"),
+                    gr.Dropdown(choices=choices_with_none, value="None"),
+                    gr.Dropdown(choices=choices_with_none, value="None"),
+                    gr.Dropdown(choices=choices_with_none, value="None")
+                )
             def update_filter_values(selected_column):
                 """Populate a value dropdown with the unique values of the chosen column."""
                 nothing_selected = not selected_column or selected_column == "None"
                 if nothing_selected or feed_reader.df is None:
                     return gr.Dropdown(choices=["None"], value="None")
                 values = feed_reader.get_column_unique_values(selected_column)
                 default_value = "All" if values else "None"
                 return gr.Dropdown(choices=values, value=default_value)
+            # Update column choices when data is loaded
             column_choices_state.change(
+                update_all_filter_columns,
                 inputs=[column_choices_state],
+                outputs=[filter1_col, filter2_col, filter3_col, filter4_col]
             )
+            # Update value dropdowns when columns are selected
+            filter1_col.change(update_filter_values, inputs=[filter1_col], outputs=[filter1_val])
+            filter2_col.change(update_filter_values, inputs=[filter2_col], outputs=[filter2_val])
+            filter3_col.change(update_filter_values, inputs=[filter3_col], outputs=[filter3_val])
+            filter4_col.change(update_filter_values, inputs=[filter4_col], outputs=[filter4_val])
+            # Multi-filter functionality
+            def apply_multi_filters(col1, val1, col2, val2, col3, val3, col4, val4, progress=gr.Progress()):
+                filters = {}
+                if col1 and col1 != "None" and val1 and val1 != "None":
+                    filters[col1] = val1
+                if col2 and col2 != "None" and val2 and val2 != "None":
+                    filters[col2] = val2
+                if col3 and col3 != "None" and val3 and val3 != "None":
+                    filters[col3] = val3
+                if col4 and col4 != "None" and val4 and val4 != "None":
+                    filters[col4] = val4
+                return feed_reader.apply_multiple_filters(filters, progress)
+            def clear_all_filters():
+                return (
+                    "Filters cleared - select columns and values to filter data",
+                    pd.DataFrame(),
+                    None,
+                    gr.Dropdown(value="None"),
+                    gr.Dropdown(value="None"),
+                    gr.Dropdown(value="None"),
+                    gr.Dropdown(value="None"),
+                    gr.Dropdown(value="None"),
+                    gr.Dropdown(value="None"),
+                    gr.Dropdown(value="None"),
+                    gr.Dropdown(value="None")
+                )
+            multi_filter_btn.click(
+                apply_multi_filters,
+                inputs=[filter1_col, filter1_val, filter2_col, filter2_val,
+                       filter3_col, filter3_val, filter4_col, filter4_val],
+                outputs=[multi_filtered_dataframe, multi_filter_summary, multi_filtered_csv]
+            )
+            clear_filters_btn.click(
+                clear_all_filters,
+                outputs=[multi_filter_summary, multi_filtered_dataframe, multi_filtered_csv,
+                        filter1_col, filter1_val, filter2_col, filter2_val,
+                        filter3_col, filter3_val, filter4_col, filter4_val]
             )
         with gr.Tab("📊 Statistics"):
             )
             # Basic statistics functionality
+            def get_column_stats():
+                """Get statistics for each column"""
+                if feed_reader.df is None:
+                    return pd.DataFrame()
+                try:
+                    stats = []
+                    for column in feed_reader.df.columns:
+                        unique_values = feed_reader.df[column].nunique()
+                        null_count = feed_reader.df[column].isnull().sum()
+                        total_count = len(feed_reader.df)
+                        # Get top 5 most common values
+                        if feed_reader.df[column].dtype == 'object':
+                            top_values = feed_reader.df[column].value_counts().head(5)
+                            top_values_str = ", ".join([f"{val} ({count})" for val, count in top_values.items()])
+                        else:
+                            top_values_str = f"Min: {feed_reader.df[column].min()}, Max: {feed_reader.df[column].max()}"
+                        stats.append({
+                            'Column': column,
+                            'Unique Values': unique_values,
+                            'Null Values': null_count,
+                            'Data Type': str(feed_reader.df[column].dtype),
+                            'Top Values/Range': top_values_str
+                        })
+                    stats_df = pd.DataFrame(stats)
+                    return stats_df
+                except Exception as e:
+                    return pd.DataFrame()
             basic_stats_btn.click(
+                get_column_stats,
                 outputs=[basic_stats_output]
             )
+            # Get weighted statistics functionality
+            def get_weighted_stats_by_group(group_column, reference_col=None, cpa_col=None, cpc_col=None):
+                """Get weighted statistics grouped by specified column with flexible column selection"""
+                if feed_reader.df is None:
+                    return pd.DataFrame(), "Please load a feed first"
+                # Check if group column exists
+                if group_column not in feed_reader.df.columns:
+                    available_columns = [col for col in feed_reader.df.columns if col != 'last_update']
+                    return pd.DataFrame(), f"Column '{group_column}' not found. Available columns: {', '.join(available_columns)}"
+                # Check if selected columns exist
+                selected_columns = [col for col in [reference_col, cpa_col, cpc_col] if col is not None]
+                missing_columns = [col for col in selected_columns if col not in feed_reader.df.columns]
+                if missing_columns:
+                    available_columns = list(feed_reader.df.columns)
+                    return pd.DataFrame(), f"Missing selected columns: {', '.join(missing_columns)}. Available columns: {', '.join(available_columns)}"
+                try:
+                    def calculate_group_stats(group_df):
+                        results = {}
+                        # Always calculate total postings
+                        results["total_postings"] = int(len(group_df))
+                        # Calculate unique references if reference column is provided
+                        if reference_col:
+                            results["unique_references"] = int(group_df[reference_col].nunique())
+                        # Calculate CPA statistics if CPA column is provided
+                        if cpa_col:
+                            cpa_series = pd.to_numeric(group_df[cpa_col], errors='coerce')
+                            results["mean_cpa_goal"] = round(cpa_series.mean(), 2) if not cpa_series.isna().all() else 0
+                            results["min_cpa"] = round(cpa_series.min(), 2) if not cpa_series.isna().all() else 0
+                            results["max_cpa"] = round(cpa_series.max(), 2) if not cpa_series.isna().all() else 0
+                        # Calculate CPC/Payout statistics if CPC column is provided
+                        if cpc_col:
+                            cpc_series = pd.to_numeric(group_df[cpc_col], errors='coerce')
+                            results["mean_payouts"] = round(cpc_series.mean(), 2) if not cpc_series.isna().all() else 0
+                            results["min_payouts"] = round(cpc_series.min(), 2) if not cpc_series.isna().all() else 0
+                            results["max_payouts"] = round(cpc_series.max(), 2) if not cpc_series.isna().all() else 0
+                        # Calculate Target CVR if both CPA and CPC columns are provided
+                        if cpa_col and cpc_col:
+                            mean_cpa = results.get("mean_cpa_goal", 0)
+                            mean_payouts = results.get("mean_payouts", 0)
+                            if mean_cpa > 0 and mean_payouts > 0:
+                                results["target_cvr"] = round((mean_payouts/mean_cpa)*100, 2)
+                            else:
+                                results["target_cvr"] = 0
+                        # Get current time in PST
+                        pacific_tz = pytz.timezone("America/Los_Angeles")
+                        now_pst = datetime.datetime.now(pytz.utc).astimezone(pacific_tz)
+                        results["last_update"] = now_pst.strftime("%Y-%m-%d %H:%M:%S %Z")
+                        return pd.Series(results)
+                    # Group by selected column and apply calculations
+                    grouped_stats = feed_reader.df.groupby(group_column).apply(calculate_group_stats).reset_index()
+                    # Sort by most relevant metric
+                    if "unique_references" in grouped_stats.columns:
+                        grouped_stats = grouped_stats.sort_values('unique_references', ascending=False)
+                    else:
+                        grouped_stats = grouped_stats.sort_values('total_postings', ascending=False)
+                    return grouped_stats, "Success"
+                except Exception as e:
+                    return pd.DataFrame(), f"Error calculating weighted statistics: {str(e)}"
             # Weighted statistics functionality
             def calculate_weighted_stats(group_column, reference_col, cpa_col, cpc_col):
                 if not group_column:
                 if not reference_col and not cpa_col and not cpc_col:
                     return "Please select at least one metric column (Reference ID, CPA Goal, or Payouts)", None, None
+                weighted_df, message = get_weighted_stats_by_group(group_column, reference_col, cpa_col, cpc_col)
                 if not weighted_df.empty:
                     metrics_used = []
                 outputs=[weighted_stats_summary, weighted_stats_output, weighted_stats_csv]
             )
+        with gr.Tab("🌍 Interactive Job Map"):
             with gr.Row():
                 with gr.Column():
+                    gr.Markdown("### 📍 Map Configuration")
+                    gr.Markdown("Select columns for geographic visualization:")
+                    city_col = gr.Dropdown(
+                        label="🏙️ City Column (Required)",
+                        choices=[],
+                        value=None,
+                        info="Column containing city names"
+                    )
+                    state_col = gr.Dropdown(
+                        label="🗺️ State/Province Column (Optional)",
+                        choices=[],
+                        value=None,
+                        info="Column containing state or province names"
+                    )
+                    country_col = gr.Dropdown(
+                        label="🌍 Country Column (Optional)",
+                        choices=[],
+                        value=None,
+                        info="Column containing country names"
+                    )
+                    title_col = gr.Dropdown(
+                        label="🎯 Title/Job ID Column (Optional)",
+                        choices=[],
+                        value=None,
+                        info="Column containing job titles or reference IDs"
+                    )
+                    with gr.Row():
+                        map_btn = gr.Button("🗺️ Generate Interactive Map", variant="primary", size="lg")
+                        clear_map_btn = gr.Button("🧹 Clear Map", variant="secondary")
                 with gr.Column():
                     map_status = gr.Markdown()
+            with gr.Row():
+                map_output = gr.HTML(label="Interactive Job Distribution Map")
             def update_map_choices(column_choices):
                 """Fill the four map dropdowns and pre-select likely columns by name.

                 Each column name is matched (case-insensitively, by substring)
                 against the keyword groups in priority order city, state,
                 country, title; a column is assigned to the first group it
                 matches, and a later matching column replaces an earlier one.
                 """
                 if not column_choices:
                     return tuple(gr.Dropdown(choices=[]) for _ in range(4))

                 optional_choices = ["None"] + column_choices
                 keyword_groups = (
                     ('city', ('city', 'ciudad', 'ville', 'location')),
                     ('state', ('state', 'province', 'region', 'estado')),
                     ('country', ('country', 'nation', 'pais', 'pays')),
                     ('title', ('title', 'job', 'position', 'req', 'reference', 'id', 'titulo')),
                 )
                 detected = {'city': None, 'state': "None", 'country': "None", 'title': "None"}

                 for name in column_choices:
                     lowered = name.lower()
                     for slot, terms in keyword_groups:
                         if any(term in lowered for term in terms):
                             detected[slot] = name
                             break

                 return (
                     gr.Dropdown(choices=column_choices, value=detected['city']),
                     gr.Dropdown(choices=optional_choices, value=detected['state']),
                     gr.Dropdown(choices=optional_choices, value=detected['country']),
                     gr.Dropdown(choices=optional_choices, value=detected['title'])
                 )
             column_choices_state.change(
                 update_map_choices,
                 inputs=[column_choices_state],
+                outputs=[city_col, state_col, country_col, title_col]
             )
+            def generate_job_count_map(city_col, state_col, country_col, title_col, progress=gr.Progress()):
+                if not city_col:
+                    return "❌ Please select a city column", None
+                # Handle "None" selections
                 state_col = None if state_col == "None" else state_col
                 country_col = None if country_col == "None" else country_col
+                title_col = None if title_col == "None" else title_col
+                map_html, msg = feed_reader.generate_map_with_job_counts(
+                    city_col, state_col, country_col, title_col, progress=progress
+                )
                 return msg, map_html
+            def clear_map():
+                return "🧹 Map cleared", ""
             map_btn.click(
+                generate_job_count_map,
+                inputs=[city_col, state_col, country_col, title_col],
                 outputs=[map_status, map_output]
             )
+            clear_map_btn.click(
+                clear_map,
                 outputs=[map_status, map_output]
             )
         gr.Markdown("""
         ---
+        ### 📝 Enhanced Features:
+        **🔍 Advanced Multi-Filtering:**
+        - Apply up to 4 simultaneous filters on different columns
+        - Real-time progress tracking during filter operations
+        - Smart dropdown population with available values
+        - Clear filter functionality
+        **🌍 Interactive Map with Progress:**
+        - Real-time progress bar during map generation
+        - Geocoding progress tracking
+        - Location data processing updates
+        - Performance optimizations with delays to prevent API limits
+        **📊 Enhanced Data Processing:**
+        - Improved error handling
+        - Better memory management
+        - Optimized for large datasets
+        - Smart column auto-detection
+        **💡 Usage Tips:**
+        - **Multi-Filtering**: Select "None" to skip a filter, "All" to show all values for that column
+        - **Map Generation**: Progress bar shows geocoding status and success/failure rates
+        - **Performance**: Large datasets may take longer to process - progress bars keep you informed
+        - **Column Detection**: Common column names are automatically detected and pre-selected
+        **🎯 Common Filter Combinations:**
+        - Filter 1: Company/Client + Filter 2: City
+        - Filter 1: Job Title + Filter 2: State + Filter 3: Country
+        - Filter 1: Category + Filter 2: Experience Level + Filter 3: Salary Range
+        **🗺️ Map Features:**
+        - Marker size = Job count per location
+        - Color coding = Job density (red=high, green=low)
+        - Interactive popups with detailed statistics
+        - Automatic legend and geocoding status
         """)
     return app
 # Script entry point: build the Gradio Blocks app and launch it with the
 # share and debug options enabled.
 if __name__ == "__main__":
+    app = create_enhanced_gradio_app()
     app.launch(share=True, debug=True)