Spaces:

rogergs94
/

feed

Sleeping

App Files Files Community

rogergs94 committed on Sep 18, 2025

Commit

5700668

verified ·

1 Parent(s): 29b8dcc

Update app.py

Browse files

Updated with grouped option and map tab (to be built)

Files changed (1) hide show

app.py +556 -55

app.py CHANGED Viewed

@@ -1,3 +1,7 @@
 import pandas as pd
 import requests
 import xml.etree.ElementTree as ET
@@ -7,6 +11,17 @@ import gzip
 import datetime
 import gradio as gr
 import os
 class FeedReader:
     def __init__(self):
@@ -106,7 +121,7 @@ class FeedReader:
     def process_feed(self, url, job_tag="job"):
         """Main function to process feed and return results"""
         if not url.strip():
-            return "Please enter a valid URL", None, "", ""
         # Load the feed
         result = self.load_feed_to_dataframe(url.strip(), job_tag.strip())
@@ -114,7 +129,7 @@ class FeedReader:
         if isinstance(result, tuple):
             df, message = result
             if df.empty:
-                return f"Error: {message}", None, "", ""
         else:
             df = result
             message = "Success"
@@ -133,17 +148,39 @@ class FeedReader:
 📊 **Feed Processing Results**
 ✅ **Status:** {message}
-📋 **Rows:** {df_processed.shape[0]:,}
-📝 **Columns:** {df_processed.shape[1]}
-🔍 **Column Names:**
-{', '.join(df_processed.columns.tolist())}
-📈 **Data Types:**
-{df_processed.dtypes.to_string()}
         """
-        return summary, df_processed, self.generate_csv(df_processed), self.get_preview(df_processed)
     def filter_by_column(self, column_name, filter_value):
         """Filter dataframe by column value"""
@@ -163,32 +200,73 @@ class FeedReader:
             actual_column = matching_columns[0]
-            # Filter the dataframe
             if self.df[actual_column].dtype == 'object':  # String column
-                filtered_df = self.df[self.df[actual_column].str.contains(filter_value, na=False, case=False)]
             else:  # Numeric column
                 try:
                     filter_val_numeric = float(filter_value)
                     filtered_df = self.df[self.df[actual_column] == filter_val_numeric]
                 except ValueError:
-                    filtered_df = self.df[self.df[actual_column].astype(str).str.contains(filter_value, na=False, case=False)]
             if filtered_df.empty:
-                return f"No records found matching '{filter_value}' in column '{actual_column}'", None, ""
             filtered_df = filtered_df.fillna(0).infer_objects(copy=False)
             summary = f"""
 🔍 **Filtered Results**
 📋 **Matching Rows:** {filtered_df.shape[0]:,}
-🎯 **Filter:** {actual_column} contains '{filter_value}'
             """
-            return summary, filtered_df, self.generate_csv(filtered_df, f"filtered_{filter_value}")
         except Exception as e:
-            return f"Error filtering data: {str(e)}", None, ""
     def get_column_stats(self):
         """Get statistics for each column"""
@@ -223,46 +301,194 @@ class FeedReader:
         except Exception as e:
             return f"Error generating statistics: {str(e)}"
     def generate_csv(self, df, filename_prefix="feed"):
-        """Generate CSV file for download"""
         if df is None or df.empty:
             return None
-        # Create a temporary file
-        import tempfile
-        temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False, prefix=f'{filename_prefix}_')
-        df.to_csv(temp_file.name, index=False)
-        temp_file.close()
-        return temp_file.name
     def get_preview(self, df, max_rows=10):
-        """Get a preview of the dataframe"""
         if df is None or df.empty:
-            return "No data to preview"
         # Limit the preview to avoid overwhelming display
-        preview_df = df.head(max_rows)
         # Truncate long string values for better display
-        preview_df = preview_df.copy()
         for col in preview_df.select_dtypes(include=['object']).columns:
-            preview_df[col] = preview_df[col].astype(str).apply(lambda x: x[:50] + '...' if len(str(x)) > 50 else x)
-        preview = preview_df.to_string(max_cols=8, max_rows=max_rows, show_dimensions=True)
-        return f"**Data Preview (First {min(max_rows, len(df))} rows):**\n```\n{preview}\n```"
 # Initialize the feed reader
 feed_reader = FeedReader()
 # Create Gradio interface
 def create_gradio_app():
     with gr.Blocks(title="Feed Reader & Analyzer", theme=gr.themes.Soft()) as app:
-        gr.Markdown("""
-        # 📡 Feed Reader & Analyzer
-        Load and analyze XML or JSON feeds from URLs. Supports compressed files (.gz) and various data formats.
-        """)
         with gr.Tab("📥 Load Feed"):
             with gr.Row():
                 with gr.Column():
@@ -278,36 +504,58 @@ def create_gradio_app():
                     )
                     load_btn = gr.Button("🔄 Load Feed", variant="primary")
                 with gr.Column():
                     summary_output = gr.Markdown(label="Summary")
             with gr.Row():
-                preview_output = gr.Markdown(label="Data Preview")
             with gr.Row():
                 csv_download = gr.File(label="📥 Download Full Dataset (CSV)", visible=True)
             # Load feed functionality
             def process_and_download(url, job_tag):
-                summary, df_processed, csv_file, preview = feed_reader.process_feed(url, job_tag)
-                return summary, preview, csv_file
             load_btn.click(
                 process_and_download,
                 inputs=[url_input, job_tag_input],
-                outputs=[summary_output, preview_output, csv_download]
             )
         with gr.Tab("🔍 Filter Data"):
             with gr.Row():
                 with gr.Column():
-                    filter_column = gr.Textbox(
-                        label="Column Name",
-                        placeholder="e.g., clientname, title, category"
                     )
-                    filter_value = gr.Textbox(
                         label="Filter Value",
-                        placeholder="Value to search for"
                     )
                     filter_btn = gr.Button("🔍 Filter", variant="primary")
@@ -315,36 +563,288 @@ def create_gradio_app():
                     filter_summary = gr.Markdown(label="Filter Results")
             with gr.Row():
-                filtered_csv = gr.File(label="📥 Download Filtered Data (CSV)", visible=False)
             # Filter functionality
             def filter_and_download(column_name, filter_value):
                 summary, df_filtered, csv_file = feed_reader.filter_by_column(column_name, filter_value)
-                return summary, csv_file
             filter_btn.click(
                 filter_and_download,
-                inputs=[filter_column, filter_value],
-                outputs=[filter_summary, filtered_csv]
             )
         with gr.Tab("📊 Statistics"):
-            with gr.Column():
-                stats_btn = gr.Button("📊 Generate Column Statistics", variant="primary")
-                stats_output = gr.Dataframe(label="Column Statistics")
-            # Statistics functionality
-            stats_btn.click(
                 feed_reader.get_column_stats,
-                outputs=[stats_output]
             )
         gr.Markdown("""
         ---
         ### 📝 Instructions:
         1. **Load Feed**: Enter a URL pointing to an XML or JSON feed and click "Load Feed"
-        2. **Filter Data**: Use column names to filter the loaded data
         3. **Statistics**: View detailed statistics about each column in your dataset
         4. **Download**: CSV files are automatically generated for download
@@ -356,9 +856,10 @@ def create_gradio_app():
         **Features:**
         - Automatic format detection
         - Data cleaning and validation
-        - Column-based filtering
         - Statistical analysis
         - CSV export functionality
         """)
     return app

+import folium
+from folium.plugins import MarkerCluster
+from geopy.geocoders import Nominatim
+from functools import lru_cache
 import pandas as pd
 import requests
 import xml.etree.ElementTree as ET
 import datetime
 import gradio as gr
 import os
+import tempfile
+import pytz
+geolocator = Nominatim(user_agent="feed_reader_app")
@lru_cache(maxsize=10000)
def _geocode_lookup(query):
    """Run one lookup for *query* on the module-level ``geolocator``.

    Exceptions propagate to the caller. ``lru_cache`` stores nothing for a
    call that raised, so a failed lookup is attempted again next time
    instead of being remembered as "no result".
    """
    return geolocator.geocode(query, timeout=10)


def geocode_cached(query):
    """Geocode *query* with memoisation of completed lookups.

    Args:
        query: Location text passed to ``geolocator.geocode``. It must be
            hashable because it is used as the cache key.

    Returns:
        Whatever ``geolocator.geocode`` returned for the query, or ``None``
        when the lookup raised.
    """
    try:
        return _geocode_lookup(query)
    except Exception:
        # Deliberate best-effort: a failed lookup must not break the caller.
        # The failure is not cached, so the same query is retried later.
        return None


# Keep the cache-management helpers the decorated function used to expose.
geocode_cached.cache_info = _geocode_lookup.cache_info
geocode_cached.cache_clear = _geocode_lookup.cache_clear
 class FeedReader:
     def __init__(self):
     def process_feed(self, url, job_tag="job"):
         """Main function to process feed and return results"""
         if not url.strip():
+            return "Please enter a valid URL", None, "", "", []
         # Load the feed
         result = self.load_feed_to_dataframe(url.strip(), job_tag.strip())
         if isinstance(result, tuple):
             df, message = result
             if df.empty:
+                return f"Error: {message}", None, "", "", []
         else:
             df = result
             message = "Success"
 📊 **Feed Processing Results**
 ✅ **Status:** {message}
+📋 **Rows:** {df_processed.shape[0]:,}
+📝 **Columns:** {df_processed.shape[1]}
         """
+        # Create metadata dataframe
+        metadata_df = pd.DataFrame({
+            'Column Name': df_processed.columns.tolist(),
+            'Data Type': [str(df_processed[col].dtype) for col in df_processed.columns],
+            'Unique Values': [df_processed[col].nunique() for col in df_processed.columns],
+            'Null Values': [df_processed[col].isnull().sum() for col in df_processed.columns]
+        })
+        # Get column choices for filter tab
+        column_choices = df_processed.columns.tolist()
+        return summary, df_processed, self.generate_csv(df_processed, "feed"), self.get_preview(df_processed), column_choices, metadata_df
+    def get_column_unique_values(self, column_name):
+        """Get unique values for a specific column"""
+        if self.df is None:
+            return []
+        if column_name not in self.df.columns:
+            return []
+        # Get unique values and convert to string, sort them
+        unique_values = self.df[column_name].dropna().astype(str).unique()
+        unique_values = sorted([str(val) for val in unique_values if str(val) != 'nan'])
+        # Add "All" option at the beginning
+        return ["All"] + unique_values
     def filter_by_column(self, column_name, filter_value):
         """Filter dataframe by column value"""
             actual_column = matching_columns[0]
+            # If "All" is selected, return the entire dataframe
+            if filter_value == "All":
+                filtered_df = self.df.copy()
+                filtered_df = filtered_df.fillna(0).infer_objects(copy=False)
+                # Truncate long columns for display only
+                display_df = self.truncate_display_columns(filtered_df.copy())
+                summary = f"""
+🔍 **Filtered Results**
+📋 **Total Rows:** {filtered_df.shape[0]:,}
+🎯 **Filter:** Showing all records from column '{actual_column}'
+                """
+                return summary, display_df, self.generate_csv(filtered_df, f"all_{actual_column}")
+            # Filter the dataframe for specific value
             if self.df[actual_column].dtype == 'object':  # String column
+                # Exact match for dropdown selection
+                filtered_df = self.df[self.df[actual_column].astype(str) == str(filter_value)]
             else:  # Numeric column
                 try:
                     filter_val_numeric = float(filter_value)
                     filtered_df = self.df[self.df[actual_column] == filter_val_numeric]
                 except ValueError:
+                    filtered_df = self.df[self.df[actual_column].astype(str) == str(filter_value)]
             if filtered_df.empty:
+                return f"No records found matching '{filter_value}' in column '{actual_column}'", pd.DataFrame(), ""
             filtered_df = filtered_df.fillna(0).infer_objects(copy=False)
+            # Truncate long columns for display only
+            display_df = self.truncate_display_columns(filtered_df.copy())
             summary = f"""
 🔍 **Filtered Results**
 📋 **Matching Rows:** {filtered_df.shape[0]:,}
+🎯 **Filter:** {actual_column} = '{filter_value}'
             """
+            return summary, display_df, self.generate_csv(filtered_df, f"filtered_{filter_value}")
         except Exception as e:
+            return f"Error filtering data: {str(e)}", pd.DataFrame(), ""
+    def truncate_display_columns(self, df):
+        """Truncate long columns for better display in DataFrames"""
+        display_df = df.copy()
+        # Define columns that typically have long content
+        long_content_columns = ['url', 'description', 'link', 'content', 'summary', 'text']
+        for col in display_df.select_dtypes(include=['object']).columns:
+            # Apply more aggressive truncation to known long columns
+            if any(long_col in col.lower() for long_col in long_content_columns):
+                display_df[col] = display_df[col].astype(str).apply(
+                    lambda x: x[:30] + '...' if len(str(x)) > 30 else x
+                )
+            else:
+                # Standard truncation for other text columns
+                display_df[col] = display_df[col].astype(str).apply(
+                    lambda x: x[:50] + '...' if len(str(x)) > 50 else x
+                )
+        return display_df
     def get_column_stats(self):
         """Get statistics for each column"""
         except Exception as e:
             return f"Error generating statistics: {str(e)}"
+    def calcular_ponderados(self, df):
+        """Función para calcular medias ponderadas"""
+        total_count = df["count"].sum()
+        mean_cpa = (df["cpa_goal"] * df["count"]).sum() / total_count if total_count > 0 else 0
+        mean_sponsored = (df["sponsored"] * df["count"]).sum() / total_count if total_count > 0 else 0
+        min_cpc = (df["sponsored"]).min()
+        max_cpc = (df["sponsored"]).max()
+        min_cpa = (df["cpa_goal"]).min()
+        max_cpa = (df["cpa_goal"]).max()
+        # Obtener la hora actual en PST
+        pacific_tz = pytz.timezone("America/Los_Angeles")
+        now_pst = datetime.datetime.now(pytz.utc).astimezone(pacific_tz)
+        return pd.Series({
+            "total_jobs": int(total_count),
+            "mean_cpa_goal": round(mean_cpa,2),
+            "mean_cpc": round(mean_sponsored,2),
+            "target_cvr": round((mean_sponsored/mean_cpa)*100,2) if mean_cpa > 0 else 0,
+            "min_cpc": round(min_cpc,2),
+            "max_cpc": round(max_cpc,2),
+            "min_cpa": round(min_cpa,2),
+            "max_cpa": round(max_cpa,2),
+            "last_update": now_pst.strftime("%Y-%m-%d %H:%M:%S %Z")
+        })
+    def get_weighted_stats_by_group(self, group_column, reference_col=None, cpa_col=None, cpc_col=None):
+        """Get weighted statistics grouped by specified column with flexible column selection"""
+        if self.df is None:
+            return pd.DataFrame(), "Please load a feed first"
+        # Check if group column exists
+        if group_column not in self.df.columns:
+            available_columns = [col for col in self.df.columns if col != 'last_update']
+            return pd.DataFrame(), f"Column '{group_column}' not found. Available columns: {', '.join(available_columns)}"
+        # Check if selected columns exist
+        selected_columns = [col for col in [reference_col, cpa_col, cpc_col] if col is not None]
+        missing_columns = [col for col in selected_columns if col not in self.df.columns]
+        if missing_columns:
+            available_columns = list(self.df.columns)
+            return pd.DataFrame(), f"Missing selected columns: {', '.join(missing_columns)}. Available columns: {', '.join(available_columns)}"
+        try:
+            def calculate_group_stats(group_df):
+                results = {}
+                # Always calculate total postings
+                results["total_postings"] = int(len(group_df))
+                # Calculate unique references if reference column is provided
+                if reference_col:
+                    results["unique_references"] = int(group_df[reference_col].nunique())
+                # Calculate CPA statistics if CPA column is provided
+                if cpa_col:
+                    cpa_series = pd.to_numeric(group_df[cpa_col], errors='coerce')
+                    results["mean_cpa_goal"] = round(cpa_series.mean(), 2) if not cpa_series.isna().all() else 0
+                    results["min_cpa"] = round(cpa_series.min(), 2) if not cpa_series.isna().all() else 0
+                    results["max_cpa"] = round(cpa_series.max(), 2) if not cpa_series.isna().all() else 0
+                # Calculate CPC/Payout statistics if CPC column is provided
+                if cpc_col:
+                    cpc_series = pd.to_numeric(group_df[cpc_col], errors='coerce')
+                    results["mean_payouts"] = round(cpc_series.mean(), 2) if not cpc_series.isna().all() else 0
+                    results["min_payouts"] = round(cpc_series.min(), 2) if not cpc_series.isna().all() else 0
+                    results["max_payouts"] = round(cpc_series.max(), 2) if not cpc_series.isna().all() else 0
+                # Calculate Target CVR if both CPA and CPC columns are provided
+                if cpa_col and cpc_col:
+                    mean_cpa = results.get("mean_cpa_goal", 0)
+                    mean_payouts = results.get("mean_payouts", 0)
+                    if mean_cpa > 0 and mean_payouts > 0:
+                        results["target_cvr"] = round((mean_payouts/mean_cpa)*100, 2)
+                    else:
+                        results["target_cvr"] = 0
+                # Get current time in PST
+                pacific_tz = pytz.timezone("America/Los_Angeles")
+                now_pst = datetime.datetime.now(pytz.utc).astimezone(pacific_tz)
+                results["last_update"] = now_pst.strftime("%Y-%m-%d %H:%M:%S %Z")
+                return pd.Series(results)
+            # Group by selected column and apply calculations
+            grouped_stats = self.df.groupby(group_column).apply(calculate_group_stats).reset_index()
+            # Sort by most relevant metric
+            if "unique_references" in grouped_stats.columns:
+                grouped_stats = grouped_stats.sort_values('unique_references', ascending=False)
+            else:
+                grouped_stats = grouped_stats.sort_values('total_postings', ascending=False)
+            return grouped_stats, "Success"
+        except Exception as e:
+            return pd.DataFrame(), f"Error calculating weighted statistics: {str(e)}"
     def generate_csv(self, df, filename_prefix="feed"):
         """Write *df* to ``<filename_prefix>.csv`` and return the file path.

         Each call writes into its own freshly created temporary directory, so
         the download keeps a fixed, readable file name while separate calls
         (or users) never overwrite each other's export.

         Args:
             df: DataFrame to export.
             filename_prefix: Base name of the CSV file. Characters that are
                 not safe in a file name (path separators, control characters
                 and ``: * ? " < > |``) are replaced with ``_``; an empty
                 result falls back to ``feed``.

         Returns:
             Path of the written CSV, or ``None`` when *df* is ``None`` or
             empty.
         """
         import re

         if df is None or df.empty:
             return None

         # The prefix can contain user-selected filter values, so it must not
         # be able to introduce directories or escape the export directory.
         safe_prefix = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(filename_prefix))
         safe_prefix = safe_prefix.strip(" .") or "feed"

         export_dir = tempfile.mkdtemp(prefix="feed_export_")
         final_filename = os.path.join(export_dir, f"{safe_prefix}.csv")

         df.to_csv(final_filename, index=False)
         return final_filename
     def get_preview(self, df, max_rows=10):
         """Build a small preview frame for display.

         Returns ``None`` when *df* is ``None`` or empty. Otherwise returns a
         copy of the first *max_rows* rows in which every object column is
         converted to text and values longer than 50 characters are cut to 50
         characters plus ``...``.
         """
         if df is None or df.empty:
             return None

         def shorten(cell):
             # Keep short values untouched; clip long ones for readability.
             if len(str(cell)) > 50:
                 return cell[:50] + '...'
             return cell

         head = df.head(max_rows).copy()
         text_columns = head.select_dtypes(include=['object']).columns
         for name in text_columns:
             head[name] = head[name].astype(str).apply(shorten)
         return head
+    def generate_map(self, city_col, state_col=None, country_col=None, max_points=500):
+        if self.df is None or self.df.empty:
+            return None, "⚠️ Please load a feed first"
+        if city_col not in self.df.columns:
+            return None, f"⚠️ Column '{city_col}' not found in dataset"
+        m = folium.Map(location=[20, 0], zoom_start=2)
+        marker_cluster = MarkerCluster().add_to(m)
+        count = 0
+        for _, row in self.df.iterrows():
+            if count >= max_points:
+                break
+            city = str(row[city_col]) if city_col else ""
+            state = str(row[state_col]) if state_col and state_col in self.df.columns else ""
+            country = str(row[country_col]) if country_col and country_col in self.df.columns else ""
+            query = ", ".join([p for p in [city, state, country] if p])
+            if not query.strip():
+                continue
+            location = geocode_cached(query)
+            if location:
+                folium.Marker(
+                    location=[location.latitude, location.longitude],
+                    popup=query
+                ).add_to(marker_cluster)
+                count += 1
+        return m._repr_html_(), f"✅ Mapped {count} locations"
 # Initialize the feed reader
 feed_reader = FeedReader()
 # Create Gradio interface
 def create_gradio_app():
     with gr.Blocks(title="Feed Reader & Analyzer", theme=gr.themes.Soft()) as app:
+        # Header with theme toggle
+        with gr.Row():
+            with gr.Column(scale=4):
+                gr.Markdown("""
+                # 📡 Feed Reader & Analyzer
+                Load and analyze XML or JSON feeds from URLs. Supports compressed files (.gz) and various data formats.
+                """)
+        # Theme state
+        is_dark_theme = gr.State(False)
+        # CSS output for theme switching
+        theme_css = gr.HTML()
         with gr.Tab("📥 Load Feed"):
             with gr.Row():
                 with gr.Column():
                     )
                     load_btn = gr.Button("🔄 Load Feed", variant="primary")
+            with gr.Row():
                 with gr.Column():
                     summary_output = gr.Markdown(label="Summary")
+                with gr.Column():
+                    metadata_output = gr.Dataframe(
+                        label="📊 Columns Metadata",
+                        visible=True,
+                        interactive=False,
+                        wrap=False
+                    )
             with gr.Row():
+                preview_dataframe = gr.Dataframe(
+                    label="Data Preview",
+                    visible=True,
+                    interactive=False,
+                    wrap=False,  # Keep rows small
+                    row_count=(1, "dynamic")  # Dynamic row configuration
+                )
             with gr.Row():
                 csv_download = gr.File(label="📥 Download Full Dataset (CSV)", visible=True)
+            # Variable para almacenar las opciones de columnas
+            column_choices_state = gr.State([])
             # Load feed functionality
             def process_and_download(url, job_tag):
+                summary, df_processed, csv_file, preview_df, column_choices, metadata_df = feed_reader.process_feed(url, job_tag)
+                return summary, metadata_df, preview_df, csv_file, column_choices
             load_btn.click(
                 process_and_download,
                 inputs=[url_input, job_tag_input],
+                outputs=[summary_output, metadata_output, preview_dataframe, csv_download, column_choices_state]
             )
         with gr.Tab("🔍 Filter Data"):
             with gr.Row():
                 with gr.Column():
+                    # Botones de columnas (inicialmente vacío)
+                    columns_radio = gr.Radio(
+                        label="Select Column",
+                        choices=[],
+                        value=None
                     )
+                    # Dropdown para los valores de filtro
+                    filter_value_dropdown = gr.Dropdown(
                         label="Filter Value",
+                        choices=[],
+                        value=None,
+                        interactive=True
                     )
                     filter_btn = gr.Button("🔍 Filter", variant="primary")
                     filter_summary = gr.Markdown(label="Filter Results")
             with gr.Row():
+                filtered_dataframe = gr.Dataframe(
+                    label="Filtered Data",
+                    visible=True,
+                    interactive=False,
+                    wrap=False,  # Disable text wrapping to keep rows small
+                    row_count=(1, "dynamic")  # Allow dynamic rows
+                )
+            with gr.Row():
+                filtered_csv = gr.File(label="📥 Download Filtered Data (CSV)", visible=True)
+            # Función para actualizar las opciones de columnas
+            def update_column_choices(column_choices):
+                return gr.Radio(choices=column_choices, value=None if not column_choices else column_choices[0])
+            # Función para actualizar los valores del dropdown cuando se selecciona una columna
+            def update_filter_values(selected_column):
+                if not selected_column or feed_reader.df is None:
+                    return gr.Dropdown(choices=[], value=None)
+                unique_values = feed_reader.get_column_unique_values(selected_column)
+                return gr.Dropdown(
+                    choices=unique_values,
+                    value="All" if unique_values else None
+                )
+            # Actualizar las opciones cuando se carga un feed
+            column_choices_state.change(
+                update_column_choices,
+                inputs=[column_choices_state],
+                outputs=[columns_radio]
+            )
+            # Actualizar los valores del dropdown cuando se selecciona una columna
+            columns_radio.change(
+                update_filter_values,
+                inputs=[columns_radio],
+                outputs=[filter_value_dropdown]
+            )
             # Filter functionality
             def filter_and_download(column_name, filter_value):
                 summary, df_filtered, csv_file = feed_reader.filter_by_column(column_name, filter_value)
+                if df_filtered is not None:
+                    # Show both summary and dataframe
+                    return summary, df_filtered, csv_file
+                else:
+                    # Show error and empty dataframe
+                    return summary, pd.DataFrame(), None
             filter_btn.click(
                 filter_and_download,
+                inputs=[columns_radio, filter_value_dropdown],
+                outputs=[filter_summary, filtered_dataframe, filtered_csv]
             )
         with gr.Tab("📊 Statistics"):
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("### 📋 Basic Column Statistics")
+                    basic_stats_btn = gr.Button("📊 Generate Column Statistics", variant="primary")
+                    basic_stats_output = gr.Dataframe(label="Column Statistics")
+                with gr.Column():
+                    gr.Markdown("### 🎯 Weighted Statistics by Group")
+                    # Group selection for weighted stats
+                    stats_group_column = gr.Radio(
+                        label="Group By Column (company, client, etc.)",
+                        choices=[],
+                        value=None
+                    )
+                    # Column mapping for weighted calculations
+                    with gr.Row():
+                        reference_column = gr.Dropdown(
+                            label="Reference ID Column",
+                            choices=[],
+                            value=None
+                        )
+                        cpa_column = gr.Dropdown(
+                            label="CPA Goal Column",
+                            choices=[],
+                            value=None
+                        )
+                    with gr.Row():
+                        cpc_column = gr.Dropdown(
+                            label="Payouts: CPC/CPA Columns",
+                            choices=[],
+                            value=None
+                        )
+                    weighted_stats_btn = gr.Button("🧮 Calculate Weighted Statistics", variant="secondary")
+                    weighted_stats_summary = gr.Markdown(label="Weighted Stats Summary")
+            with gr.Row():
+                weighted_stats_output = gr.Dataframe(
+                    label="📈 Weighted Statistics by Group",
+                    visible=True,
+                    interactive=False,
+                    wrap=False
+                )
+            with gr.Row():
+                weighted_stats_csv = gr.File(label="📥 Download Weighted Statistics (CSV)", visible=True)
+            # Update all column choices when data is loaded
+            def update_all_stats_choices(column_choices):
+                # Filter out timestamp columns for grouping
+                exclude_columns = ['last_update']
+                grouping_choices = [col for col in column_choices if col not in exclude_columns]
+                # All columns available for metric selection with "None" option
+                metric_choices = ["None"] + column_choices
+                # Try to auto-detect common column names
+                reference_default = "None"
+                cpa_default = "None"
+                cpc_default = "None"
+                for col in column_choices:
+                    col_lower = col.lower()
+                    if 'reference' in col_lower or 'req' in col_lower or col_lower == 'referencenumber':
+                        reference_default = col
+                    elif 'cpa' in col_lower or 'goal' in col_lower:
+                        cpa_default = col
+                    elif 'cpc' in col_lower or 'sponsored' in col_lower or 'cost' in col_lower or 'payout' in col_lower:
+                        cpc_default = col
+                return (
+                    gr.Radio(choices=grouping_choices, value=grouping_choices[0] if grouping_choices else None),
+                    gr.Dropdown(choices=metric_choices, value=reference_default),
+                    gr.Dropdown(choices=metric_choices, value=cpa_default),
+                    gr.Dropdown(choices=metric_choices, value=cpc_default)
+                )
+            # Update all dropdown options when feed is loaded
+            column_choices_state.change(
+                update_all_stats_choices,
+                inputs=[column_choices_state],
+                outputs=[stats_group_column, reference_column, cpa_column, cpc_column]
+            )
+            # Basic statistics functionality
+            basic_stats_btn.click(
                 feed_reader.get_column_stats,
+                outputs=[basic_stats_output]
+            )
+            # Weighted statistics functionality
+            def calculate_weighted_stats(group_column, reference_col, cpa_col, cpc_col):
+                if not group_column:
+                    return "Please select a grouping column", None, None
+                # Handle "None" selections
+                reference_col = None if reference_col == "None" else reference_col
+                cpa_col = None if cpa_col == "None" else cpa_col
+                cpc_col = None if cpc_col == "None" else cpc_col
+                # At least one of the metric columns should be selected
+                if not reference_col and not cpa_col and not cpc_col:
+                    return "Please select at least one metric column (Reference ID, CPA Goal, or Payouts)", None, None
+                weighted_df, message = feed_reader.get_weighted_stats_by_group(group_column, reference_col, cpa_col, cpc_col)
+                if not weighted_df.empty:
+                    metrics_used = []
+                    if reference_col: metrics_used.append(f"Reference: {reference_col}")
+                    if cpa_col: metrics_used.append(f"CPA: {cpa_col}")
+                    if cpc_col: metrics_used.append(f"Payouts: {cpc_col}")
+                    summary = f"""
+🎯 **Weighted Statistics Results**
+✅ **Status:** {message}
+📊 **Groups:** {len(weighted_df)}
+🔢 **Grouped by:** {group_column}
+📈 **Metrics Used:** {' | '.join(metrics_used)}
+📊 **Available Metrics:**
+• **Unique References**: Count of unique IDs per group (if Reference ID selected)
+• **Total Postings**: Total rows/postings per group
+• **Mean CPA/Payouts**: Average values across all postings (if columns selected)
+• **Target CVR**: (Mean Payouts / Mean CPA) × 100 (if both selected)
+• **Min/Max Ranges**: Minimum and maximum values per group
+💡 **Note:** Only metrics with selected columns will be calculated and displayed.
+                    """
+                    csv_file = feed_reader.generate_csv(weighted_df, f"weighted_stats_{group_column}")
+                    return summary, weighted_df, csv_file
+                else:
+                    return f"❌ **Error:** {message}", None, None
+            # Run calculate_weighted_stats on click: the four selector values are its
+            # arguments, and the returned (summary, table, CSV) triple fills the
+            # summary, output and CSV components in that order.
+            weighted_stats_btn.click(
+                calculate_weighted_stats,
+                inputs=[stats_group_column, reference_column, cpa_column, cpc_column],
+                outputs=[weighted_stats_summary, weighted_stats_output, weighted_stats_csv]
             )
+        with gr.Tab("🌍 Map"):
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("### Select Columns for Mapping")
+                    city_col = gr.Dropdown(label="City Column", choices=[], value=None)
+                    state_col = gr.Dropdown(label="State Column (optional)", choices=[], value=None)
+                    country_col = gr.Dropdown(label="Country Column (optional)", choices=[], value=None)
+                    map_btn = gr.Button("🗺️ Generate Map", variant="primary")
+                with gr.Column():
+                    map_status = gr.Markdown()
+                    map_output = gr.HTML()
+            # Actualizar dropdowns cuando se cargue un feed
+            def update_map_choices(column_choices):
+                if not column_choices:
+                    return (
+                        gr.Dropdown.update(choices=[]),
+                        gr.Dropdown.update(choices=[]),
+                        gr.Dropdown.update(choices=[])
+                    )
+                return (
+                    gr.Dropdown.update(choices=column_choices, value=column_choices[0]),
+                    gr.Dropdown.update(choices=["None"] + column_choices, value="None"),
+                    gr.Dropdown.update(choices=["None"] + column_choices, value="None")
+                )
+            column_choices_state.change(
+                update_map_choices,
+                inputs=[column_choices_state],
+                outputs=[city_col, state_col, country_col]
+            )
+            # Generar mapa desde feed_reader
+            def generate_map_handler(city_col, state_col, country_col):
+                state_col = None if state_col == "None" else state_col
+                country_col = None if country_col == "None" else country_col
+                map_html, msg = feed_reader.generate_map(city_col, state_col, country_col)
+                return msg, map_html
+            map_btn.click(
+                generate_map_handler,
+                inputs=[city_col, state_col, country_col],
+                outputs=[map_status, map_output]
+            )
+            # Actualizar dropdowns cuando se cargue un feed
+            def update_map_choices(column_choices):
+                return (
+                    gr.Dropdown(choices=column_choices, value=None),
+                    gr.Dropdown(choices=["None"] + column_choices, value="None"),
+                    gr.Dropdown(choices=["None"] + column_choices, value="None")
+                )
+            column_choices_state.change(
+                update_map_choices,
+                inputs=[column_choices_state],
+                outputs=[city_col, state_col, country_col]
+            )
+            # Función para generar mapa
+            def generate_map(city_col, state_col, country_col):
+                state_col = None if state_col == "None" else state_col
+                country_col = None if country_col == "None" else country_col
+                map_html, msg = feed_reader.generate_map(city_col, state_col, country_col)
+                return msg, map_html
+            map_btn.click(
+                generate_map,
+                inputs=[city_col, state_col, country_col],
+                outputs=[map_status, map_output]
+            )
         gr.Markdown("""
         ---
         ### 📝 Instructions:
         1. **Load Feed**: Enter a URL pointing to an XML or JSON feed and click "Load Feed"
+        2. **Filter Data**: Select a column from the radio buttons and enter a filter value
         3. **Statistics**: View detailed statistics about each column in your dataset
         4. **Download**: CSV files are automatically generated for download
         **Features:**
         - Automatic format detection
         - Data cleaning and validation
+        - Dynamic column-based filtering with dropdown values
         - Statistical analysis
         - CSV export functionality
+        - Resizable dataframe columns (drag column borders to resize)
         """)
     return app