gauravlochab committed · 398c34c
1 parent: b028096
chore: missing adjusted apr data
app.py
CHANGED
@@ -267,12 +267,158 @@ def fetch_apr_data_from_db():
                 # Log that we're skipping zero or -100 values
                 logger.debug(f"Skipping value for agent {agent_name} ({attr['agent_id']}): {apr_data['apr']} (zero or -100)")

-
+        logger.info(f"Extracted {len(apr_data_list)} valid APR data points")
+
+        # Added debug for adjusted APR data after May 10th
+        may_10_2025 = datetime(2025, 5, 10)
+        after_may_10 = [d for d in apr_data_list if d['timestamp'] >= may_10_2025]
+        with_adjusted_after_may_10 = [d for d in after_may_10 if d['adjusted_apr'] is not None]
+
+        logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
+        logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
+
+        # Log detailed information about when data began
+        first_adjusted = None
+        if with_adjusted_after_may_10:
+            first_adjusted_after = min(with_adjusted_after_may_10, key=lambda x: x['timestamp'])
+            logger.info(f"First adjusted_apr after May 10th: {first_adjusted_after['timestamp']} (Agent: {first_adjusted_after['agent_id']})")
+
+        # Check all data for first adjusted_apr
+        all_with_adjusted = [d for d in apr_data_list if d['adjusted_apr'] is not None]
+        if all_with_adjusted:
+            first_adjusted = min(all_with_adjusted, key=lambda x: x['timestamp'])
+            logger.info(f"First adjusted_apr ever: {first_adjusted['timestamp']} (Agent: {first_adjusted['agent_id']})")
+            last_adjusted = max(all_with_adjusted, key=lambda x: x['timestamp'])
+            logger.info(f"Last adjusted_apr ever: {last_adjusted['timestamp']} (Agent: {last_adjusted['agent_id']})")
+
+            # Calculate overall coverage
+            adjusted_ratio = len(all_with_adjusted) / len(apr_data_list) * 100
+            logger.info(f"Overall adjusted_apr coverage: {adjusted_ratio:.2f}% ({len(all_with_adjusted)}/{len(apr_data_list)} records)")
+
+        # Log per-agent adjusted APR statistics
+        agent_stats = {}
+        for record in apr_data_list:
+            agent_id = record['agent_id']
+            has_adjusted = record['adjusted_apr'] is not None
+
+            if agent_id not in agent_stats:
+                agent_stats[agent_id] = {'total': 0, 'adjusted': 0}
+
+            agent_stats[agent_id]['total'] += 1
+            if has_adjusted:
+                agent_stats[agent_id]['adjusted'] += 1
+
+        # Log stats for agents with meaningful data
+        for agent_id, stats in agent_stats.items():
+            if stats['total'] > 0:
+                coverage = (stats['adjusted'] / stats['total']) * 100
+                if coverage > 0:  # Only log agents that have at least some adjusted data
+                    logger.info(f"Agent {agent_id}: {coverage:.2f}% adjusted coverage ({stats['adjusted']}/{stats['total']} records)")
+
+        # Check for gaps in adjusted APR data
+        for agent_id in agent_stats:
+            # Get all records for this agent
+            agent_records = [r for r in apr_data_list if r['agent_id'] == agent_id]
+            # Sort by timestamp
+            agent_records.sort(key=lambda x: x['timestamp'])
+
+            # Find where adjusted APR starts and if there are gaps
+            has_adjusted = False
+            gap_count = 0
+            streak_length = 0
+            for record in agent_records:
+                if record['adjusted_apr'] is not None:
+                    if not has_adjusted:
+                        has_adjusted = True
+                        logger.info(f"Agent {agent_id}: First adjusted APR at {record['timestamp']}")
+                    streak_length += 1
+                elif has_adjusted:
+                    # We had adjusted data but now it's missing
+                    gap_count += 1
+                    if streak_length > 0:
+                        logger.warning(f"Agent {agent_id}: Gap in adjusted APR data after {streak_length} consecutive records")
+                    streak_length = 0
+
+            if gap_count > 0:
+                logger.warning(f"Agent {agent_id}: Found {gap_count} gaps in adjusted APR data")
+            elif has_adjusted:
+                logger.info(f"Agent {agent_id}: Continuous adjusted APR data with no gaps")
+
+        # Provide summary statistics
+        agents_with_data = sum(1 for stats in agent_stats.values() if stats['adjusted'] > 0)
+        agents_with_gaps = sum(1 for agent_id in agent_stats if
+                               any(apr_data_list[i]['agent_id'] == agent_id and apr_data_list[i]['adjusted_apr'] is not None and
+                                   i+1 < len(apr_data_list) and apr_data_list[i+1]['agent_id'] == agent_id and
+                                   apr_data_list[i+1]['adjusted_apr'] is None
+                                   for i in range(len(apr_data_list)-1)))
+
+        logger.info(f"ADJUSTED APR SUMMARY: {agents_with_data}/{len(agent_stats)} agents have adjusted APR data")
+        if agents_with_gaps > 0:
+            logger.warning(f"ATTENTION: {agents_with_gaps} agents have gaps in their adjusted APR data")
+            logger.warning("These gaps may cause discontinuities in the adjusted APR graph")
+        else:
+            logger.info("No gaps detected in adjusted APR data - graph should be continuous")
+
+        if len(with_adjusted_after_may_10) == 0 and len(after_may_10) > 0:
+            logger.warning("No adjusted_apr values found after May 10th, 2025 despite having APR data")
+
+            # Log agent IDs with missing adjusted_apr after May 10th
+            agents_after_may_10 = set(d['agent_id'] for d in after_may_10)
+            logger.info(f"Agents with data after May 10th: {agents_after_may_10}")
+
+            # Check these same agents before May 10th
+            before_may_10 = [d for d in apr_data_list if d['timestamp'] < may_10_2025]
+            agents_with_adjusted_before = {d['agent_id'] for d in before_may_10 if d['adjusted_apr'] is not None}
+
+            # Agents that had adjusted_apr before but not after
+            missing_adjusted = agents_with_adjusted_before.intersection(agents_after_may_10)
+            if missing_adjusted:
+                logger.warning(f"Agents that had adjusted_apr before May 10th but not after: {missing_adjusted}")
+
+                # Find the last valid adjusted_apr date for these agents
+                for agent_id in missing_adjusted:
+                    agent_data = [d for d in before_may_10 if d['agent_id'] == agent_id and d['adjusted_apr'] is not None]
+                    if agent_data:
+                        last_entry = max(agent_data, key=lambda d: d['timestamp'])
+                        logger.info(f"Agent {agent_id}: Last adjusted_apr on {last_entry['timestamp']} with value {last_entry['adjusted_apr']}")
+
+                    # Look at the first entry after the cutoff without adjusted_apr
+                    agent_after = [d for d in after_may_10 if d['agent_id'] == agent_id]
+                    if agent_after:
+                        first_after = min(agent_after, key=lambda d: d['timestamp'])
+                        logger.info(f"Agent {agent_id}: First entry after cutoff on {first_after['timestamp']} missing adjusted_apr")
+
+                        # If the agent data has the 'adjusted_apr_key' field, log that info
+                        if 'adjusted_apr_key' in first_after:
+                            logger.info(f"Agent {agent_id}: Key used for adjusted_apr: {first_after['adjusted_apr_key']}")
+
+        # Add debug logic to check for any adjusted_apr after May 10th and which agents have it
+        elif len(with_adjusted_after_may_10) > 0:
+            logger.info("Found adjusted_apr values after May 10th, 2025")
+
+            # Group by agent and log
+            agent_counts = {}
+            for item in with_adjusted_after_may_10:
+                agent_id = item['agent_id']
+                if agent_id in agent_counts:
+                    agent_counts[agent_id] += 1
+                else:
+                    agent_counts[agent_id] = 1
+
+            logger.info(f"Agents with adjusted_apr after May 10th: {agent_counts}")
+
+            # Log adjusted_apr keys used
+            keys_used = {item.get('adjusted_apr_key') for item in with_adjusted_after_may_10 if 'adjusted_apr_key' in item}
+            if keys_used:
+                logger.info(f"Keys used for adjusted_apr after May 10th: {keys_used}")
+
+        # Convert to DataFrame
         if not apr_data_list:
             logger.error("No valid APR data extracted")
             global_df = pd.DataFrame([])
             return global_df

+        # Convert list of dictionaries to DataFrame
         global_df = pd.DataFrame(apr_data_list)

         # Log the resulting dataframe
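The per-agent bookkeeping added in this hunk is a plain dict of counters keyed by agent_id. A condensed, standalone sketch of the same pattern, with invented toy records (real entries also carry fields such as agent_name and apr):

```python
from datetime import datetime

# Toy records, invented for illustration only.
apr_data_list = [
    {'agent_id': 1, 'timestamp': datetime(2025, 5, 9), 'adjusted_apr': 3.2},
    {'agent_id': 1, 'timestamp': datetime(2025, 5, 11), 'adjusted_apr': None},
    {'agent_id': 2, 'timestamp': datetime(2025, 5, 12), 'adjusted_apr': 4.1},
]

agent_stats = {}
for record in apr_data_list:
    # setdefault condenses the "if agent_id not in agent_stats" branch above
    stats = agent_stats.setdefault(record['agent_id'], {'total': 0, 'adjusted': 0})
    stats['total'] += 1
    if record['adjusted_apr'] is not None:
        stats['adjusted'] += 1

for agent_id, stats in agent_stats.items():
    coverage = stats['adjusted'] / stats['total'] * 100
    print(f"Agent {agent_id}: {coverage:.2f}% ({stats['adjusted']}/{stats['total']})")
# Agent 1: 50.00% (1/2)
# Agent 2: 100.00% (1/1)
```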
@@ -291,9 +437,7 @@ def fetch_apr_data_from_db():
             avg_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).mean()
             max_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).max()
             min_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).min()
-            logger.info(f"APR vs
-        else:
-            logger.info("No adjusted APR values found in the data")
+            logger.info(f"APR vs. adjusted APR difference: avg={avg_diff:.2f}, min={min_diff:.2f}, max={max_diff:.2f}")

         # All values are APR type (excluding zero and -100 values)
         logger.info("All values are APR type (excluding zero and -100 values)")
@@ -304,6 +448,10 @@ def fetch_apr_data_from_db():
             for idx, row in global_df.iterrows():
                 logger.debug(f"Row {idx}: {row.to_dict()}")

+        # Add this at the end, right before returning the global_df
+        logger.info("Analyzing adjusted_apr data availability...")
+        log_adjusted_apr_availability(global_df)
+
         return global_df

     except requests.exceptions.RequestException as e:
@@ -312,10 +460,146 @@ def fetch_apr_data_from_db():
         return global_df
     except Exception as e:
         logger.error(f"Error fetching APR data: {e}")
-        logger.exception("Exception
+        logger.exception("Exception traceback:")
         global_df = pd.DataFrame([])
         return global_df

+def log_adjusted_apr_availability(df):
+    """
+    Analyzes and logs detailed information about adjusted_apr data availability.
+
+    Args:
+        df: DataFrame containing the APR data with adjusted_apr column
+    """
+    if df.empty or 'adjusted_apr' not in df.columns:
+        logger.warning("No adjusted_apr data available for analysis")
+        return
+
+    # Get only rows with valid adjusted_apr values
+    has_adjusted = df[df['adjusted_apr'].notna()]
+
+    if has_adjusted.empty:
+        logger.warning("No valid adjusted_apr values found in the dataset")
+        return
+
+    # 1. When did adjusted_apr data start?
+    first_adjusted = has_adjusted['timestamp'].min()
+    last_adjusted = has_adjusted['timestamp'].max()
+    logger.info(f"ADJUSTED APR SUMMARY: First data point: {first_adjusted}")
+    logger.info(f"ADJUSTED APR SUMMARY: Last data point: {last_adjusted}")
+    logger.info(f"ADJUSTED APR SUMMARY: Data spans {(last_adjusted - first_adjusted).days} days")
+
+    # Calculate coverage percentage
+    total_records = len(df)
+    records_with_adjusted = len(has_adjusted)
+    coverage_pct = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
+    logger.info(f"ADJUSTED APR SUMMARY: {records_with_adjusted} out of {total_records} records have adjusted_apr ({coverage_pct:.2f}%)")
+
+    # 2. How many agents are providing adjusted_apr?
+    agents_with_adjusted = has_adjusted['agent_id'].unique()
+    logger.info(f"ADJUSTED APR SUMMARY: {len(agents_with_adjusted)} agents providing adjusted_apr")
+    logger.info(f"ADJUSTED APR SUMMARY: Agents providing adjusted_apr: {list(agents_with_adjusted)}")
+
+    # 3. May 10th cutoff analysis
+    may_10_2025 = datetime(2025, 5, 10)
+    before_cutoff = df[df['timestamp'] < may_10_2025]
+    after_cutoff = df[df['timestamp'] >= may_10_2025]
+
+    if not before_cutoff.empty and not after_cutoff.empty:
+        before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
+        before_pct = (before_with_adjusted / len(before_cutoff)) * 100
+
+        after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
+        after_pct = (after_with_adjusted / len(after_cutoff)) * 100
+
+        logger.info(f"ADJUSTED APR SUMMARY: Before May 10th: {before_with_adjusted}/{len(before_cutoff)} records with adjusted_apr ({before_pct:.2f}%)")
+        logger.info(f"ADJUSTED APR SUMMARY: After May 10th: {after_with_adjusted}/{len(after_cutoff)} records with adjusted_apr ({after_pct:.2f}%)")
+
+        # Check which agents had data before and after
+        agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
+        agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
+
+        missing_after = agents_before - agents_after
+        if missing_after:
+            logger.warning(f"ADJUSTED APR SUMMARY: {len(missing_after)} agents stopped providing adjusted_apr after May 10th: {list(missing_after)}")
+
+        new_after = agents_after - agents_before
+        if new_after:
+            logger.info(f"ADJUSTED APR SUMMARY: {len(new_after)} agents started providing adjusted_apr after May 10th: {list(new_after)}")
+
+    # 4. Find date ranges for missing adjusted_apr
+    # Group by agent to analyze per-agent data availability
+    logger.info("=== DETAILED AGENT ANALYSIS ===")
+    for agent_id in df['agent_id'].unique():
+        agent_data = df[df['agent_id'] == agent_id]
+        agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
+
+        # Get the valid adjusted_apr values for this agent
+        agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
+
+        if agent_adjusted.empty:
+            logger.info(f"Agent {agent_name} (ID: {agent_id}): No adjusted_apr data available")
+            continue
+
+        # Get the date range for this agent's data
+        agent_start = agent_data['timestamp'].min()
+        agent_end = agent_data['timestamp'].max()
+
+        # Get the date range for adjusted_apr data
+        adjusted_start = agent_adjusted['timestamp'].min()
+        adjusted_end = agent_adjusted['timestamp'].max()
+
+        total_agent_records = len(agent_data)
+        agent_with_adjusted = len(agent_adjusted)
+        coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
+
+        logger.info(f"Agent {agent_name} (ID: {agent_id}): {agent_with_adjusted}/{total_agent_records} records with adjusted_apr ({coverage_pct:.2f}%)")
+        logger.info(f"Agent {agent_name} (ID: {agent_id}): APR data from {agent_start} to {agent_end}")
+        logger.info(f"Agent {agent_name} (ID: {agent_id}): Adjusted APR data from {adjusted_start} to {adjusted_end}")
+
+        # Calculate if this agent had data before/after May 10th
+        if not before_cutoff.empty and not after_cutoff.empty:
+            agent_before = before_cutoff[before_cutoff['agent_id'] == agent_id]
+            agent_after = after_cutoff[after_cutoff['agent_id'] == agent_id]
+
+            has_before = not agent_before.empty and agent_before['adjusted_apr'].notna().any()
+            has_after = not agent_after.empty and agent_after['adjusted_apr'].notna().any()
+
+            if has_before and not has_after:
+                last_date = agent_before[agent_before['adjusted_apr'].notna()]['timestamp'].max()
+                logger.warning(f"Agent {agent_name} (ID: {agent_id}): Stopped providing adjusted_apr after May 10th. Last data point: {last_date}")
+            elif not has_before and has_after:
+                first_date = agent_after[agent_after['adjusted_apr'].notna()]['timestamp'].min()
+                logger.info(f"Agent {agent_name} (ID: {agent_id}): Started providing adjusted_apr after May 10th. First data point: {first_date}")
+
+        # Check for gaps in adjusted_apr (periods of 24+ hours without data)
+        if len(agent_adjusted) < 2:
+            continue
+
+        # Sort by timestamp
+        sorted_data = agent_adjusted.sort_values('timestamp')
+
+        # Calculate time differences between consecutive data points
+        time_diffs = sorted_data['timestamp'].diff()
+
+        # Find gaps larger than 24 hours
+        gaps = sorted_data[time_diffs > pd.Timedelta(hours=24)]
+
+        if not gaps.empty:
+            logger.info(f"Agent {agent_name} (ID: {agent_id}): Found {len(gaps)} gaps in adjusted_apr data")
+
+            # Log the gaps
+            for i, row in gaps.iterrows():
+                # Find the previous timestamp before the gap
+                prev_idx = sorted_data.index.get_loc(i) - 1
+                prev_time = sorted_data.iloc[prev_idx]['timestamp'] if prev_idx >= 0 else None
+
+                if prev_time:
+                    gap_start = prev_time
+                    gap_end = row['timestamp']
+                    gap_duration = gap_end - gap_start
+                    logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")

 def generate_apr_visualizations():
     """Generate APR visualizations with real data only (no dummy data)"""
     global global_df
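The gap scan in log_adjusted_apr_availability relies on Series.diff() producing timedeltas between consecutive timestamps; rows whose delta exceeds pd.Timedelta(hours=24) mark the end of a gap. A small self-contained sketch with invented timestamps:

```python
import pandas as pd

# Three toy data points: 12 hours apart, then ~2.5 days apart.
sorted_data = pd.DataFrame({
    'timestamp': pd.to_datetime(['2025-05-01 00:00', '2025-05-01 12:00', '2025-05-04 00:00']),
    'adjusted_apr': [3.1, 3.3, 2.9],
})

time_diffs = sorted_data['timestamp'].diff()           # NaT for the first row
gaps = sorted_data[time_diffs > pd.Timedelta(hours=24)]  # NaT compares as False

print(gaps['timestamp'].tolist())
# [Timestamp('2025-05-04 00:00:00')]  -- the row that ends the 60-hour gap
```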
@@ -650,6 +934,9 @@ def create_combined_time_series_graph(df):
     avg_apr_data_with_ma['moving_avg'] = None  # 3-day window for APR
     avg_apr_data_with_ma['adjusted_moving_avg'] = None  # 3-day window for adjusted APR

+    # Keep track of the last valid adjusted_moving_avg value to handle gaps
+    last_valid_adjusted_moving_avg = None
+
     # Calculate the moving averages for each timestamp
     for i, row in avg_apr_data_with_ma.iterrows():
         current_time = row['timestamp']
@@ -667,9 +954,18 @@ def create_combined_time_series_graph(df):
             logger.debug(f"APR time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['apr'].mean()}")

             # Calculate adjusted APR moving average if data exists
-
-
-
+            has_adjusted_apr = 'adjusted_apr' in window_data.columns and window_data['adjusted_apr'].notna().any()
+            if has_adjusted_apr:
+                adjusted_avg = window_data['adjusted_apr'].dropna().mean()
+                avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = adjusted_avg
+                last_valid_adjusted_moving_avg = adjusted_avg
+                logger.debug(f"Adjusted APR time window {window_start} to {current_time}: {len(window_data)} points, avg={adjusted_avg}")
+            else:
+                # If we don't have adjusted_apr data in this window but had some previously,
+                # use the last valid value to maintain continuity in the graph
+                if last_valid_adjusted_moving_avg is not None:
+                    avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = last_valid_adjusted_moving_avg
+                    logger.debug(f"No adjusted APR data in window, using last valid value: {last_valid_adjusted_moving_avg}")
         else:
             # If no data points in the window, use the current value
             avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
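The new gap handling amounts to carrying the last computed window average forward whenever a window has no adjusted_apr points. The same idea reduced to a toy loop (values invented):

```python
# None marks a window with no adjusted_apr data.
window_averages = [4.2, None, None, 3.8, None]

last_valid = None
filled = []
for avg in window_averages:
    if avg is not None:
        last_valid = avg       # remember the most recent real average
    filled.append(last_valid)  # reuse it when the window is empty

print(filled)  # [4.2, 4.2, 4.2, 3.8, 3.8]
```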
@@ -776,11 +1072,18 @@ def create_combined_time_series_graph(df):

     # Add adjusted APR moving average line if it exists
     if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
-
+        # Create a copy of the dataframe with forward-filled adjusted_moving_avg values
+        # to ensure the line continues even when we have missing data
+        filled_avg_apr_data = avg_apr_data_with_ma.copy()
+        filled_avg_apr_data['adjusted_moving_avg'] = filled_avg_apr_data['adjusted_moving_avg'].fillna(method='ffill')
+
+        # Use the filled dataframe for the adjusted APR line
+        x_values_adj = filled_avg_apr_data['timestamp'].tolist()
+        y_values_adj_ma = filled_avg_apr_data['adjusted_moving_avg'].tolist()

         # Create hover template for the adjusted APR moving average line
         hover_data_adj = []
-        for idx, row in
+        for idx, row in filled_avg_apr_data.iterrows():
             timestamp = row['timestamp']
             if pd.notna(row['adjusted_moving_avg']):
                 hover_data_adj.append(
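Here fillna(method='ffill') propagates the last valid adjusted_moving_avg into the NaN rows so the plotted line stays continuous. Note that recent pandas releases deprecate the method= argument of fillna; Series.ffill() is the equivalent spelling. A toy example:

```python
import pandas as pd

s = pd.Series([1.0, None, None, 2.5])
print(s.ffill().tolist())  # [1.0, 1.0, 1.0, 2.5]
```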
@@ -793,7 +1096,7 @@ def create_combined_time_series_graph(df):

         fig.add_trace(
             go.Scatter(
-                x=
+                x=x_values_adj,
                 y=y_values_adj_ma,
                 mode='lines',  # Only lines for moving average
                 line=dict(color='green', width=4),  # Thicker solid line for adjusted APR
@@ -803,7 +1106,9 @@ def create_combined_time_series_graph(df):
                 visible=True  # Visible by default
             )
         )
-        logger.info(f"Added 3-day moving average Adjusted APR trace with {len(
+        logger.info(f"Added 3-day moving average Adjusted APR trace with {len(x_values_adj)} points (with forward-filling for missing values)")
+    else:
+        logger.warning("No adjusted APR moving average data available to plot")

     # Removed cumulative APR as requested
     logger.info("Cumulative APR graph line has been removed as requested")
@@ -1728,10 +2033,43 @@ def dashboard():

     # Function to update the graph without parameters (for refresh button)
     def refresh_graph():
-
+        """Refresh APR data from the database and update the visualization"""
+        try:
+            # Fetch new APR data
+            logger.info("Manually refreshing APR data...")
+            fetch_apr_data_from_db()
+
+            # Verify data was fetched successfully
+            if global_df is None or len(global_df) == 0:
+                logger.error("Failed to fetch APR data")
+                return combined_graph.value, "Error: Failed to fetch APR data. Check the logs for details."
+
+            # Log info about fetched data with focus on adjusted_apr
+            may_10_2025 = datetime(2025, 5, 10)
+            if 'timestamp' in global_df and 'adjusted_apr' in global_df:
+                after_may_10 = global_df[global_df['timestamp'] >= may_10_2025]
+                with_adjusted_after_may_10 = after_may_10[after_may_10['adjusted_apr'].notna()]
+
+                logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
+                logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
+
+            # Generate new visualization
+            logger.info("Generating new APR visualization...")
+            new_graph = update_apr_graph(apr_toggle.value, adjusted_apr_toggle.value)
+            return new_graph, "APR data refreshed successfully"
+        except Exception as e:
+            logger.error(f"Error refreshing APR data: {e}")
+            return combined_graph.value, f"Error: {str(e)}"
+
+    # Add a text area for status messages
+    status_text = gr.Textbox(label="Status", value="Ready", interactive=False)

-    # Set up the button click event
-    refresh_btn.click(
+    # Set up the button click event for refresh
+    refresh_btn.click(
+        fn=refresh_graph,
+        inputs=[],
+        outputs=[combined_graph, status_text]
+    )

     # Set up the toggle switch events
     apr_toggle.change(
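The refresh wiring follows the standard Gradio pattern: the callback returns one value per output component, in order. A minimal sketch under assumed component names (the real dashboard has more controls):

```python
import gradio as gr

def refresh():
    # One return value per output component, in order.
    return "graph placeholder", "APR data refreshed successfully"

with gr.Blocks() as demo:
    output = gr.Textbox(label="Graph placeholder")
    status = gr.Textbox(label="Status", value="Ready", interactive=False)
    btn = gr.Button("Refresh")
    btn.click(fn=refresh, inputs=[], outputs=[output, status])

# demo.launch()
```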
@@ -1751,3 +2089,130 @@ def dashboard():
 # Launch the dashboard
 if __name__ == "__main__":
     dashboard().launch()
+
+def generate_adjusted_apr_report():
+    """
+    Generate a detailed report about adjusted_apr data availability and save it to a file.
+    Returns the path to the generated report file.
+    """
+    global global_df
+
+    if global_df is None or global_df.empty or 'adjusted_apr' not in global_df.columns:
+        logger.warning("No adjusted_apr data available for report generation")
+        return None
+
+    # Create a report file
+    report_path = "adjusted_apr_report.txt"
+
+    with open(report_path, "w") as f:
+        f.write("======== ADJUSTED APR DATA AVAILABILITY REPORT ========\n\n")
+
+        # Summary statistics
+        total_records = len(global_df)
+        records_with_adjusted = global_df['adjusted_apr'].notna().sum()
+        pct_with_adjusted = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
+
+        f.write(f"Total APR records: {total_records}\n")
+        f.write(f"Records with adjusted_apr: {records_with_adjusted} ({pct_with_adjusted:.2f}%)\n\n")
+
+        # First and last data points
+        if records_with_adjusted > 0:
+            has_adjusted = global_df[global_df['adjusted_apr'].notna()]
+            first_date = has_adjusted['timestamp'].min()
+            last_date = has_adjusted['timestamp'].max()
+            f.write(f"First adjusted_apr record: {first_date}\n")
+            f.write(f"Last adjusted_apr record: {last_date}\n")
+            f.write(f"Date range: {(last_date - first_date).days} days\n\n")
+
+        # Agent statistics
+        f.write("===== AGENT STATISTICS =====\n\n")
+
+        # Group by agent
+        agent_stats = []
+
+        for agent_id in global_df['agent_id'].unique():
+            agent_data = global_df[global_df['agent_id'] == agent_id]
+            agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
+
+            total_agent_records = len(agent_data)
+            agent_with_adjusted = agent_data['adjusted_apr'].notna().sum()
+            coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
+
+            agent_stats.append({
+                'agent_id': agent_id,
+                'agent_name': agent_name,
+                'total_records': total_agent_records,
+                'with_adjusted': agent_with_adjusted,
+                'coverage_pct': coverage_pct
+            })
+
+        # Sort by coverage percentage (descending)
+        agent_stats.sort(key=lambda x: x['coverage_pct'], reverse=True)
+
+        # Write agent statistics
+        for agent in agent_stats:
+            f.write(f"Agent: {agent['agent_name']} (ID: {agent['agent_id']})\n")
+            f.write(f"  Records: {agent['total_records']}\n")
+            f.write(f"  With adjusted_apr: {agent['with_adjusted']} ({agent['coverage_pct']:.2f}%)\n")
+
+            # If agent has adjusted data, show date range
+            agent_data = global_df[global_df['agent_id'] == agent['agent_id']]
+            agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
+
+            if not agent_adjusted.empty:
+                first = agent_adjusted['timestamp'].min()
+                last = agent_adjusted['timestamp'].max()
+                f.write(f"  First adjusted_apr: {first}\n")
+                f.write(f"  Last adjusted_apr: {last}\n")
+
+            f.write("\n")
+
+        # Check for May 10th cutoff issue
+        f.write("===== MAY 10TH CUTOFF ANALYSIS =====\n\n")
+        may_10_2025 = datetime(2025, 5, 10)
+
+        before_cutoff = global_df[global_df['timestamp'] < may_10_2025]
+        after_cutoff = global_df[global_df['timestamp'] >= may_10_2025]
+
+        # Calculate coverage before and after
+        before_total = len(before_cutoff)
+        before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
+        before_pct = (before_with_adjusted / before_total) * 100 if before_total > 0 else 0
+
+        after_total = len(after_cutoff)
+        after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
+        after_pct = (after_with_adjusted / after_total) * 100 if after_total > 0 else 0
+
+        f.write(f"Before May 10th, 2025:\n")
+        f.write(f"  Records: {before_total}\n")
+        f.write(f"  With adjusted_apr: {before_with_adjusted} ({before_pct:.2f}%)\n\n")
+
+        f.write(f"After May 10th, 2025:\n")
+        f.write(f"  Records: {after_total}\n")
+        f.write(f"  With adjusted_apr: {after_with_adjusted} ({after_pct:.2f}%)\n\n")
+
+        # Check for agents that had data before but not after
+        if before_total > 0 and after_total > 0:
+            agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
+            agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
+
+            missing_after = agents_before - agents_after
+            new_after = agents_after - agents_before
+
+            if missing_after:
+                f.write(f"Agents with adjusted_apr before May 10th but not after: {list(missing_after)}\n")
+
+                # For each missing agent, show the last date with adjusted_apr
+                for agent_id in missing_after:
+                    agent_data = before_cutoff[(before_cutoff['agent_id'] == agent_id) &
+                                               (before_cutoff['adjusted_apr'].notna())]
+                    if not agent_data.empty:
+                        last_date = agent_data['timestamp'].max()
+                        agent_name = agent_data['agent_name'].iloc[0]
+                        f.write(f"  {agent_name} (ID: {agent_id}): Last adjusted_apr on {last_date}\n")
+
+            if new_after:
+                f.write(f"\nAgents with adjusted_apr after May 10th but not before: {list(new_after)}\n")
+
+    logger.info(f"Adjusted APR report generated: {report_path}")
+    return report_path
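For reference, a hypothetical invocation of the new report helper once global_df has been populated (names match the functions added in this commit; the report path is the hard-coded adjusted_apr_report.txt):

```python
# Assumes app.py is importable; the __main__ guard keeps the dashboard
# from launching on import.
import app

app.fetch_apr_data_from_db()               # populates app.global_df
path = app.generate_adjusted_apr_report()  # writes adjusted_apr_report.txt
if path:
    with open(path) as f:
        print(f.read())
else:
    print("No adjusted_apr data available")
```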