Spaces:
Sleeping
Sleeping
gauravlochab
committed on
Commit
·
cf88990
1
Parent(s):
144269c
chore: add preprocessing to the dataframe
Browse files
app.py
CHANGED
|
@@ -838,8 +838,114 @@ def generate_roi_visualizations():
|
|
| 838 |
|
| 839 |
return combined_fig, csv_file
|
| 840 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 841 |
def create_combined_roi_time_series_graph(df):
|
| 842 |
-
"""Create a time series graph showing
|
| 843 |
if len(df) == 0:
|
| 844 |
logger.error("No data to plot combined ROI graph")
|
| 845 |
fig = go.Figure()
|
|
@@ -953,53 +1059,20 @@ def create_combined_roi_time_series_graph(df):
|
|
| 953 |
# Use the filtered data for all subsequent operations
|
| 954 |
df = df_filtered
|
| 955 |
|
| 956 |
-
#
|
| 957 |
-
|
|
|
|
| 958 |
|
| 959 |
-
#
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
# Log the average ROI data
|
| 963 |
-
logger.info(f"Calculated average ROI data with {len(avg_roi_data)} points")
|
| 964 |
-
for idx, row in avg_roi_data.iterrows():
|
| 965 |
-
logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_roi={row['roi']}")
|
| 966 |
-
|
| 967 |
-
# Calculate moving average based on a time window (3 days)
|
| 968 |
-
# Sort data by timestamp
|
| 969 |
-
df_sorted = df.sort_values('timestamp')
|
| 970 |
|
| 971 |
-
#
|
| 972 |
-
|
| 973 |
-
avg_roi_data_with_ma['moving_avg'] = None # Initialize the moving average column
|
| 974 |
|
| 975 |
-
|
| 976 |
-
time_window = pd.Timedelta(days=3)
|
| 977 |
-
logger.info(f"Calculating moving average with time window of {time_window}")
|
| 978 |
-
|
| 979 |
-
# Calculate the moving averages for each timestamp
|
| 980 |
-
for i, row in avg_roi_data_with_ma.iterrows():
|
| 981 |
-
current_time = row['timestamp']
|
| 982 |
-
window_start = current_time - time_window
|
| 983 |
-
|
| 984 |
-
# Get all data points within the 3-day time window
|
| 985 |
-
window_data = df_sorted[
|
| 986 |
-
(df_sorted['timestamp'] >= window_start) &
|
| 987 |
-
(df_sorted['timestamp'] <= current_time)
|
| 988 |
-
]
|
| 989 |
-
|
| 990 |
-
# Calculate the average ROI for the 3-day time window
|
| 991 |
-
if not window_data.empty:
|
| 992 |
-
avg_roi_data_with_ma.at[i, 'moving_avg'] = window_data['roi'].mean()
|
| 993 |
-
logger.debug(f"ROI time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['roi'].mean()}")
|
| 994 |
-
else:
|
| 995 |
-
# If no data points in the window, use the current value
|
| 996 |
-
avg_roi_data_with_ma.at[i, 'moving_avg'] = row['roi']
|
| 997 |
-
logger.debug(f"No data points in time window for {current_time}, using current value {row['roi']}")
|
| 998 |
-
|
| 999 |
-
logger.info(f"Calculated time-based moving averages with {len(avg_roi_data_with_ma)} points")
|
| 1000 |
|
| 1001 |
# Find the last date where we have valid moving average data
|
| 1002 |
-
last_valid_ma_date =
|
| 1003 |
|
| 1004 |
# If we don't have any valid moving average data, use the max time from the original data
|
| 1005 |
last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
|
|
@@ -1007,17 +1080,17 @@ def create_combined_roi_time_series_graph(df):
|
|
| 1007 |
logger.info(f"Last valid moving average date: {last_valid_ma_date}")
|
| 1008 |
logger.info(f"Using last valid date for graph: {last_valid_date}")
|
| 1009 |
|
| 1010 |
-
# Plot individual agent data points with agent names in hover, but limit display for scalability
|
| 1011 |
-
if not
|
| 1012 |
# Group by agent to use different colors for each agent
|
| 1013 |
-
unique_agents =
|
| 1014 |
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
|
| 1015 |
|
| 1016 |
# Create a color map for agents
|
| 1017 |
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
|
| 1018 |
|
| 1019 |
# Calculate the total number of data points per agent to determine which are most active
|
| 1020 |
-
agent_counts =
|
| 1021 |
|
| 1022 |
# Determine how many agents to show individually (limit to top 5 most active)
|
| 1023 |
MAX_VISIBLE_AGENTS = 5
|
|
@@ -1025,9 +1098,9 @@ def create_combined_roi_time_series_graph(df):
|
|
| 1025 |
|
| 1026 |
logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
|
| 1027 |
|
| 1028 |
-
# Add data points for each agent, but only make top agents visible by default
|
| 1029 |
for agent_name in unique_agents:
|
| 1030 |
-
agent_data =
|
| 1031 |
|
| 1032 |
# Explicitly convert to Python lists
|
| 1033 |
x_values = agent_data['timestamp'].tolist()
|
|
@@ -1048,31 +1121,29 @@ def create_combined_roi_time_series_graph(df):
|
|
| 1048 |
size=10,
|
| 1049 |
line=dict(width=1, color='black')
|
| 1050 |
),
|
| 1051 |
-
name=f'Agent: {agent_name} (ROI)',
|
| 1052 |
-
hovertemplate='Time: %{x}<br>ROI: %{y:.2f}
|
| 1053 |
visible=is_visible # All agents hidden by default
|
| 1054 |
)
|
| 1055 |
)
|
| 1056 |
-
logger.info(f"Added ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
|
| 1057 |
|
| 1058 |
-
# Add ROI moving average as a smooth line
|
| 1059 |
-
x_values_ma =
|
| 1060 |
-
y_values_ma =
|
| 1061 |
|
| 1062 |
# Create hover template for the ROI moving average line
|
| 1063 |
hover_data_roi = []
|
| 1064 |
-
for idx, row in
|
| 1065 |
timestamp = row['timestamp']
|
| 1066 |
-
# Format timestamp to show only
|
| 1067 |
-
formatted_timestamp = timestamp.strftime('%Y-%m-%d
|
| 1068 |
|
| 1069 |
-
# Calculate number of active agents
|
| 1070 |
-
|
| 1071 |
-
active_agents = len(df[(df['timestamp'] >= time_24h_ago) &
|
| 1072 |
-
(df['timestamp'] <= timestamp)]['agent_id'].unique())
|
| 1073 |
|
| 1074 |
hover_data_roi.append(
|
| 1075 |
-
f"
|
| 1076 |
)
|
| 1077 |
|
| 1078 |
fig.add_trace(
|
|
@@ -1080,14 +1151,14 @@ def create_combined_roi_time_series_graph(df):
|
|
| 1080 |
x=x_values_ma,
|
| 1081 |
y=y_values_ma,
|
| 1082 |
mode='lines', # Only lines for moving average
|
| 1083 |
-
line=dict(color='blue', width=
|
| 1084 |
-
name='
|
| 1085 |
hovertext=hover_data_roi,
|
| 1086 |
hoverinfo='text',
|
| 1087 |
visible=True # Visible by default
|
| 1088 |
)
|
| 1089 |
)
|
| 1090 |
-
logger.info(f"Added
|
| 1091 |
|
| 1092 |
# Update layout with average runtime information in the title
|
| 1093 |
fig.update_layout(
|
|
@@ -1148,13 +1219,13 @@ def create_combined_roi_time_series_graph(df):
|
|
| 1148 |
)
|
| 1149 |
)
|
| 1150 |
|
| 1151 |
-
# Update y-axis with
|
| 1152 |
fig.update_yaxes(
|
| 1153 |
showgrid=True,
|
| 1154 |
gridwidth=1,
|
| 1155 |
gridcolor='rgba(0,0,0,0.1)',
|
| 1156 |
-
# Use
|
| 1157 |
-
|
| 1158 |
tickformat=".2f", # Format tick labels with 2 decimal places
|
| 1159 |
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
|
| 1160 |
title=None # Remove the built-in axis title since we're using annotations
|
|
@@ -1590,79 +1661,47 @@ def create_combined_time_series_graph(df):
|
|
| 1590 |
# Use the filtered data for all subsequent operations
|
| 1591 |
apr_data = apr_data_filtered
|
| 1592 |
|
| 1593 |
-
#
|
| 1594 |
-
|
| 1595 |
-
|
| 1596 |
-
# Sort by timestamp
|
| 1597 |
-
avg_apr_data = avg_apr_data.sort_values('timestamp')
|
| 1598 |
-
|
| 1599 |
-
# Log the average APR data
|
| 1600 |
-
logger.info(f"Calculated average APR data with {len(avg_apr_data)} points")
|
| 1601 |
-
for idx, row in avg_apr_data.iterrows():
|
| 1602 |
-
logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_apr={row['apr']}")
|
| 1603 |
-
|
| 1604 |
-
# Calculate moving average based on a time window (2 hours)
|
| 1605 |
-
# Sort data by timestamp
|
| 1606 |
-
apr_data_sorted = apr_data.sort_values('timestamp')
|
| 1607 |
-
|
| 1608 |
-
# Create a new dataframe for the moving average
|
| 1609 |
-
avg_apr_data_with_ma = avg_apr_data.copy()
|
| 1610 |
-
avg_apr_data_with_ma['moving_avg'] = None # Initialize the moving average column
|
| 1611 |
|
| 1612 |
-
#
|
| 1613 |
-
|
| 1614 |
-
logger.info(f"Calculating moving average with time window of {time_window}")
|
| 1615 |
|
| 1616 |
-
#
|
| 1617 |
-
|
| 1618 |
-
avg_apr_data_with_ma['adjusted_moving_avg'] = None # 3-day window for adjusted APR
|
| 1619 |
|
| 1620 |
-
#
|
| 1621 |
-
|
|
|
|
| 1622 |
|
| 1623 |
-
|
| 1624 |
-
|
| 1625 |
-
|
| 1626 |
-
window_start = current_time - time_window
|
| 1627 |
-
|
| 1628 |
-
# Get all data points within the 3-day time window
|
| 1629 |
-
window_data = apr_data_sorted[
|
| 1630 |
-
(apr_data_sorted['timestamp'] >= window_start) &
|
| 1631 |
-
(apr_data_sorted['timestamp'] <= current_time)
|
| 1632 |
-
]
|
| 1633 |
|
| 1634 |
-
|
| 1635 |
-
|
| 1636 |
-
|
| 1637 |
-
|
| 1638 |
-
|
| 1639 |
-
|
| 1640 |
-
|
| 1641 |
-
|
| 1642 |
-
|
| 1643 |
-
|
| 1644 |
-
|
| 1645 |
-
|
| 1646 |
-
|
| 1647 |
-
# If we don't have adjusted_apr data in this window but had some previously,
|
| 1648 |
-
# use the last valid value to maintain continuity in the graph
|
| 1649 |
-
if last_valid_adjusted_moving_avg is not None:
|
| 1650 |
-
avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = last_valid_adjusted_moving_avg
|
| 1651 |
-
logger.debug(f"No adjusted APR data in window, using last valid value: {last_valid_adjusted_moving_avg}")
|
| 1652 |
-
else:
|
| 1653 |
-
# If no data points in the window, use the current value
|
| 1654 |
-
avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
|
| 1655 |
-
logger.debug(f"No data points in time window for {current_time}, using current value {row['apr']}")
|
| 1656 |
|
| 1657 |
-
|
| 1658 |
|
| 1659 |
# Find the last date where we have valid moving average data
|
| 1660 |
-
last_valid_ma_date =
|
| 1661 |
|
| 1662 |
# Find the last date where we have valid adjusted moving average data
|
| 1663 |
last_valid_adj_ma_date = None
|
| 1664 |
-
if
|
| 1665 |
-
last_valid_adj_ma_date =
|
| 1666 |
|
| 1667 |
# Determine the last valid date for either moving average
|
| 1668 |
last_valid_date = last_valid_ma_date
|
|
@@ -1748,36 +1787,22 @@ def create_combined_time_series_graph(df):
|
|
| 1748 |
)
|
| 1749 |
logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
|
| 1750 |
|
| 1751 |
-
# Add APR moving average as a smooth line
|
| 1752 |
-
x_values_ma =
|
| 1753 |
-
y_values_ma =
|
| 1754 |
|
| 1755 |
# Create hover template for the APR moving average line
|
| 1756 |
hover_data_apr = []
|
| 1757 |
-
for idx, row in
|
| 1758 |
timestamp = row['timestamp']
|
| 1759 |
-
# Format timestamp to show only
|
| 1760 |
-
formatted_timestamp = timestamp.strftime('%Y-%m-%d
|
| 1761 |
-
|
| 1762 |
-
# Calculate number of active agents
|
| 1763 |
-
|
| 1764 |
-
time_24h_ago = timestamp - pd.Timedelta(hours=24)
|
| 1765 |
-
april_25_2025 = datetime(2025, 4, 25)
|
| 1766 |
-
|
| 1767 |
-
if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty:
|
| 1768 |
-
# After April 25th, 2025: Use ROI data
|
| 1769 |
-
roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) &
|
| 1770 |
-
(global_roi_df['timestamp'] <= timestamp)]
|
| 1771 |
-
active_agents = len(roi_window_data['agent_id'].unique())
|
| 1772 |
-
logger.debug(f"Using ROI data for active agent count at {timestamp} (after Apr 25): {active_agents} agents")
|
| 1773 |
-
else:
|
| 1774 |
-
# Before April 25th, 2025 or if ROI data is not available: Use APR data
|
| 1775 |
-
active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
|
| 1776 |
-
(apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
|
| 1777 |
-
logger.debug(f"Using APR data for active agent count at {timestamp} (before Apr 25): {active_agents} agents")
|
| 1778 |
|
| 1779 |
hover_data_apr.append(
|
| 1780 |
-
f"
|
| 1781 |
)
|
| 1782 |
|
| 1783 |
fig.add_trace(
|
|
@@ -1785,72 +1810,47 @@ def create_combined_time_series_graph(df):
|
|
| 1785 |
x=x_values_ma,
|
| 1786 |
y=y_values_ma,
|
| 1787 |
mode='lines', # Only lines for moving average
|
| 1788 |
-
line=dict(color='red', width=
|
| 1789 |
-
name='
|
| 1790 |
hovertext=hover_data_apr,
|
| 1791 |
hoverinfo='text',
|
| 1792 |
visible=True # Visible by default
|
| 1793 |
)
|
| 1794 |
)
|
| 1795 |
-
logger.info(f"Added
|
| 1796 |
|
| 1797 |
-
# Add adjusted APR moving average line if it exists
|
| 1798 |
-
if
|
| 1799 |
-
|
| 1800 |
-
|
| 1801 |
-
filled_avg_apr_data = avg_apr_data_with_ma.copy()
|
| 1802 |
-
filled_avg_apr_data['adjusted_moving_avg'] = filled_avg_apr_data['adjusted_moving_avg'].fillna(method='ffill')
|
| 1803 |
-
|
| 1804 |
-
# Use the filled dataframe for the adjusted APR line
|
| 1805 |
-
x_values_adj = filled_avg_apr_data['timestamp'].tolist()
|
| 1806 |
-
y_values_adj_ma = filled_avg_apr_data['adjusted_moving_avg'].tolist()
|
| 1807 |
|
| 1808 |
# Create hover template for the adjusted APR moving average line
|
| 1809 |
hover_data_adj = []
|
| 1810 |
-
for idx, row in
|
| 1811 |
timestamp = row['timestamp']
|
| 1812 |
-
# Format timestamp to show only
|
| 1813 |
-
formatted_timestamp = timestamp.strftime('%Y-%m-%d
|
| 1814 |
|
| 1815 |
-
# Calculate number of active agents
|
| 1816 |
-
|
| 1817 |
-
time_24h_ago = timestamp - pd.Timedelta(hours=24)
|
| 1818 |
-
april_25_2025 = datetime(2025, 4, 25)
|
| 1819 |
-
|
| 1820 |
-
if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty:
|
| 1821 |
-
# After April 25th, 2025: Use ROI data
|
| 1822 |
-
roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) &
|
| 1823 |
-
(global_roi_df['timestamp'] <= timestamp)]
|
| 1824 |
-
active_agents = len(roi_window_data['agent_id'].unique())
|
| 1825 |
-
logger.debug(f"Using ROI data for adjusted APR active agent count at {timestamp} (after Apr 25)")
|
| 1826 |
-
else:
|
| 1827 |
-
# Before April 25th, 2025 or if ROI data is not available: Use APR data
|
| 1828 |
-
active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
|
| 1829 |
-
(apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
|
| 1830 |
-
logger.debug(f"Using APR data for adjusted APR active agent count at {timestamp} (before Apr 25)")
|
| 1831 |
|
| 1832 |
-
|
| 1833 |
-
|
| 1834 |
-
|
| 1835 |
-
)
|
| 1836 |
-
else:
|
| 1837 |
-
hover_data_adj.append(
|
| 1838 |
-
f"Time: {formatted_timestamp}<br>Avg ETH Adjusted APR (3d window): N/A<br>Active agents (24h): {active_agents}"
|
| 1839 |
-
)
|
| 1840 |
|
| 1841 |
fig.add_trace(
|
| 1842 |
go.Scatter(
|
| 1843 |
-
x=
|
| 1844 |
y=y_values_adj_ma,
|
| 1845 |
mode='lines', # Only lines for moving average
|
| 1846 |
-
line=dict(color='green', width=
|
| 1847 |
-
name='
|
| 1848 |
hovertext=hover_data_adj,
|
| 1849 |
hoverinfo='text',
|
| 1850 |
visible=True # Visible by default
|
| 1851 |
)
|
| 1852 |
)
|
| 1853 |
-
logger.info(f"Added
|
| 1854 |
else:
|
| 1855 |
logger.warning("No adjusted APR moving average data available to plot")
|
| 1856 |
|
|
@@ -1942,13 +1942,13 @@ def create_combined_time_series_graph(df):
|
|
| 1942 |
)
|
| 1943 |
)
|
| 1944 |
|
| 1945 |
-
# Update y-axis with
|
| 1946 |
fig.update_yaxes(
|
| 1947 |
showgrid=True,
|
| 1948 |
gridwidth=1,
|
| 1949 |
gridcolor='rgba(0,0,0,0.1)',
|
| 1950 |
-
# Use
|
| 1951 |
-
|
| 1952 |
tickformat=".2f", # Format tick labels with 2 decimal places
|
| 1953 |
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
|
| 1954 |
title=None # Remove the built-in axis title since we're using annotations
|
|
@@ -2519,7 +2519,7 @@ def dashboard():
|
|
| 2519 |
# Update visibility of traces based on toggle values
|
| 2520 |
for i, trace in enumerate(combined_fig.data):
|
| 2521 |
# Check if this is a moving average trace
|
| 2522 |
-
if trace.name == '
|
| 2523 |
trace.visible = show_apr_ma
|
| 2524 |
elif trace.name == 'Average ETH Adjusted APR (3d window)':
|
| 2525 |
trace.visible = show_adjusted_apr_ma
|
|
@@ -2546,7 +2546,7 @@ def dashboard():
|
|
| 2546 |
# Update visibility of traces based on toggle values
|
| 2547 |
for i, trace in enumerate(combined_fig.data):
|
| 2548 |
# Check if this is a moving average trace
|
| 2549 |
-
if trace.name == '
|
| 2550 |
trace.visible = show_roi_ma
|
| 2551 |
|
| 2552 |
return combined_fig
|
|
|
|
| 838 |
|
| 839 |
return combined_fig, csv_file
|
| 840 |
|
| 841 |
+
def aggregate_daily_data(df, metric_column):
    """
    Collapse raw samples into one row per agent per calendar day.

    Every (date, agent_id) pair is reduced to the mean of ``metric_column``;
    the descriptive columns ``agent_name``, ``is_dummy`` and ``metric_type``
    keep the first value seen in the group.

    Args:
        df: DataFrame holding ``timestamp``, ``agent_id``, ``agent_name``,
            ``is_dummy``, ``metric_type`` and the metric column
        metric_column: Name of the metric column to average (e.g. 'apr' or 'roi')

    Returns:
        DataFrame with columns ``date``, ``agent_id``, the metric and the
        descriptive columns, plus a ``timestamp`` column rebuilt from
        ``date``. An empty input is returned unchanged.
    """
    if df.empty:
        return df

    # ``assign`` returns a fresh frame, so the caller's DataFrame never gains
    # the helper ``date`` column (time of day is dropped on purpose).
    df = df.assign(date=df['timestamp'].dt.date)

    # How each column is reduced within a (date, agent) group
    reducers = {
        metric_column: 'mean',
        'agent_name': 'first',
        'is_dummy': 'first',
        'metric_type': 'first',
    }
    per_day_and_agent = df.groupby(['date', 'agent_id'])
    daily_agent_data = per_day_and_agent.agg(reducers).reset_index()

    # Plotting needs datetimes rather than plain date objects
    daily_agent_data['timestamp'] = pd.to_datetime(daily_agent_data['date'])

    logger.info(f"Aggregated {len(df)} data points into {len(daily_agent_data)} daily values for {metric_column}")

    return daily_agent_data
|
| 873 |
+
|
| 874 |
+
def calculate_daily_medians(daily_agent_data, metric_column):
    """
    Reduce per-agent daily values to a single median per date.

    Args:
        daily_agent_data: DataFrame with a ``date`` column and one row per
            agent per day
        metric_column: Name of the metric column to take the median of
            (e.g. 'apr' or 'roi')

    Returns:
        DataFrame with one row per date holding ``date``, the median of the
        metric, and a ``timestamp`` column rebuilt from ``date``. An empty
        input is returned unchanged.
    """
    if daily_agent_data.empty:
        return daily_agent_data

    # The median aggregation skips missing values, so agents without data
    # on a given date do not influence that date's median.
    grouped_by_date = daily_agent_data.groupby('date')
    daily_medians = grouped_by_date.agg({metric_column: 'median'}).reset_index()

    # Plotting needs datetimes rather than plain date objects
    daily_medians['timestamp'] = pd.to_datetime(daily_medians['date'])

    logger.info(f"Calculated {len(daily_medians)} daily median values for {metric_column}")

    return daily_medians
|
| 899 |
+
|
| 900 |
+
def calculate_moving_average_medians(daily_medians, metric_column, window_days=7):
    """
    Calculate a trailing moving average of daily medians over a time window.

    For every row the average covers the ``window_days`` days that end at,
    and include, that row's timestamp, i.e. the half-open interval
    ``(timestamp - window_days, timestamp]``. With one row per calendar day
    a 7-day window therefore averages exactly 7 daily values (an interval
    closed at both ends would silently include an 8th day).

    Args:
        daily_medians: DataFrame with daily median values; must contain a
            datetime ``timestamp`` column and ``metric_column``
        metric_column: Name of the metric column ('apr' or 'roi')
        window_days: Number of days for the moving average window

    Returns:
        A copy of ``daily_medians`` sorted by ``timestamp`` with a float
        ``moving_avg`` column added. The column is present even when the
        input is empty, so callers can index it unconditionally.
    """
    if daily_medians.empty:
        # Callers read result['moving_avg'] without checking for emptiness;
        # always provide the column instead of letting them hit a KeyError.
        empty_result = daily_medians.copy()
        empty_result['moving_avg'] = pd.Series(dtype='float64')
        return empty_result

    # Sort by timestamp: time-based rolling requires a monotonic time axis
    daily_medians = daily_medians.sort_values('timestamp').copy()

    # Define the time window
    time_window = pd.Timedelta(days=window_days)
    logger.info(f"Calculating {window_days}-day moving average of daily medians for {metric_column}")

    if time_window <= pd.Timedelta(0):
        # Degenerate window: there is nothing to average over, so fall back
        # to the row's own value.
        daily_medians['moving_avg'] = daily_medians[metric_column]
    else:
        # Vectorised replacement for the per-row scan. ``closed='right'``
        # excludes the window start and includes the current timestamp;
        # ``min_periods=1`` keeps the early rows (partial windows) populated.
        # NOTE(review): assumes 'timestamp' holds no NaT values — confirm.
        rolling_mean = (
            daily_medians.set_index('timestamp')[metric_column]
            .rolling(time_window, min_periods=1, closed='right')
            .mean()
        )
        # Assign positionally: both objects share the same sorted row order
        daily_medians['moving_avg'] = rolling_mean.to_numpy()

    logger.info(f"Calculated {window_days}-day moving averages with {len(daily_medians)} points")

    return daily_medians
|
| 946 |
+
|
| 947 |
def create_combined_roi_time_series_graph(df):
|
| 948 |
+
"""Create a time series graph showing daily median ROI values with 7-day moving average"""
|
| 949 |
if len(df) == 0:
|
| 950 |
logger.error("No data to plot combined ROI graph")
|
| 951 |
fig = go.Figure()
|
|
|
|
| 1059 |
# Use the filtered data for all subsequent operations
|
| 1060 |
df = df_filtered
|
| 1061 |
|
| 1062 |
+
# NEW APPROACH: Daily aggregation and median calculation
|
| 1063 |
+
# Step 1: Aggregate data daily per agent (mean of values within each day)
|
| 1064 |
+
daily_agent_data = aggregate_daily_data(df, 'roi')
|
| 1065 |
|
| 1066 |
+
# Step 2: Calculate daily medians across all agents
|
| 1067 |
+
daily_medians = calculate_daily_medians(daily_agent_data, 'roi')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1068 |
|
| 1069 |
+
# Step 3: Calculate 7-day moving average of daily medians
|
| 1070 |
+
daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'roi', window_days=7)
|
|
|
|
| 1071 |
|
| 1072 |
+
logger.info(f"NEW APPROACH: Processed {len(df)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1073 |
|
| 1074 |
# Find the last date where we have valid moving average data
|
| 1075 |
+
last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
|
| 1076 |
|
| 1077 |
# If we don't have any valid moving average data, use the max time from the original data
|
| 1078 |
last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
|
|
|
|
| 1080 |
logger.info(f"Last valid moving average date: {last_valid_ma_date}")
|
| 1081 |
logger.info(f"Using last valid date for graph: {last_valid_date}")
|
| 1082 |
|
| 1083 |
+
# Plot individual agent daily data points with agent names in hover, but limit display for scalability
|
| 1084 |
+
if not daily_agent_data.empty:
|
| 1085 |
# Group by agent to use different colors for each agent
|
| 1086 |
+
unique_agents = daily_agent_data['agent_name'].unique()
|
| 1087 |
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
|
| 1088 |
|
| 1089 |
# Create a color map for agents
|
| 1090 |
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
|
| 1091 |
|
| 1092 |
# Calculate the total number of data points per agent to determine which are most active
|
| 1093 |
+
agent_counts = daily_agent_data['agent_name'].value_counts()
|
| 1094 |
|
| 1095 |
# Determine how many agents to show individually (limit to top 5 most active)
|
| 1096 |
MAX_VISIBLE_AGENTS = 5
|
|
|
|
| 1098 |
|
| 1099 |
logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
|
| 1100 |
|
| 1101 |
+
# Add daily aggregated data points for each agent, but only make top agents visible by default
|
| 1102 |
for agent_name in unique_agents:
|
| 1103 |
+
agent_data = daily_agent_data[daily_agent_data['agent_name'] == agent_name]
|
| 1104 |
|
| 1105 |
# Explicitly convert to Python lists
|
| 1106 |
x_values = agent_data['timestamp'].tolist()
|
|
|
|
| 1121 |
size=10,
|
| 1122 |
line=dict(width=1, color='black')
|
| 1123 |
),
|
| 1124 |
+
name=f'Agent: {agent_name} (Daily ROI)',
|
| 1125 |
+
hovertemplate='Time: %{x}<br>Daily ROI: %{y:.2f}%<br>Agent: ' + agent_name + '<extra></extra>',
|
| 1126 |
visible=is_visible # All agents hidden by default
|
| 1127 |
)
|
| 1128 |
)
|
| 1129 |
+
logger.info(f"Added daily ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
|
| 1130 |
|
| 1131 |
+
# Add ROI 7-day moving average of daily medians as a smooth line
|
| 1132 |
+
x_values_ma = daily_medians_with_ma['timestamp'].tolist()
|
| 1133 |
+
y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
|
| 1134 |
|
| 1135 |
# Create hover template for the ROI moving average line
|
| 1136 |
hover_data_roi = []
|
| 1137 |
+
for idx, row in daily_medians_with_ma.iterrows():
|
| 1138 |
timestamp = row['timestamp']
|
| 1139 |
+
# Format timestamp to show only date for daily data
|
| 1140 |
+
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
|
| 1141 |
|
| 1142 |
+
# Calculate number of active agents on this date
|
| 1143 |
+
active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
|
|
|
|
|
|
|
| 1144 |
|
| 1145 |
hover_data_roi.append(
|
| 1146 |
+
f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
|
| 1147 |
)
|
| 1148 |
|
| 1149 |
fig.add_trace(
|
|
|
|
| 1151 |
x=x_values_ma,
|
| 1152 |
y=y_values_ma,
|
| 1153 |
mode='lines', # Only lines for moving average
|
| 1154 |
+
line=dict(color='blue', width=3), # Thicker line for main trend
|
| 1155 |
+
name='Median ROI (7d window)',
|
| 1156 |
hovertext=hover_data_roi,
|
| 1157 |
hoverinfo='text',
|
| 1158 |
visible=True # Visible by default
|
| 1159 |
)
|
| 1160 |
)
|
| 1161 |
+
logger.info(f"Added 7-day moving average of daily median ROI trace with {len(x_values_ma)} points")
|
| 1162 |
|
| 1163 |
# Update layout with average runtime information in the title
|
| 1164 |
fig.update_layout(
|
|
|
|
| 1219 |
)
|
| 1220 |
)
|
| 1221 |
|
| 1222 |
+
# Update y-axis with autoscaling for ROI
|
| 1223 |
fig.update_yaxes(
|
| 1224 |
showgrid=True,
|
| 1225 |
gridwidth=1,
|
| 1226 |
gridcolor='rgba(0,0,0,0.1)',
|
| 1227 |
+
# Use autoscaling instead of fixed range
|
| 1228 |
+
autorange=True, # Enable autoscaling to fit the data
|
| 1229 |
tickformat=".2f", # Format tick labels with 2 decimal places
|
| 1230 |
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
|
| 1231 |
title=None # Remove the built-in axis title since we're using annotations
|
|
|
|
| 1661 |
# Use the filtered data for all subsequent operations
|
| 1662 |
apr_data = apr_data_filtered
|
| 1663 |
|
| 1664 |
+
# NEW APPROACH: Daily aggregation and median calculation for APR
|
| 1665 |
+
# Step 1: Aggregate data daily per agent (mean of values within each day)
|
| 1666 |
+
daily_agent_data = aggregate_daily_data(apr_data, 'apr')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1667 |
|
| 1668 |
+
# Step 2: Calculate daily medians across all agents
|
| 1669 |
+
daily_medians = calculate_daily_medians(daily_agent_data, 'apr')
|
|
|
|
| 1670 |
|
| 1671 |
+
# Step 3: Calculate 7-day moving average of daily medians
|
| 1672 |
+
daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'apr', window_days=7)
|
|
|
|
| 1673 |
|
| 1674 |
+
# Also handle adjusted APR if it exists
|
| 1675 |
+
daily_medians_adjusted = None
|
| 1676 |
+
daily_medians_adjusted_with_ma = None
|
| 1677 |
|
| 1678 |
+
if 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any():
|
| 1679 |
+
# Create a separate dataset for adjusted APR
|
| 1680 |
+
apr_data_with_adjusted = apr_data[apr_data['adjusted_apr'].notna()].copy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1681 |
|
| 1682 |
+
if not apr_data_with_adjusted.empty:
|
| 1683 |
+
# Step 1: Aggregate adjusted APR data daily per agent
|
| 1684 |
+
daily_agent_data_adjusted = aggregate_daily_data(apr_data_with_adjusted, 'adjusted_apr')
|
| 1685 |
+
|
| 1686 |
+
# Step 2: Calculate daily medians for adjusted APR
|
| 1687 |
+
daily_medians_adjusted = calculate_daily_medians(daily_agent_data_adjusted, 'adjusted_apr')
|
| 1688 |
+
|
| 1689 |
+
# Step 3: Calculate 7-day moving average of daily medians for adjusted APR
|
| 1690 |
+
daily_medians_adjusted_with_ma = calculate_moving_average_medians(daily_medians_adjusted, 'adjusted_apr', window_days=7)
|
| 1691 |
+
|
| 1692 |
+
logger.info(f"NEW APPROACH APR: Processed {len(apr_data)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
|
| 1693 |
+
if daily_medians_adjusted is not None:
|
| 1694 |
+
logger.info(f"NEW APPROACH Adjusted APR: Processed adjusted APR data → {len(daily_medians_adjusted)} daily medians")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1695 |
|
| 1696 |
+
# This old moving average calculation is no longer needed with the new daily median approach
|
| 1697 |
|
| 1698 |
# Find the last date where we have valid moving average data
|
| 1699 |
+
last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
|
| 1700 |
|
| 1701 |
# Find the last date where we have valid adjusted moving average data
|
| 1702 |
last_valid_adj_ma_date = None
|
| 1703 |
+
if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
|
| 1704 |
+
last_valid_adj_ma_date = daily_medians_adjusted_with_ma[daily_medians_adjusted_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_adjusted_with_ma['moving_avg'].dropna().empty else None
|
| 1705 |
|
| 1706 |
# Determine the last valid date for either moving average
|
| 1707 |
last_valid_date = last_valid_ma_date
|
|
|
|
| 1787 |
)
|
| 1788 |
logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
|
| 1789 |
|
| 1790 |
+
# Add APR 7-day moving average of daily medians as a smooth line
|
| 1791 |
+
x_values_ma = daily_medians_with_ma['timestamp'].tolist()
|
| 1792 |
+
y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
|
| 1793 |
|
| 1794 |
# Create hover template for the APR moving average line
|
| 1795 |
hover_data_apr = []
|
| 1796 |
+
for idx, row in daily_medians_with_ma.iterrows():
|
| 1797 |
timestamp = row['timestamp']
|
| 1798 |
+
# Format timestamp to show only date for daily data
|
| 1799 |
+
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
|
| 1800 |
+
|
| 1801 |
+
# Calculate number of active agents on this date
|
| 1802 |
+
active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1803 |
|
| 1804 |
hover_data_apr.append(
|
| 1805 |
+
f"Date: {formatted_timestamp}<br>Median APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
|
| 1806 |
)
|
| 1807 |
|
| 1808 |
fig.add_trace(
|
|
|
|
| 1810 |
x=x_values_ma,
|
| 1811 |
y=y_values_ma,
|
| 1812 |
mode='lines', # Only lines for moving average
|
| 1813 |
+
line=dict(color='red', width=3, shape='spline', smoothing=1.3), # Smooth curved line
|
| 1814 |
+
name='Median APR (7d window)',
|
| 1815 |
hovertext=hover_data_apr,
|
| 1816 |
hoverinfo='text',
|
| 1817 |
visible=True # Visible by default
|
| 1818 |
)
|
| 1819 |
)
|
| 1820 |
+
logger.info(f"Added 7-day moving average of daily median APR trace with {len(x_values_ma)} points")
|
| 1821 |
|
| 1822 |
+
# Add adjusted APR 7-day moving average line if it exists
|
| 1823 |
+
if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
|
| 1824 |
+
x_values_adj_ma = daily_medians_adjusted_with_ma['timestamp'].tolist()
|
| 1825 |
+
y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1826 |
|
| 1827 |
# Create hover template for the adjusted APR moving average line
|
| 1828 |
hover_data_adj = []
|
| 1829 |
+
for idx, row in daily_medians_adjusted_with_ma.iterrows():
|
| 1830 |
timestamp = row['timestamp']
|
| 1831 |
+
# Format timestamp to show only date for daily data
|
| 1832 |
+
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
|
| 1833 |
|
| 1834 |
+
# Calculate number of active agents on this date
|
| 1835 |
+
active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1836 |
|
| 1837 |
+
hover_data_adj.append(
|
| 1838 |
+
f"Date: {formatted_timestamp}<br>Median Adjusted APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
|
| 1839 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1840 |
|
| 1841 |
fig.add_trace(
|
| 1842 |
go.Scatter(
|
| 1843 |
+
x=x_values_adj_ma,
|
| 1844 |
y=y_values_adj_ma,
|
| 1845 |
mode='lines', # Only lines for moving average
|
| 1846 |
+
line=dict(color='green', width=3, shape='spline', smoothing=1.3), # Smooth curved line
|
| 1847 |
+
name='Median Adjusted APR (7d window)',
|
| 1848 |
hovertext=hover_data_adj,
|
| 1849 |
hoverinfo='text',
|
| 1850 |
visible=True # Visible by default
|
| 1851 |
)
|
| 1852 |
)
|
| 1853 |
+
logger.info(f"Added 7-day moving average of daily median Adjusted APR trace with {len(x_values_adj_ma)} points")
|
| 1854 |
else:
|
| 1855 |
logger.warning("No adjusted APR moving average data available to plot")
|
| 1856 |
|
|
|
|
| 1942 |
)
|
| 1943 |
)
|
| 1944 |
|
| 1945 |
+
# Update y-axis with autoscaling
|
| 1946 |
fig.update_yaxes(
|
| 1947 |
showgrid=True,
|
| 1948 |
gridwidth=1,
|
| 1949 |
gridcolor='rgba(0,0,0,0.1)',
|
| 1950 |
+
# Use autoscaling instead of fixed range
|
| 1951 |
+
autorange=True, # Enable autoscaling to fit the data
|
| 1952 |
tickformat=".2f", # Format tick labels with 2 decimal places
|
| 1953 |
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
|
| 1954 |
title=None # Remove the built-in axis title since we're using annotations
|
|
|
|
| 2519 |
# Update visibility of traces based on toggle values
|
| 2520 |
for i, trace in enumerate(combined_fig.data):
|
| 2521 |
# Check if this is a moving average trace
|
| 2522 |
+
if trace.name == 'Median APR (7d window)':
|
| 2523 |
trace.visible = show_apr_ma
|
| 2524 |
elif trace.name == 'Average ETH Adjusted APR (3d window)':
|
| 2525 |
trace.visible = show_adjusted_apr_ma
|
|
|
|
| 2546 |
# Update visibility of traces based on toggle values
|
| 2547 |
for i, trace in enumerate(combined_fig.data):
|
| 2548 |
# Check if this is a moving average trace
|
| 2549 |
+
if trace.name == 'Median ROI (7d window)':
|
| 2550 |
trace.visible = show_roi_ma
|
| 2551 |
|
| 2552 |
return combined_fig
|