Spaces:
Sleeping
Sleeping
gauravlochab
committed on
Commit
·
2425de8
1
Parent(s):
db4f69b
feat: implement data fetching for APR and ROI metrics
Browse files- app.py +102 -1
- fetch_and_preprocess_data.py +274 -0
app.py
CHANGED
|
@@ -18,6 +18,7 @@ from typing import List, Dict, Any, Optional
|
|
| 18 |
# Comment out the import for now and replace with dummy functions
|
| 19 |
# from app_trans_new import create_transcation_visualizations,create_active_agents_visualizations
|
| 20 |
# APR visualization functions integrated directly
|
|
|
|
| 21 |
|
| 22 |
# Set up logging with appropriate verbosity
|
| 23 |
logging.basicConfig(
|
|
@@ -42,6 +43,8 @@ logger.info(f"Running from directory: {os.getcwd()}")
|
|
| 42 |
# Global variables to store the data for reuse
|
| 43 |
global_df = None
|
| 44 |
global_roi_df = None
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# Configuration
|
| 47 |
API_BASE_URL = "https://afmdb.autonolas.tech"
|
|
@@ -465,8 +468,106 @@ def fetch_apr_data_from_db():
|
|
| 465 |
# Convert list of dictionaries to DataFrame for ROI
|
| 466 |
global_roi_df = pd.DataFrame(roi_data_list)
|
| 467 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
# Log the resulting dataframe
|
| 469 |
-
logger.info(f"Created DataFrame with {len(global_df)} rows")
|
| 470 |
logger.info(f"DataFrame columns: {global_df.columns.tolist()}")
|
| 471 |
logger.info(f"APR statistics: min={global_df['apr'].min()}, max={global_df['apr'].max()}, mean={global_df['apr'].mean()}")
|
| 472 |
|
|
|
|
| 18 |
# Comment out the import for now and replace with dummy functions
|
| 19 |
# from app_trans_new import create_transcation_visualizations,create_active_agents_visualizations
|
| 20 |
# APR visualization functions integrated directly
|
| 21 |
+
from fetch_and_preprocess_data import generate_continuous_random_data
|
| 22 |
|
| 23 |
# Set up logging with appropriate verbosity
|
| 24 |
logging.basicConfig(
|
|
|
|
| 43 |
# Global variables to store the data for reuse
|
| 44 |
global_df = None
|
| 45 |
global_roi_df = None
|
| 46 |
+
global_dummy_apr_df = None # Store dummy APR data separately
|
| 47 |
+
global_dummy_roi_df = None # Store dummy ROI data separately
|
| 48 |
|
| 49 |
# Configuration
|
| 50 |
API_BASE_URL = "https://afmdb.autonolas.tech"
|
|
|
|
| 468 |
# Convert list of dictionaries to DataFrame for ROI
|
| 469 |
global_roi_df = pd.DataFrame(roi_data_list)
|
| 470 |
|
| 471 |
+
# Handle dummy data generation
|
| 472 |
+
global global_dummy_apr_df
|
| 473 |
+
global global_dummy_roi_df
|
| 474 |
+
|
| 475 |
+
logger.info("Handling dummy data...")
|
| 476 |
+
|
| 477 |
+
# Generate dummy APR data only if needed
|
| 478 |
+
if not global_df.empty:
|
| 479 |
+
# Check if we already have dummy data
|
| 480 |
+
if global_dummy_apr_df is None:
|
| 481 |
+
# First time - generate all dummy data
|
| 482 |
+
logger.info("Generating initial dummy APR data...")
|
| 483 |
+
global_dummy_apr_df = generate_continuous_random_data(global_df)
|
| 484 |
+
|
| 485 |
+
# Only keep APR data
|
| 486 |
+
if not global_dummy_apr_df.empty:
|
| 487 |
+
global_dummy_apr_df = global_dummy_apr_df[global_dummy_apr_df['metric_type'] == 'APR']
|
| 488 |
+
logger.info(f"Generated {len(global_dummy_apr_df)} initial dummy APR data points")
|
| 489 |
+
else:
|
| 490 |
+
# We already have dummy data - check if we need to generate more
|
| 491 |
+
# Find the latest timestamp in the real data
|
| 492 |
+
latest_real_timestamp = global_df['timestamp'].max()
|
| 493 |
+
|
| 494 |
+
# Find the latest timestamp in the dummy data
|
| 495 |
+
latest_dummy_timestamp = global_dummy_apr_df['timestamp'].max() if not global_dummy_apr_df.empty else None
|
| 496 |
+
|
| 497 |
+
# If the real data has newer timestamps, generate more dummy data
|
| 498 |
+
if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp:
|
| 499 |
+
logger.info("Generating additional dummy APR data for new timestamps...")
|
| 500 |
+
|
| 501 |
+
# Create a temporary dataframe with only the latest real data
|
| 502 |
+
temp_df = global_df[global_df['timestamp'] > latest_dummy_timestamp] if latest_dummy_timestamp else global_df
|
| 503 |
+
|
| 504 |
+
# Generate dummy data for the new timestamps
|
| 505 |
+
new_dummy_data = generate_continuous_random_data(temp_df)
|
| 506 |
+
|
| 507 |
+
# Only keep APR data
|
| 508 |
+
if not new_dummy_data.empty:
|
| 509 |
+
new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == 'APR']
|
| 510 |
+
logger.info(f"Generated {len(new_dummy_data)} additional dummy APR data points")
|
| 511 |
+
|
| 512 |
+
# Append the new dummy data to the existing dummy data
|
| 513 |
+
global_dummy_apr_df = pd.concat([global_dummy_apr_df, new_dummy_data], ignore_index=True)
|
| 514 |
+
else:
|
| 515 |
+
logger.info("No new timestamps in real data, using existing dummy APR data")
|
| 516 |
+
|
| 517 |
+
# Combine real and dummy APR data
|
| 518 |
+
if not global_dummy_apr_df.empty:
|
| 519 |
+
apr_dummy_count = len(global_dummy_apr_df)
|
| 520 |
+
global_df = pd.concat([global_df, global_dummy_apr_df], ignore_index=True)
|
| 521 |
+
logger.info(f"Added {apr_dummy_count} dummy APR data points to the dataset")
|
| 522 |
+
|
| 523 |
+
# Generate dummy ROI data only if needed
|
| 524 |
+
if not global_roi_df.empty:
|
| 525 |
+
# Check if we already have dummy data
|
| 526 |
+
if global_dummy_roi_df is None:
|
| 527 |
+
# First time - generate all dummy data
|
| 528 |
+
logger.info("Generating initial dummy ROI data...")
|
| 529 |
+
global_dummy_roi_df = generate_continuous_random_data(global_roi_df)
|
| 530 |
+
|
| 531 |
+
# Only keep ROI data
|
| 532 |
+
if not global_dummy_roi_df.empty:
|
| 533 |
+
global_dummy_roi_df = global_dummy_roi_df[global_dummy_roi_df['metric_type'] == 'ROI']
|
| 534 |
+
logger.info(f"Generated {len(global_dummy_roi_df)} initial dummy ROI data points")
|
| 535 |
+
else:
|
| 536 |
+
# We already have dummy data - check if we need to generate more
|
| 537 |
+
# Find the latest timestamp in the real data
|
| 538 |
+
latest_real_timestamp = global_roi_df['timestamp'].max()
|
| 539 |
+
|
| 540 |
+
# Find the latest timestamp in the dummy data
|
| 541 |
+
latest_dummy_timestamp = global_dummy_roi_df['timestamp'].max() if not global_dummy_roi_df.empty else None
|
| 542 |
+
|
| 543 |
+
# If the real data has newer timestamps, generate more dummy data
|
| 544 |
+
if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp:
|
| 545 |
+
logger.info("Generating additional dummy ROI data for new timestamps...")
|
| 546 |
+
|
| 547 |
+
# Create a temporary dataframe with only the latest real data
|
| 548 |
+
temp_df = global_roi_df[global_roi_df['timestamp'] > latest_dummy_timestamp] if latest_dummy_timestamp else global_roi_df
|
| 549 |
+
|
| 550 |
+
# Generate dummy data for the new timestamps
|
| 551 |
+
new_dummy_data = generate_continuous_random_data(temp_df)
|
| 552 |
+
|
| 553 |
+
# Only keep ROI data
|
| 554 |
+
if not new_dummy_data.empty:
|
| 555 |
+
new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == 'ROI']
|
| 556 |
+
logger.info(f"Generated {len(new_dummy_data)} additional dummy ROI data points")
|
| 557 |
+
|
| 558 |
+
# Append the new dummy data to the existing dummy data
|
| 559 |
+
global_dummy_roi_df = pd.concat([global_dummy_roi_df, new_dummy_data], ignore_index=True)
|
| 560 |
+
else:
|
| 561 |
+
logger.info("No new timestamps in real data, using existing dummy ROI data")
|
| 562 |
+
|
| 563 |
+
# Combine real and dummy ROI data
|
| 564 |
+
if not global_dummy_roi_df.empty:
|
| 565 |
+
roi_dummy_count = len(global_dummy_roi_df)
|
| 566 |
+
global_roi_df = pd.concat([global_roi_df, global_dummy_roi_df], ignore_index=True)
|
| 567 |
+
logger.info(f"Added {roi_dummy_count} dummy ROI data points to the dataset")
|
| 568 |
+
|
| 569 |
# Log the resulting dataframe
|
| 570 |
+
logger.info(f"Created DataFrame with {len(global_df)} rows (including dummy data)")
|
| 571 |
logger.info(f"DataFrame columns: {global_df.columns.tolist()}")
|
| 572 |
logger.info(f"APR statistics: min={global_df['apr'].min()}, max={global_df['apr'].max()}, mean={global_df['apr'].mean()}")
|
| 573 |
|
fetch_and_preprocess_data.py
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import random
|
| 4 |
+
from datetime import datetime, timedelta
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
# Get the logger
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
def generate_continuous_random_data(existing_data, end_time=None):
    """
    Generate authentic-looking random data that continues from existing data,
    with adjusted APR following APR with a small offset.

    For every agent found in existing_data, emits two rows per 10-minute
    timestamp between the last real timestamp and end_time: one APR row
    (metric_type='APR', values in apr/adjusted_apr) and one ROI row
    (metric_type='ROI', value in roi). All generated rows carry is_dummy=True.

    Args:
        existing_data: DataFrame with at least 'timestamp', 'agent_id' and
            'agent_name' columns; 'metric_type', 'apr', 'adjusted_apr' and
            'roi' are used, when present, to continue from the last real values.
        end_time: Optional end time (defaults to datetime.now()).

    Returns:
        DataFrame of dummy data points, or an empty DataFrame when no new
        timestamps are needed.
    """
    # Use current time if not specified
    if end_time is None:
        end_time = datetime.now()

    # Continue 10 minutes after the newest real point; with no real data,
    # backfill the last 30 days instead.
    if not existing_data.empty:
        start_time = existing_data['timestamp'].max() + timedelta(minutes=10)
    else:
        start_time = end_time - timedelta(days=30)

    # Generate timestamps with 10-minute intervals (inclusive of end_time)
    timestamps = []
    current = start_time
    while current <= end_time:
        timestamps.append(current)
        current += timedelta(minutes=10)

    if not timestamps:
        return pd.DataFrame()  # No new data needed

    # Get unique agents from existing data
    if not existing_data.empty:
        unique_agents = existing_data[['agent_id', 'agent_name']].drop_duplicates().to_dict('records')
    else:
        # Create one dummy agent if no existing data
        unique_agents = [{'agent_id': 'dummy_agent', 'agent_name': 'Dummy Agent'}]

    dummy_data_list = []

    # For each agent, create continuous dummy data
    for agent in unique_agents:
        agent_id = agent['agent_id']

        # Get the last real values for this agent to ensure continuity
        last_apr = None
        last_adjusted_apr = None
        last_roi = None

        # Guard on 'metric_type': frames produced before this feature may not
        # carry that column, and existing_data['metric_type'] would raise.
        if not existing_data.empty and 'metric_type' in existing_data.columns:
            agent_rows = existing_data[existing_data['agent_id'] == agent_id]

            # Get last APR value
            agent_apr_data = agent_rows[agent_rows['metric_type'] == 'APR']
            if not agent_apr_data.empty:
                last_apr = agent_apr_data['apr'].iloc[-1]
                # 'adjusted_apr' may be absent on ROI-only frames
                if 'adjusted_apr' in agent_apr_data.columns:
                    last_adjusted_apr = agent_apr_data['adjusted_apr'].iloc[-1]

            # Get last ROI value
            agent_roi_data = agent_rows[agent_rows['metric_type'] == 'ROI']
            if not agent_roi_data.empty:
                last_roi = agent_roi_data['roi'].iloc[-1]

        # If no last values, start with reasonable values in our range
        if last_apr is None or pd.isna(last_apr):
            last_apr = random.uniform(-0.1, 0.1)  # Start close to zero

        if last_adjusted_apr is None or pd.isna(last_adjusted_apr):
            # If we have APR but no adjusted APR, make it slightly different
            # than APR — sometimes higher, sometimes lower, to look natural.
            if random.random() > 0.5:
                last_adjusted_apr = last_apr + random.uniform(0.05, 0.15)
            else:
                last_adjusted_apr = last_apr - random.uniform(0.05, 0.15)
            last_adjusted_apr = max(-0.5, min(1.0, last_adjusted_apr))
        # NOTE(review): last_adjusted_apr is computed for continuity but never
        # read below — the adjusted series is re-derived from the APR walk.

        if last_roi is None or pd.isna(last_roi):
            last_roi = random.uniform(-0.1, 0.1)  # Start close to zero

        # Generate APR values using a smoother random walk
        apr_values = [last_apr]

        # Create a more natural pattern with a few trend periods
        num_points = len(timestamps)
        trend_periods = []

        # Create 3-5 trend periods
        num_trends = random.randint(3, 5)
        period_length = num_points // num_trends

        for i in range(num_trends):
            # Each trend has a direction (up, down, or sideways)
            # and a strength (how strong the trend is)
            direction = random.choice([-1, 0, 1])  # -1: down, 0: sideways, 1: up
            strength = random.uniform(0.01, 0.03)  # Smaller changes look natural

            start_idx = i * period_length
            end_idx = min((i + 1) * period_length, num_points)

            trend_periods.append({
                'start': start_idx,
                'end': end_idx,
                'direction': direction,
                'strength': strength
            })

        # Generate values following the trends
        for i in range(1, num_points):
            # Find which trend period we're in
            current_trend = None
            for trend in trend_periods:
                if trend['start'] <= i < trend['end']:
                    current_trend = trend
                    break

            # Points past the last period boundary get a neutral trend
            if current_trend is None:
                current_trend = {'direction': 0, 'strength': 0.01}

            # Base change is influenced by the trend
            base_change = current_trend['direction'] * current_trend['strength']

            # Add some randomness (normal distribution for natural variation)
            random_change = random.normalvariate(0, 0.01)

            # Previous momentum (30% influence to make it smoother)
            prev_change = 0 if i == 1 else apr_values[i-1] - apr_values[i-2]
            momentum = 0.3 * prev_change

            # Combine all factors and apply the change
            new_value = apr_values[i-1] + base_change + random_change + momentum

            # Keep within reasonable bounds (-0.5 to 1.0)
            new_value = max(-0.5, min(1.0, new_value))

            apr_values.append(new_value)

        # Generate adjusted APR values that follow APR with a small, varying offset
        adjusted_apr_values = []
        for i, apr_value in enumerate(apr_values):
            # Periodically recalculate the offset direction so the adjusted
            # series is sometimes above and sometimes below APR
            if i % 5 == 0:
                offset_direction = 1 if random.random() > 0.5 else -1

            offset = offset_direction * random.uniform(0.05, 0.15)
            adjusted_value = apr_value + offset

            # Keep within reasonable bounds (-0.5 to 1.0)
            adjusted_value = max(-0.5, min(1.0, adjusted_value))
            adjusted_apr_values.append(adjusted_value)

        # Generate ROI values with a different approach to ensure a good
        # distribution. ROI is multiplied by 100 in app.py, so values are
        # generated between -0.01 and 0 to yield final values in [-1, 0].

        # First, create a sequence of target values that we want to hit —
        # this ensures good coverage of the entire range (avoiding exactly 0).
        target_points = []
        for i in range(5):  # Create 5 target points
            target = -0.01 + (i * 0.0025)  # Values from -0.01 to -0.0025
            target_points.append(target)

        # Shuffle the targets to make the pattern less predictable
        random.shuffle(target_points)

        # Divide the total points into segments, one for each target
        segment_length = num_points // len(target_points)

        # Generate the ROI values
        roi_values = []

        # Start with the last real value, or mid-range if none is valid
        if last_roi is None or pd.isna(last_roi) or last_roi < -0.01 or last_roi > 0:
            current_value = -0.005
        else:
            current_value = last_roi

        roi_values.append(current_value)

        # For each segment, gradually move toward the target value
        for segment_idx, target in enumerate(target_points):
            start_idx = segment_idx * segment_length
            end_idx = min((segment_idx + 1) * segment_length, num_points)

            # How many steps we have to reach the target
            steps = end_idx - start_idx

            if steps <= 0:
                continue  # Skip if this segment has no points

            # Current value is the last value in roi_values
            current_value = roi_values[-1]

            # Calculate how much to change per step to reach the target
            step_change = (target - current_value) / steps

            # Generate values for this segment
            for step in range(steps):
                # Move toward the target with a little randomness
                random_factor = random.uniform(-0.0005, 0.0005)
                new_value = current_value + step_change + random_factor

                # Ensure we stay within range
                new_value = max(-0.01, min(0, new_value))

                roi_values.append(new_value)
                current_value = new_value

        # If we didn't generate enough points, pad with small variations
        while len(roi_values) < num_points + 1:
            last_value = roi_values[-1]
            new_value = last_value + random.uniform(-0.001, 0.001)
            new_value = max(-0.01, min(0, new_value))
            roi_values.append(new_value)

        # If we generated too many points, trim the list
        roi_values = roi_values[:num_points + 1]

        # Create dummy data points: one APR row and one ROI row per timestamp
        for i, timestamp in enumerate(timestamps):
            # APR data
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': apr_values[i],
                'adjusted_apr': adjusted_apr_values[i],
                'roi': None,
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'APR'
            })

            # ROI data
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': None,
                'adjusted_apr': None,
                'roi': roi_values[i],
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'ROI'
            })

    return pd.DataFrame(dummy_data_list)