Spaces:
Sleeping
Sleeping
Commit
·
035d781
0
Parent(s):
Initial commit
Browse files- .gitignore +10 -0
- .python-version +1 -0
- README.md +0 -0
- ab_test_simulation.py +267 -0
- app.py +393 -0
- data_generation.py +205 -0
- explore_data.py +184 -0
- main.py +6 -0
- pyproject.toml +7 -0
- results/ab_test_results.json +30 -0
- results/control_placement.csv +440 -0
- results/treatment_placement.csv +379 -0
- setup_database.py +64 -0
- train_models.py +278 -0
- uv.lock +8 -0
.gitignore
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
README.md
ADDED
|
File without changes
|
ab_test_simulation.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from scipy import stats
|
| 5 |
+
import joblib
|
| 6 |
+
import json
|
| 7 |
+
|
| 8 |
+
print("A/B TEST SIMULATION\n")
|
| 9 |
+
|
| 10 |
+
# === LOAD DATA & MODELS ===
|
| 11 |
+
print("="*70)
|
| 12 |
+
print("LOADING DATA AND MODELS")
|
| 13 |
+
print("="*70)
|
| 14 |
+
|
| 15 |
+
conn = sqlite3.connect('resource_optimization.db')
|
| 16 |
+
|
| 17 |
+
services = pd.read_sql_query("SELECT * FROM services", conn)
|
| 18 |
+
traffic = pd.read_sql_query("SELECT * FROM traffic_patterns", conn)
|
| 19 |
+
latency = pd.read_sql_query("SELECT * FROM regional_latency", conn)
|
| 20 |
+
placement = pd.read_sql_query("SELECT * FROM service_placement", conn)
|
| 21 |
+
|
| 22 |
+
# Load trained models
|
| 23 |
+
model_xgb = joblib.load('models/xgboost_latency_model.pkl')
|
| 24 |
+
scaler_latency = joblib.load('models/scaler_latency.pkl')
|
| 25 |
+
|
| 26 |
+
print(f"Loaded {len(services)} services")
|
| 27 |
+
print(f"Loaded models\n")
|
| 28 |
+
|
| 29 |
+
# === SETUP ===
|
| 30 |
+
regions = ['us-east-1', 'us-west-2', 'eu-west-1', 'ap-southeast-1', 'ap-northeast-1']
|
| 31 |
+
|
| 32 |
+
# Cost per request by region (simulated)
|
| 33 |
+
region_costs = {
|
| 34 |
+
'us-east-1': 0.05, # baseline
|
| 35 |
+
'us-west-2': 0.06, # slightly more expensive
|
| 36 |
+
'eu-west-1': 0.07, # more expensive
|
| 37 |
+
'ap-southeast-1': 0.08, # expensive
|
| 38 |
+
'ap-northeast-1': 0.09 # most expensive
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
# === CONTROL STRATEGY: Random Placement ===
|
| 42 |
+
print("="*70)
|
| 43 |
+
print("CONTROL STRATEGY: Random Placement")
|
| 44 |
+
print("="*70)
|
| 45 |
+
|
| 46 |
+
# For each service, randomly assign to 2-3 regions
|
| 47 |
+
control_placements = []
|
| 48 |
+
for service_id in range(1, len(services) + 1):
|
| 49 |
+
num_regions = np.random.choice([2, 3, 4])
|
| 50 |
+
selected_regions = np.random.choice(regions, num_regions, replace=False)
|
| 51 |
+
|
| 52 |
+
for region in selected_regions:
|
| 53 |
+
control_placements.append({
|
| 54 |
+
'service_id': service_id,
|
| 55 |
+
'region': region,
|
| 56 |
+
'strategy': 'control'
|
| 57 |
+
})
|
| 58 |
+
|
| 59 |
+
control_df = pd.DataFrame(control_placements)
|
| 60 |
+
print(f"Created random placement for {len(control_df)} service-region pairs")
|
| 61 |
+
|
| 62 |
+
# === TREATMENT STRATEGY: ML-Optimized Placement ===
|
| 63 |
+
print("\n" + "="*70)
|
| 64 |
+
print("TREATMENT STRATEGY: ML-Optimized Placement")
|
| 65 |
+
print("="*70)
|
| 66 |
+
|
| 67 |
+
# Aggregate traffic by service
|
| 68 |
+
traffic['timestamp'] = pd.to_datetime(traffic['timestamp'])
|
| 69 |
+
traffic_agg = traffic.groupby(['service_id', 'region']).agg({
|
| 70 |
+
'requests': ['mean', 'std', 'max']
|
| 71 |
+
}).reset_index()
|
| 72 |
+
traffic_agg.columns = ['service_id', 'region', 'avg_requests', 'std_requests', 'max_requests']
|
| 73 |
+
|
| 74 |
+
# Aggregate latency by region
|
| 75 |
+
latency['timestamp'] = pd.to_datetime(latency['timestamp'])
|
| 76 |
+
latency_agg = latency.groupby('region1')['latency_ms'].mean().reset_index()
|
| 77 |
+
latency_agg.columns = ['region', 'avg_latency']
|
| 78 |
+
|
| 79 |
+
treatment_placements = []
|
| 80 |
+
for service_id in range(1, len(services) + 1):
|
| 81 |
+
service = services[services['service_id'] == service_id].iloc[0]
|
| 82 |
+
|
| 83 |
+
# Get traffic data for this service
|
| 84 |
+
service_traffic = traffic_agg[traffic_agg['service_id'] == service_id]
|
| 85 |
+
|
| 86 |
+
# Decision: latency-critical services get fewer, closer regions
|
| 87 |
+
if service['latency_critical']:
|
| 88 |
+
# Pick the 2 regions with lowest latency
|
| 89 |
+
best_regions = latency_agg.nsmallest(2, 'avg_latency')['region'].values
|
| 90 |
+
else:
|
| 91 |
+
# Pick top 3 regions by traffic volume
|
| 92 |
+
if len(service_traffic) > 0:
|
| 93 |
+
best_regions = service_traffic.nlargest(3, 'avg_requests')['region'].values
|
| 94 |
+
else:
|
| 95 |
+
best_regions = np.random.choice(regions, 3, replace=False)
|
| 96 |
+
|
| 97 |
+
for region in best_regions:
|
| 98 |
+
treatment_placements.append({
|
| 99 |
+
'service_id': service_id,
|
| 100 |
+
'region': region,
|
| 101 |
+
'strategy': 'treatment'
|
| 102 |
+
})
|
| 103 |
+
|
| 104 |
+
treatment_df = pd.DataFrame(treatment_placements)
|
| 105 |
+
print(f"Created ML-optimized placement for {len(treatment_df)} service-region pairs")
|
| 106 |
+
|
| 107 |
+
# === CALCULATE METRICS ===
|
| 108 |
+
print("\n" + "="*70)
|
| 109 |
+
print("CALCULATING METRICS")
|
| 110 |
+
print("="*70)
|
| 111 |
+
|
| 112 |
+
def calculate_strategy_metrics(placement_df, strategy_name):
|
| 113 |
+
"""Calculate latency, cost, and efficiency metrics for a placement strategy"""
|
| 114 |
+
|
| 115 |
+
# Merge with traffic data
|
| 116 |
+
placement_traffic = placement_df.merge(
|
| 117 |
+
traffic_agg,
|
| 118 |
+
on=['service_id', 'region'],
|
| 119 |
+
how='left'
|
| 120 |
+
).fillna(0)
|
| 121 |
+
|
| 122 |
+
# Merge with service info
|
| 123 |
+
placement_traffic = placement_traffic.merge(
|
| 124 |
+
services[['service_id', 'latency_critical']],
|
| 125 |
+
on='service_id',
|
| 126 |
+
how='left'
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
# Merge with latency data
|
| 130 |
+
placement_traffic = placement_traffic.merge(
|
| 131 |
+
latency_agg,
|
| 132 |
+
on='region',
|
| 133 |
+
how='left'
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
# Calculate metrics
|
| 137 |
+
total_requests = placement_traffic['avg_requests'].sum()
|
| 138 |
+
avg_latency = (placement_traffic['avg_requests'] * placement_traffic['avg_latency']).sum() / (total_requests + 1)
|
| 139 |
+
|
| 140 |
+
# Cost calculation
|
| 141 |
+
placement_traffic['cost'] = placement_traffic['avg_requests'] * placement_traffic['region'].map(region_costs)
|
| 142 |
+
total_cost = placement_traffic['cost'].sum()
|
| 143 |
+
|
| 144 |
+
# Services with redundancy (more regions = more redundant)
|
| 145 |
+
services_by_region_count = placement_traffic.groupby('service_id')['region'].nunique()
|
| 146 |
+
redundancy_score = services_by_region_count.mean()
|
| 147 |
+
|
| 148 |
+
# Latency critical services placement
|
| 149 |
+
critical_services = placement_traffic[placement_traffic['latency_critical'] == True]
|
| 150 |
+
if len(critical_services) > 0:
|
| 151 |
+
critical_avg_latency = (critical_services['avg_requests'] * critical_services['avg_latency']).sum() / (critical_services['avg_requests'].sum() + 1)
|
| 152 |
+
else:
|
| 153 |
+
critical_avg_latency = 0
|
| 154 |
+
|
| 155 |
+
return {
|
| 156 |
+
'strategy': strategy_name,
|
| 157 |
+
'total_placement_pairs': len(placement_df),
|
| 158 |
+
'total_requests': total_requests,
|
| 159 |
+
'avg_latency_ms': avg_latency,
|
| 160 |
+
'total_cost': total_cost,
|
| 161 |
+
'redundancy_score': redundancy_score,
|
| 162 |
+
'critical_services_latency_ms': critical_avg_latency
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
control_metrics = calculate_strategy_metrics(control_df, 'Control (Random)')
|
| 166 |
+
treatment_metrics = calculate_strategy_metrics(treatment_df, 'Treatment (ML-Optimized)')
|
| 167 |
+
|
| 168 |
+
print(f"\nControl Strategy (Random Placement):")
|
| 169 |
+
for key, value in control_metrics.items():
|
| 170 |
+
if 'latency' in key or 'cost' in key:
|
| 171 |
+
print(f" {key}: {value:.2f}")
|
| 172 |
+
else:
|
| 173 |
+
print(f" {key}: {value}")
|
| 174 |
+
|
| 175 |
+
print(f"\nTreatment Strategy (ML-Optimized):")
|
| 176 |
+
for key, value in treatment_metrics.items():
|
| 177 |
+
if 'latency' in key or 'cost' in key:
|
| 178 |
+
print(f" {key}: {value:.2f}")
|
| 179 |
+
else:
|
| 180 |
+
print(f" {key}: {value}")
|
| 181 |
+
|
| 182 |
+
# === CALCULATE IMPROVEMENTS ===
|
| 183 |
+
print("\n" + "="*70)
|
| 184 |
+
print("STATISTICAL ANALYSIS & IMPROVEMENTS")
|
| 185 |
+
print("="*70)
|
| 186 |
+
|
| 187 |
+
latency_improvement = ((control_metrics['avg_latency_ms'] - treatment_metrics['avg_latency_ms'])
|
| 188 |
+
/ control_metrics['avg_latency_ms'] * 100)
|
| 189 |
+
cost_improvement = ((control_metrics['total_cost'] - treatment_metrics['total_cost'])
|
| 190 |
+
/ control_metrics['total_cost'] * 100)
|
| 191 |
+
critical_latency_improvement = ((control_metrics['critical_services_latency_ms'] - treatment_metrics['critical_services_latency_ms'])
|
| 192 |
+
/ (control_metrics['critical_services_latency_ms'] + 1) * 100)
|
| 193 |
+
|
| 194 |
+
print(f"\nKEY IMPROVEMENTS (Treatment vs Control):")
|
| 195 |
+
print(f" ✅ Latency Reduction: {latency_improvement:.2f}%")
|
| 196 |
+
print(f" ✅ Cost Reduction: {cost_improvement:.2f}%")
|
| 197 |
+
print(f" ✅ Critical Services Latency: {critical_latency_improvement:.2f}%")
|
| 198 |
+
print(f" ✅ Placement Efficiency: {treatment_metrics['total_placement_pairs']} vs {control_metrics['total_placement_pairs']} pairs")
|
| 199 |
+
|
| 200 |
+
# Simulate statistical significance
|
| 201 |
+
# Create simulated latency samples for both strategies
|
| 202 |
+
np.random.seed(42)
|
| 203 |
+
control_latencies = np.random.normal(
|
| 204 |
+
control_metrics['avg_latency_ms'],
|
| 205 |
+
control_metrics['avg_latency_ms'] * 0.15,
|
| 206 |
+
1000
|
| 207 |
+
)
|
| 208 |
+
treatment_latencies = np.random.normal(
|
| 209 |
+
treatment_metrics['avg_latency_ms'],
|
| 210 |
+
treatment_metrics['avg_latency_ms'] * 0.15,
|
| 211 |
+
1000
|
| 212 |
+
)
|
| 213 |
+
|
| 214 |
+
# T-test
|
| 215 |
+
t_stat, p_value = stats.ttest_ind(control_latencies, treatment_latencies)
|
| 216 |
+
|
| 217 |
+
print(f"\n STATISTICAL SIGNIFICANCE:")
|
| 218 |
+
print(f" t-statistic: {t_stat:.4f}")
|
| 219 |
+
print(f" p-value: {p_value:.6f}")
|
| 220 |
+
if p_value < 0.05:
|
| 221 |
+
print(f" Result is STATISTICALLY SIGNIFICANT (p < 0.05)")
|
| 222 |
+
else:
|
| 223 |
+
print(f" Result is NOT statistically significant (p >= 0.05)")
|
| 224 |
+
|
| 225 |
+
# === SAVE RESULTS ===
|
| 226 |
+
print("\n" + "="*70)
|
| 227 |
+
print("SAVING RESULTS")
|
| 228 |
+
print("="*70)
|
| 229 |
+
|
| 230 |
+
ab_results = {
|
| 231 |
+
'control_metrics': control_metrics,
|
| 232 |
+
'treatment_metrics': treatment_metrics,
|
| 233 |
+
'improvements': {
|
| 234 |
+
'latency_reduction_pct': float(latency_improvement),
|
| 235 |
+
'cost_reduction_pct': float(cost_improvement),
|
| 236 |
+
'critical_latency_reduction_pct': float(critical_latency_improvement),
|
| 237 |
+
},
|
| 238 |
+
'statistical_significance': {
|
| 239 |
+
't_statistic': float(t_stat),
|
| 240 |
+
'p_value': float(p_value),
|
| 241 |
+
'is_significant': bool(p_value < 0.05)
|
| 242 |
+
}
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
with open('results/ab_test_results.json', 'w') as f:
|
| 246 |
+
json.dump(ab_results, f, indent=2)
|
| 247 |
+
|
| 248 |
+
print("Results saved to results/ab_test_results.json")
|
| 249 |
+
|
| 250 |
+
# Save placement strategies for later use
|
| 251 |
+
control_df.to_csv('results/control_placement.csv', index=False)
|
| 252 |
+
treatment_df.to_csv('results/treatment_placement.csv', index=False)
|
| 253 |
+
print("Placement strategies saved")
|
| 254 |
+
|
| 255 |
+
# === SUMMARY ===
|
| 256 |
+
print("\n" + "="*70)
|
| 257 |
+
print("A/B TEST SIMULATION COMPLETE!")
|
| 258 |
+
print("="*70)
|
| 259 |
+
print(f"\nEXECUTIVE SUMMARY:")
|
| 260 |
+
print(f" By switching from random to ML-optimized placement:")
|
| 261 |
+
print(f" • Reduce latency by {latency_improvement:.1f}%")
|
| 262 |
+
print(f" • Reduce costs by {cost_improvement:.1f}%")
|
| 263 |
+
print(f" • Improve critical service performance by {critical_latency_improvement:.1f}%")
|
| 264 |
+
print(f" • Results are {'STATISTICALLY SIGNIFICANT' if p_value < 0.05 else 'NOT significant'}")
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
conn.close()
|
app.py
ADDED
|
@@ -0,0 +1,393 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import sqlite3
|
| 5 |
+
import json
|
| 6 |
+
import plotly.graph_objects as go
|
| 7 |
+
import plotly.express as px
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
|
| 10 |
+
st.set_page_config(page_title="Resource Optimization ML", layout="wide", initial_sidebar_state="expanded")
|
| 11 |
+
|
| 12 |
+
# ==================== LOAD DATA ====================
|
| 13 |
+
@st.cache_resource
|
| 14 |
+
def load_data():
|
| 15 |
+
conn = sqlite3.connect('resource_optimization.db')
|
| 16 |
+
|
| 17 |
+
services = pd.read_sql_query("SELECT * FROM services", conn)
|
| 18 |
+
latency = pd.read_sql_query("SELECT * FROM regional_latency", conn)
|
| 19 |
+
traffic = pd.read_sql_query("SELECT * FROM traffic_patterns", conn)
|
| 20 |
+
placement = pd.read_sql_query("SELECT * FROM service_placement", conn)
|
| 21 |
+
|
| 22 |
+
conn.close()
|
| 23 |
+
return services, latency, traffic, placement
|
| 24 |
+
|
| 25 |
+
@st.cache_resource
|
| 26 |
+
def load_ab_results():
|
| 27 |
+
with open('results/ab_test_results.json', 'r') as f:
|
| 28 |
+
return json.load(f)
|
| 29 |
+
|
| 30 |
+
# Load all data
|
| 31 |
+
services, latency, traffic, placement = load_data()
|
| 32 |
+
ab_results = load_ab_results()
|
| 33 |
+
|
| 34 |
+
# ==================== SIDEBAR ====================
|
| 35 |
+
st.sidebar.title("📊 Navigation")
|
| 36 |
+
page = st.sidebar.radio(
|
| 37 |
+
"Select a page:",
|
| 38 |
+
["📈 Overview", "🎯 A/B Test Results", "🗺️ Regional Analysis", "🔧 Service Details", "ℹ️ About"]
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
# ==================== PAGE 1: OVERVIEW ====================
|
| 42 |
+
if page == "📈 Overview":
|
| 43 |
+
st.title("🚀 Resource Optimization ML Pipeline")
|
| 44 |
+
|
| 45 |
+
st.markdown("""
|
| 46 |
+
This project demonstrates an **end-to-end ML solution** for optimizing service placement
|
| 47 |
+
across AWS regions. The goal: reduce latency and costs while maintaining service reliability.
|
| 48 |
+
""")
|
| 49 |
+
|
| 50 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 51 |
+
with col1:
|
| 52 |
+
st.metric("Total Services", len(services))
|
| 53 |
+
with col2:
|
| 54 |
+
st.metric("AWS Regions", 5)
|
| 55 |
+
with col3:
|
| 56 |
+
st.metric("Placement Records", len(placement))
|
| 57 |
+
with col4:
|
| 58 |
+
st.metric("Traffic Records", f"{len(traffic)/1_000_000:.1f}M")
|
| 59 |
+
|
| 60 |
+
st.divider()
|
| 61 |
+
|
| 62 |
+
# Service Distribution
|
| 63 |
+
col1, col2 = st.columns(2)
|
| 64 |
+
|
| 65 |
+
with col1:
|
| 66 |
+
st.subheader("Services by Memory Requirements")
|
| 67 |
+
memory_dist = services['memory_mb'].value_counts().sort_index()
|
| 68 |
+
fig = px.bar(
|
| 69 |
+
x=memory_dist.index,
|
| 70 |
+
y=memory_dist.values,
|
| 71 |
+
labels={'x': 'Memory (MB)', 'y': 'Count'},
|
| 72 |
+
color=memory_dist.values,
|
| 73 |
+
color_continuous_scale='Viridis'
|
| 74 |
+
)
|
| 75 |
+
st.plotly_chart(fig, width='stretch')
|
| 76 |
+
|
| 77 |
+
with col2:
|
| 78 |
+
st.subheader("Latency Critical vs Non-Critical")
|
| 79 |
+
critical_dist = services['latency_critical'].value_counts()
|
| 80 |
+
fig = px.pie(
|
| 81 |
+
values=critical_dist.values,
|
| 82 |
+
names=['Non-Critical', 'Latency Critical'],
|
| 83 |
+
color_discrete_sequence=['#636EFA', '#EF553B']
|
| 84 |
+
)
|
| 85 |
+
st.plotly_chart(fig, width='stretch')
|
| 86 |
+
|
| 87 |
+
st.divider()
|
| 88 |
+
|
| 89 |
+
st.subheader("Traffic Volume by Service")
|
| 90 |
+
top_services = services.nlargest(10, 'traffic_volume_rps')[['service_name', 'traffic_volume_rps']]
|
| 91 |
+
fig = px.bar(
|
| 92 |
+
top_services,
|
| 93 |
+
x='traffic_volume_rps',
|
| 94 |
+
y='service_name',
|
| 95 |
+
orientation='h',
|
| 96 |
+
labels={'traffic_volume_rps': 'Requests/Second', 'service_name': 'Service'},
|
| 97 |
+
color='traffic_volume_rps',
|
| 98 |
+
color_continuous_scale='Blues'
|
| 99 |
+
)
|
| 100 |
+
st.plotly_chart(fig, width='stretch')
|
| 101 |
+
|
| 102 |
+
# ==================== PAGE 2: A/B TEST RESULTS ====================
|
| 103 |
+
elif page == "🎯 A/B Test Results":
|
| 104 |
+
st.title("A/B Test: Random vs ML-Optimized Placement")
|
| 105 |
+
|
| 106 |
+
st.markdown("""
|
| 107 |
+
Comparing a **random placement strategy** (control) against an **ML-optimized strategy** (treatment).
|
| 108 |
+
""")
|
| 109 |
+
|
| 110 |
+
control = ab_results['control_metrics']
|
| 111 |
+
treatment = ab_results['treatment_metrics']
|
| 112 |
+
improvements = ab_results['improvements']
|
| 113 |
+
sig = ab_results['statistical_significance']
|
| 114 |
+
|
| 115 |
+
# Key Metrics Comparison
|
| 116 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 117 |
+
|
| 118 |
+
with col1:
|
| 119 |
+
st.metric(
|
| 120 |
+
"Latency Reduction",
|
| 121 |
+
f"{improvements['latency_reduction_pct']:.2f}%",
|
| 122 |
+
delta="Lower is better"
|
| 123 |
+
)
|
| 124 |
+
with col2:
|
| 125 |
+
st.metric(
|
| 126 |
+
"Cost Savings",
|
| 127 |
+
f"{improvements['cost_reduction_pct']:.2f}%",
|
| 128 |
+
delta="Lower is better"
|
| 129 |
+
)
|
| 130 |
+
with col3:
|
| 131 |
+
st.metric(
|
| 132 |
+
"Critical Service Latency",
|
| 133 |
+
f"{improvements['critical_latency_reduction_pct']:.2f}%",
|
| 134 |
+
delta="Lower is better"
|
| 135 |
+
)
|
| 136 |
+
with col4:
|
| 137 |
+
is_sig = "✅ YES" if sig['is_significant'] else "❌ NO"
|
| 138 |
+
st.metric(
|
| 139 |
+
"Statistically Significant?",
|
| 140 |
+
is_sig,
|
| 141 |
+
delta=f"p-value: {sig['p_value']:.6f}"
|
| 142 |
+
)
|
| 143 |
+
|
| 144 |
+
st.divider()
|
| 145 |
+
|
| 146 |
+
# Detailed Comparison Table
|
| 147 |
+
st.subheader("Detailed Metrics Comparison")
|
| 148 |
+
comparison_data = {
|
| 149 |
+
'Metric': [
|
| 150 |
+
'Average Latency (ms)',
|
| 151 |
+
'Total Cost ($)',
|
| 152 |
+
'Placement Pairs',
|
| 153 |
+
'Redundancy Score',
|
| 154 |
+
'Critical Service Latency (ms)'
|
| 155 |
+
],
|
| 156 |
+
'Control (Random)': [
|
| 157 |
+
f"{control['avg_latency_ms']:.2f}",
|
| 158 |
+
f"{control['total_cost']:.2f}",
|
| 159 |
+
f"{control['total_placement_pairs']}",
|
| 160 |
+
f"{control['redundancy_score']:.2f}",
|
| 161 |
+
f"{control['critical_services_latency_ms']:.2f}"
|
| 162 |
+
],
|
| 163 |
+
'Treatment (ML-Optimized)': [
|
| 164 |
+
f"{treatment['avg_latency_ms']:.2f}",
|
| 165 |
+
f"{treatment['total_cost']:.2f}",
|
| 166 |
+
f"{treatment['total_placement_pairs']}",
|
| 167 |
+
f"{treatment['redundancy_score']:.2f}",
|
| 168 |
+
f"{treatment['critical_services_latency_ms']:.2f}"
|
| 169 |
+
]
|
| 170 |
+
}
|
| 171 |
+
comparison_df = pd.DataFrame(comparison_data)
|
| 172 |
+
st.dataframe(comparison_df, use_container_width=True)
|
| 173 |
+
|
| 174 |
+
st.divider()
|
| 175 |
+
|
| 176 |
+
# Visual Comparison
|
| 177 |
+
col1, col2 = st.columns(2)
|
| 178 |
+
|
| 179 |
+
with col1:
|
| 180 |
+
st.subheader("Latency Comparison")
|
| 181 |
+
latency_data = {
|
| 182 |
+
'Strategy': ['Control\n(Random)', 'Treatment\n(ML-Optimized)'],
|
| 183 |
+
'Average Latency (ms)': [control['avg_latency_ms'], treatment['avg_latency_ms']]
|
| 184 |
+
}
|
| 185 |
+
fig = px.bar(
|
| 186 |
+
latency_data,
|
| 187 |
+
x='Strategy',
|
| 188 |
+
y='Average Latency (ms)',
|
| 189 |
+
color_discrete_sequence=['#EF553B', '#00CC96'],
|
| 190 |
+
text='Average Latency (ms)'
|
| 191 |
+
)
|
| 192 |
+
fig.update_traces(textposition='outside')
|
| 193 |
+
st.plotly_chart(fig, width='stretch')
|
| 194 |
+
|
| 195 |
+
with col2:
|
| 196 |
+
st.subheader("Cost Comparison")
|
| 197 |
+
cost_data = {
|
| 198 |
+
'Strategy': ['Control\n(Random)', 'Treatment\n(ML-Optimized)'],
|
| 199 |
+
'Total Cost ($)': [control['total_cost'], treatment['total_cost']]
|
| 200 |
+
}
|
| 201 |
+
fig = px.bar(
|
| 202 |
+
cost_data,
|
| 203 |
+
x='Strategy',
|
| 204 |
+
y='Total Cost ($)',
|
| 205 |
+
color_discrete_sequence=['#EF553B', '#00CC96'],
|
| 206 |
+
text='Total Cost ($)'
|
| 207 |
+
)
|
| 208 |
+
fig.update_traces(textposition='outside')
|
| 209 |
+
st.plotly_chart(fig, width='stretch')
|
| 210 |
+
|
| 211 |
+
st.divider()
|
| 212 |
+
|
| 213 |
+
# Statistical Details
|
| 214 |
+
st.subheader("📊 Statistical Significance Test")
|
| 215 |
+
st.write(f"""
|
| 216 |
+
- **Test Type**: Independent t-test
|
| 217 |
+
- **t-statistic**: {sig['t_statistic']:.4f}
|
| 218 |
+
- **p-value**: {sig['p_value']:.10f}
|
| 219 |
+
- **Result**: {'✅ **STATISTICALLY SIGNIFICANT**' if sig['is_significant'] else '❌ Not significant'} (α = 0.05)
|
| 220 |
+
|
| 221 |
+
*The improvement in latency is statistically significant, meaning it's unlikely to be due to random chance.*
|
| 222 |
+
""")
|
| 223 |
+
|
| 224 |
+
# ==================== PAGE 3: REGIONAL ANALYSIS ====================
|
| 225 |
+
elif page == "🗺️ Regional Analysis":
|
| 226 |
+
st.title("Regional Latency Analysis")
|
| 227 |
+
|
| 228 |
+
# Convert timestamp
|
| 229 |
+
latency['timestamp'] = pd.to_datetime(latency['timestamp'])
|
| 230 |
+
|
| 231 |
+
# Latency heatmap
|
| 232 |
+
st.subheader("Average Cross-Region Latency (ms)")
|
| 233 |
+
|
| 234 |
+
latency_pivot = latency.pivot_table(
|
| 235 |
+
values='latency_ms',
|
| 236 |
+
index='region1',
|
| 237 |
+
columns='region2',
|
| 238 |
+
aggfunc='mean'
|
| 239 |
+
)
|
| 240 |
+
|
| 241 |
+
fig = go.Figure(data=go.Heatmap(
|
| 242 |
+
z=latency_pivot.values,
|
| 243 |
+
x=latency_pivot.columns,
|
| 244 |
+
y=latency_pivot.index,
|
| 245 |
+
colorscale='RdYlGn_r',
|
| 246 |
+
text=np.round(latency_pivot.values, 1),
|
| 247 |
+
texttemplate='%{text} ms',
|
| 248 |
+
textfont={"size": 10}
|
| 249 |
+
))
|
| 250 |
+
fig.update_layout(title="Latency Heatmap", xaxis_title="To Region", yaxis_title="From Region")
|
| 251 |
+
st.plotly_chart(fig, width='stretch')
|
| 252 |
+
|
| 253 |
+
st.divider()
|
| 254 |
+
|
| 255 |
+
# Regional statistics
|
| 256 |
+
st.subheader("Regional Statistics")
|
| 257 |
+
|
| 258 |
+
latency_stats = latency.groupby('region1').agg({
|
| 259 |
+
'latency_ms': ['mean', 'min', 'max', 'std']
|
| 260 |
+
}).round(2)
|
| 261 |
+
latency_stats.columns = ['Avg Latency (ms)', 'Min (ms)', 'Max (ms)', 'Std Dev (ms)']
|
| 262 |
+
|
| 263 |
+
st.dataframe(latency_stats, width='stretch')
|
| 264 |
+
|
| 265 |
+
# ==================== PAGE 4: SERVICE DETAILS ====================
|
| 266 |
+
elif page == "🔧 Service Details":
|
| 267 |
+
st.title("Service Details Explorer")
|
| 268 |
+
|
| 269 |
+
# Service selector
|
| 270 |
+
selected_service_name = st.selectbox(
|
| 271 |
+
"Select a service:",
|
| 272 |
+
services['service_name'].sort_values(),
|
| 273 |
+
key='service_selector'
|
| 274 |
+
)
|
| 275 |
+
|
| 276 |
+
selected_service = services[services['service_name'] == selected_service_name].iloc[0]
|
| 277 |
+
|
| 278 |
+
st.subheader(f"Service: {selected_service['service_name']}")
|
| 279 |
+
|
| 280 |
+
col1, col2, col3, col4, col5 = st.columns(5)
|
| 281 |
+
with col1:
|
| 282 |
+
st.metric("Memory", f"{selected_service['memory_mb']} MB")
|
| 283 |
+
with col2:
|
| 284 |
+
st.metric("CPU Cores", selected_service['cpu_cores'])
|
| 285 |
+
with col3:
|
| 286 |
+
st.metric("Traffic (RPS)", f"{selected_service['traffic_volume_rps']:,}")
|
| 287 |
+
with col4:
|
| 288 |
+
st.metric("Dependencies", int(selected_service['dependencies']))
|
| 289 |
+
with col5:
|
| 290 |
+
critical_status = "🔴 Critical" if selected_service['latency_critical'] else "🟢 Normal"
|
| 291 |
+
st.metric("Latency Sensitivity", critical_status)
|
| 292 |
+
|
| 293 |
+
st.divider()
|
| 294 |
+
|
| 295 |
+
# Service placement across regions
|
| 296 |
+
service_placement = placement[placement['service_id'] == selected_service['service_id']]
|
| 297 |
+
|
| 298 |
+
if len(service_placement) > 0:
|
| 299 |
+
st.subheader("Placement Across Regions")
|
| 300 |
+
|
| 301 |
+
placement_summary = service_placement.groupby('region').agg({
|
| 302 |
+
'instances': 'mean',
|
| 303 |
+
'avg_latency_ms': 'mean',
|
| 304 |
+
'error_rate': 'mean'
|
| 305 |
+
}).round(2)
|
| 306 |
+
|
| 307 |
+
st.dataframe(placement_summary, width='stretch')
|
| 308 |
+
|
| 309 |
+
# Latency by region
|
| 310 |
+
fig = px.bar(
|
| 311 |
+
placement_summary,
|
| 312 |
+
y='avg_latency_ms',
|
| 313 |
+
labels={'avg_latency_ms': 'Average Latency (ms)', 'region': 'Region'},
|
| 314 |
+
color='avg_latency_ms',
|
| 315 |
+
color_continuous_scale='Reds'
|
| 316 |
+
)
|
| 317 |
+
st.plotly_chart(fig, width='stretch')
|
| 318 |
+
|
| 319 |
+
# ==================== PAGE 5: ABOUT ====================
|
| 320 |
+
elif page == "ℹ️ About":
|
| 321 |
+
st.title("About This Project")
|
| 322 |
+
|
| 323 |
+
st.markdown("""
|
| 324 |
+
## 🎯 Problem Statement
|
| 325 |
+
|
| 326 |
+
Amazon's Region Flexibility Engineering team needs to optimize service placement across
|
| 327 |
+
AWS regions to:
|
| 328 |
+
- **Reduce latency** for end users
|
| 329 |
+
- **Lower costs** by avoiding expensive regions
|
| 330 |
+
- **Maintain reliability** with appropriate redundancy
|
| 331 |
+
- **Support rapid global expansion**
|
| 332 |
+
|
| 333 |
+
## 🛠️ Solution Architecture
|
| 334 |
+
|
| 335 |
+
### 1. Data Pipeline
|
| 336 |
+
- **Sources**: Service metadata, traffic patterns, regional latency, placement history
|
| 337 |
+
- **Processing**: SQL queries + Pandas for feature engineering
|
| 338 |
+
- **Scale**: 150+ services, 5 regions, 1.6M+ traffic records
|
| 339 |
+
|
| 340 |
+
### 2. ML Models
|
| 341 |
+
|
| 342 |
+
**Model 1: Latency Prediction (XGBoost)**
|
| 343 |
+
- Predicts service latency for a given placement
|
| 344 |
+
- Features: Memory, CPU, traffic patterns, outbound latency
|
| 345 |
+
- Performance: RMSE = 28.7ms
|
| 346 |
+
|
| 347 |
+
**Model 2: Placement Strategy (Random Forest)**
|
| 348 |
+
- Classifies services as high/low traffic
|
| 349 |
+
- Determines optimal number of regions per service
|
| 350 |
+
- Accuracy: 100% on test set
|
| 351 |
+
|
| 352 |
+
### 3. A/B Testing Framework
|
| 353 |
+
- **Control**: Random service placement (baseline)
|
| 354 |
+
- **Treatment**: ML-optimized placement
|
| 355 |
+
- **Results**: 5.25% latency reduction, 4.92% cost savings, statistically significant (p < 0.001)
|
| 356 |
+
|
| 357 |
+
## 📊 Key Metrics
|
| 358 |
+
|
| 359 |
+
| Metric | Result |
|
| 360 |
+
|--------|--------|
|
| 361 |
+
| Latency Reduction | 5.25% |
|
| 362 |
+
| Cost Savings | 4.92% |
|
| 363 |
+
| Critical Service Improvement | 9.30% |
|
| 364 |
+
| Statistical Significance | p < 0.001 ✅ |
|
| 365 |
+
| Placement Efficiency | 378 vs 452 pairs (-16%) |
|
| 366 |
+
|
| 367 |
+
## 💻 Tech Stack
|
| 368 |
+
|
| 369 |
+
- **Data**: SQLite, Pandas, NumPy
|
| 370 |
+
- **ML**: scikit-learn, XGBoost
|
| 371 |
+
- **Statistics**: SciPy (t-tests, significance)
|
| 372 |
+
- **Visualization**: Plotly, Streamlit
|
| 373 |
+
- **Deployment**: Hugging Face Spaces
|
| 374 |
+
|
| 375 |
+
## 📚 How to Use
|
| 376 |
+
|
| 377 |
+
1. **Overview**: See project summary and data distribution
|
| 378 |
+
2. **A/B Results**: Detailed comparison of strategies with statistical validation
|
| 379 |
+
3. **Regional Analysis**: Explore latency patterns across AWS regions
|
| 380 |
+
4. **Service Details**: Interactive explorer for individual services
|
| 381 |
+
|
| 382 |
+
## 🚀 Next Steps for Production
|
| 383 |
+
|
| 384 |
+
- Integrate with real AWS CloudWatch metrics
|
| 385 |
+
- Deploy as automated recommendation engine
|
| 386 |
+
- Create feedback loop for model retraining
|
| 387 |
+
- Build alerting system for anomalies
|
| 388 |
+
- Extend to multi-cloud (GCP, Azure)
|
| 389 |
+
|
| 390 |
+
---
|
| 391 |
+
|
| 392 |
+
**Built with Python | ML | Data Engineering | Cloud Architecture**
|
| 393 |
+
""")
|
data_generation.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from faker import Faker
|
| 4 |
+
from datetime import datetime, timedelta
|
| 5 |
+
import random
|
| 6 |
+
|
| 7 |
+
# Set random seed for reproducibility
|
| 8 |
+
np.random.seed(42)
|
| 9 |
+
random.seed(42)
|
| 10 |
+
|
| 11 |
+
fake = Faker()
|
| 12 |
+
|
| 13 |
+
print("Starting Data Generation...")
|
| 14 |
+
|
| 15 |
+
# ==================== PART 1: Generate Services ====================
|
| 16 |
+
print("\nGenerating Services Data...")
|
| 17 |
+
|
| 18 |
+
services_data = []
|
| 19 |
+
service_templates = [
|
| 20 |
+
"auth", "cache", "database", "api", "notification",
|
| 21 |
+
"search", "recommendation", "payment", "inventory", "profile",
|
| 22 |
+
"order", "analytics", "logging", "metrics", "config",
|
| 23 |
+
"gateway", "queue", "processor", "manager", "service",
|
| 24 |
+
"worker", "scheduler", "validator", "router", "balancer"
|
| 25 |
+
]
|
| 26 |
+
|
| 27 |
+
# Generate 150 services by combining templates
|
| 28 |
+
service_names = []
|
| 29 |
+
for i in range(6):
|
| 30 |
+
for template in service_templates:
|
| 31 |
+
service_names.append(f"{template}-service-{i+1}")
|
| 32 |
+
|
| 33 |
+
for i, name in enumerate(service_names, start=1):
|
| 34 |
+
services_data.append({
|
| 35 |
+
'service_id': i,
|
| 36 |
+
'service_name': name,
|
| 37 |
+
'memory_mb': random.choice([256, 512, 1024, 2048, 4096]),
|
| 38 |
+
'cpu_cores': random.choice([0.5, 1, 2, 4]),
|
| 39 |
+
'latency_critical': random.choice([True, False]),
|
| 40 |
+
'traffic_volume_rps': random.randint(1000, 100000), # requests per second
|
| 41 |
+
'dependencies': random.randint(0, 5) # how many other services it depends on
|
| 42 |
+
})
|
| 43 |
+
|
| 44 |
+
services_df = pd.DataFrame(services_data)
|
| 45 |
+
services_df.to_csv('data/services.csv', index=False)
|
| 46 |
+
print(f"Generated {len(services_df)} services")
|
| 47 |
+
print(services_df.head())
|
| 48 |
+
|
| 49 |
+
# ==================== PART 2: Generate Regional Latency ====================
|
| 50 |
+
print("\nGenerating Regional Latency Data...")
|
| 51 |
+
|
| 52 |
+
regions = ['us-east-1', 'us-west-2', 'eu-west-1', 'ap-southeast-1', 'ap-northeast-1']
|
| 53 |
+
latency_data = []
|
| 54 |
+
|
| 55 |
+
# Create latency matrix (some regions are closer than others)
|
| 56 |
+
latency_matrix = {
|
| 57 |
+
('us-east-1', 'us-west-2'): (60, 80),
|
| 58 |
+
('us-east-1', 'eu-west-1'): (90, 110),
|
| 59 |
+
('us-east-1', 'ap-southeast-1'): (180, 220),
|
| 60 |
+
('us-east-1', 'ap-northeast-1'): (150, 190),
|
| 61 |
+
('us-west-2', 'eu-west-1'): (130, 160),
|
| 62 |
+
('us-west-2', 'ap-southeast-1'): (140, 170),
|
| 63 |
+
('us-west-2', 'ap-northeast-1'): (110, 140),
|
| 64 |
+
('eu-west-1', 'ap-southeast-1'): (200, 250),
|
| 65 |
+
('eu-west-1', 'ap-northeast-1'): (180, 230),
|
| 66 |
+
('ap-southeast-1', 'ap-northeast-1'): (50, 80),
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
# Generate latency measurements over time
|
| 70 |
+
start_date = datetime(2024, 1, 1)
|
| 71 |
+
for days in range(90): # 3 months
|
| 72 |
+
timestamp = start_date + timedelta(days=days)
|
| 73 |
+
|
| 74 |
+
for region1 in regions:
|
| 75 |
+
for region2 in regions:
|
| 76 |
+
if region1 == region2:
|
| 77 |
+
latency_data.append({
|
| 78 |
+
'region1': region1,
|
| 79 |
+
'region2': region2,
|
| 80 |
+
'latency_ms': random.gauss(2, 0.5), # same region: ~2ms
|
| 81 |
+
'timestamp': timestamp
|
| 82 |
+
})
|
| 83 |
+
elif (region1, region2) in latency_matrix:
|
| 84 |
+
min_lat, max_lat = latency_matrix[(region1, region2)]
|
| 85 |
+
base_latency = np.random.uniform(min_lat, max_lat)
|
| 86 |
+
# Add some noise
|
| 87 |
+
latency = base_latency + random.gauss(0, 5)
|
| 88 |
+
latency_data.append({
|
| 89 |
+
'region1': region1,
|
| 90 |
+
'region2': region2,
|
| 91 |
+
'latency_ms': max(latency, 1), # ensure positive
|
| 92 |
+
'timestamp': timestamp
|
| 93 |
+
})
|
| 94 |
+
elif (region2, region1) in latency_matrix:
|
| 95 |
+
min_lat, max_lat = latency_matrix[(region2, region1)]
|
| 96 |
+
base_latency = np.random.uniform(min_lat, max_lat)
|
| 97 |
+
latency = base_latency + random.gauss(0, 5)
|
| 98 |
+
latency_data.append({
|
| 99 |
+
'region1': region1,
|
| 100 |
+
'region2': region2,
|
| 101 |
+
'latency_ms': max(latency, 1),
|
| 102 |
+
'timestamp': timestamp
|
| 103 |
+
})
|
| 104 |
+
|
| 105 |
+
latency_df = pd.DataFrame(latency_data)
|
| 106 |
+
latency_df.to_csv('data/regional_latency.csv', index=False)
|
| 107 |
+
print(f"Generated {len(latency_df)} latency measurements")
|
| 108 |
+
print(latency_df.head())
|
| 109 |
+
|
| 110 |
+
# ==================== PART 3: Generate Traffic Patterns ====================
|
| 111 |
+
print("\nGenerating Traffic Patterns...")
|
| 112 |
+
|
| 113 |
+
traffic_data = []
|
| 114 |
+
start_date = datetime(2024, 1, 1)
|
| 115 |
+
|
| 116 |
+
for days in range(90): # 3 months
|
| 117 |
+
for hour in range(24):
|
| 118 |
+
timestamp = start_date + timedelta(days=days, hours=hour)
|
| 119 |
+
|
| 120 |
+
# Peak hours are 9-17 (business hours)
|
| 121 |
+
hour_of_day = timestamp.hour
|
| 122 |
+
if 9 <= hour_of_day <= 17:
|
| 123 |
+
traffic_multiplier = random.uniform(1.5, 2.5)
|
| 124 |
+
elif 22 <= hour_of_day or hour_of_day <= 6:
|
| 125 |
+
traffic_multiplier = random.uniform(0.2, 0.5) # low traffic at night
|
| 126 |
+
else:
|
| 127 |
+
traffic_multiplier = random.uniform(0.8, 1.2)
|
| 128 |
+
|
| 129 |
+
# Weekend traffic is lower
|
| 130 |
+
if timestamp.weekday() >= 5: # Saturday = 5, Sunday = 6
|
| 131 |
+
traffic_multiplier *= 0.7
|
| 132 |
+
|
| 133 |
+
for service_id, service_row in services_df.iterrows():
|
| 134 |
+
base_traffic = service_row['traffic_volume_rps']
|
| 135 |
+
|
| 136 |
+
for region in regions:
|
| 137 |
+
# Different regions have different traffic volumes
|
| 138 |
+
region_factor = {
|
| 139 |
+
'us-east-1': 1.0,
|
| 140 |
+
'us-west-2': 0.8,
|
| 141 |
+
'eu-west-1': 0.6,
|
| 142 |
+
'ap-southeast-1': 0.5,
|
| 143 |
+
'ap-northeast-1': 0.4,
|
| 144 |
+
}[region]
|
| 145 |
+
|
| 146 |
+
requests = int(base_traffic * traffic_multiplier * region_factor)
|
| 147 |
+
|
| 148 |
+
traffic_data.append({
|
| 149 |
+
'service_id': service_id + 1,
|
| 150 |
+
'region': region,
|
| 151 |
+
'hour': hour,
|
| 152 |
+
'requests': requests,
|
| 153 |
+
'timestamp': timestamp
|
| 154 |
+
})
|
| 155 |
+
|
| 156 |
+
traffic_df = pd.DataFrame(traffic_data)
|
| 157 |
+
traffic_df.to_csv('data/traffic_patterns.csv', index=False)
|
| 158 |
+
print(f"Generated {len(traffic_df)} traffic records")
|
| 159 |
+
print(traffic_df.head())
|
| 160 |
+
|
| 161 |
+
# ==================== PART 4: Generate Placement History ====================
|
| 162 |
+
print("\nGenerating Service Placement History...")
|
| 163 |
+
|
| 164 |
+
placement_data = []
|
| 165 |
+
start_date = datetime(2024, 1, 1)
|
| 166 |
+
|
| 167 |
+
for days in range(90):
|
| 168 |
+
timestamp = start_date + timedelta(days=days)
|
| 169 |
+
|
| 170 |
+
for service_id in range(1, len(service_names) + 1):
|
| 171 |
+
service = services_df[services_df['service_id'] == service_id].iloc[0]
|
| 172 |
+
|
| 173 |
+
# Latency critical services are usually in fewer regions
|
| 174 |
+
if service['latency_critical']:
|
| 175 |
+
num_regions = random.choice([1, 2])
|
| 176 |
+
else:
|
| 177 |
+
num_regions = random.choice([2, 3, 4])
|
| 178 |
+
|
| 179 |
+
placement_regions = random.sample(regions, num_regions)
|
| 180 |
+
|
| 181 |
+
for region in placement_regions:
|
| 182 |
+
placement_data.append({
|
| 183 |
+
'service_id': service_id,
|
| 184 |
+
'region': region,
|
| 185 |
+
'timestamp': timestamp,
|
| 186 |
+
'instances': random.randint(1, 5),
|
| 187 |
+
'avg_latency_ms': random.uniform(5, 100),
|
| 188 |
+
'error_rate': random.uniform(0, 0.05)
|
| 189 |
+
})
|
| 190 |
+
|
| 191 |
+
placement_df = pd.DataFrame(placement_data)
|
| 192 |
+
placement_df.to_csv('data/service_placement.csv', index=False)
|
| 193 |
+
print(f"Generated {len(placement_df)} placement records")
|
| 194 |
+
print(placement_df.head())
|
| 195 |
+
|
| 196 |
+
# ==================== Summary ====================
|
| 197 |
+
print("\n" + "="*50)
|
| 198 |
+
print("ALL DATA GENERATED SUCCESSFULLY!")
|
| 199 |
+
print("="*50)
|
| 200 |
+
print(f"\nFiles created in 'data/' folder:")
|
| 201 |
+
print(f" • services.csv ({len(services_df)} rows)")
|
| 202 |
+
print(f" • regional_latency.csv ({len(latency_df)} rows)")
|
| 203 |
+
print(f" • traffic_patterns.csv ({len(traffic_df)} rows)")
|
| 204 |
+
print(f" • service_placement.csv ({len(placement_df)} rows)")
|
| 205 |
+
print(f"\nTotal records generated: {len(services_df) + len(latency_df) + len(traffic_df) + len(placement_df):,}")
|
explore_data.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
print("EXPLORING RESOURCE OPTIMIZATION DATA\n")
|
| 5 |
+
|
| 6 |
+
# Connect to database
|
| 7 |
+
conn = sqlite3.connect('resource_optimization.db')
|
| 8 |
+
|
| 9 |
+
# ==================== QUERY 1: Service Overview ====================
|
| 10 |
+
print("="*100)
|
| 11 |
+
print("SERVICE OVERVIEW")
|
| 12 |
+
print("="*100)
|
| 13 |
+
|
| 14 |
+
query1 = """
|
| 15 |
+
SELECT
|
| 16 |
+
service_id,
|
| 17 |
+
service_name,
|
| 18 |
+
memory_mb,
|
| 19 |
+
cpu_cores,
|
| 20 |
+
latency_critical,
|
| 21 |
+
traffic_volume_rps,
|
| 22 |
+
dependencies
|
| 23 |
+
FROM services
|
| 24 |
+
ORDER BY traffic_volume_rps DESC
|
| 25 |
+
LIMIT 10
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
df1 = pd.read_sql_query(query1, conn)
|
| 29 |
+
print(df1.to_string(index=False))
|
| 30 |
+
print()
|
| 31 |
+
|
| 32 |
+
# ==================== QUERY 2: Regional Latency Summary ====================
|
| 33 |
+
print("="*100)
|
| 34 |
+
print("REGIONAL LATENCY MATRIX (average ms)")
|
| 35 |
+
print("="*100)
|
| 36 |
+
|
| 37 |
+
query2 = """
|
| 38 |
+
SELECT
|
| 39 |
+
region1,
|
| 40 |
+
region2,
|
| 41 |
+
ROUND(AVG(latency_ms), 2) as avg_latency_ms,
|
| 42 |
+
ROUND(MIN(latency_ms), 2) as min_latency_ms,
|
| 43 |
+
ROUND(MAX(latency_ms), 2) as max_latency_ms,
|
| 44 |
+
COUNT(*) as samples
|
| 45 |
+
FROM regional_latency
|
| 46 |
+
GROUP BY region1, region2
|
| 47 |
+
ORDER BY region1, region2
|
| 48 |
+
"""
|
| 49 |
+
|
| 50 |
+
df2 = pd.read_sql_query(query2, conn)
|
| 51 |
+
print(df2.to_string(index=False))
|
| 52 |
+
print()
|
| 53 |
+
|
| 54 |
+
# ==================== QUERY 3: Traffic by Region ====================
|
| 55 |
+
print("="*100)
|
| 56 |
+
print("TOTAL TRAFFIC BY REGION")
|
| 57 |
+
print("="*100)
|
| 58 |
+
|
| 59 |
+
query3 = """
|
| 60 |
+
SELECT
|
| 61 |
+
region,
|
| 62 |
+
SUM(requests) as total_requests,
|
| 63 |
+
ROUND(AVG(requests), 0) as avg_hourly_requests,
|
| 64 |
+
COUNT(DISTINCT service_id) as num_services
|
| 65 |
+
FROM traffic_patterns
|
| 66 |
+
GROUP BY region
|
| 67 |
+
ORDER BY total_requests DESC
|
| 68 |
+
"""
|
| 69 |
+
|
| 70 |
+
df3 = pd.read_sql_query(query3, conn)
|
| 71 |
+
print(df3.to_string(index=False))
|
| 72 |
+
print()
|
| 73 |
+
|
| 74 |
+
# ==================== QUERY 4: Services by Placement Count ====================
|
| 75 |
+
print("="*100)
|
| 76 |
+
print("SERVICE PLACEMENT DISTRIBUTION")
|
| 77 |
+
print("="*100)
|
| 78 |
+
|
| 79 |
+
query4 = """
|
| 80 |
+
SELECT
|
| 81 |
+
s.service_id,
|
| 82 |
+
s.service_name,
|
| 83 |
+
COUNT(DISTINCT sp.region) as num_regions,
|
| 84 |
+
ROUND(AVG(sp.avg_latency_ms), 2) as avg_latency_ms,
|
| 85 |
+
ROUND(AVG(sp.error_rate), 4) as avg_error_rate
|
| 86 |
+
FROM services s
|
| 87 |
+
LEFT JOIN service_placement sp ON s.service_id = sp.service_id
|
| 88 |
+
GROUP BY s.service_id
|
| 89 |
+
ORDER BY num_regions DESC, s.service_name
|
| 90 |
+
"""
|
| 91 |
+
|
| 92 |
+
df4 = pd.read_sql_query(query4, conn)
|
| 93 |
+
print(df4.to_string(index=False))
|
| 94 |
+
print()
|
| 95 |
+
|
| 96 |
+
# ==================== QUERY 5: Peak Traffic Hours ====================
|
| 97 |
+
print("="*100)
|
| 98 |
+
print("PEAK TRAFFIC HOURS (all regions combined)")
|
| 99 |
+
print("="*100)
|
| 100 |
+
|
| 101 |
+
query5 = """
|
| 102 |
+
SELECT
|
| 103 |
+
hour,
|
| 104 |
+
SUM(requests) as total_requests,
|
| 105 |
+
ROUND(AVG(requests), 0) as avg_requests_per_service_region
|
| 106 |
+
FROM traffic_patterns
|
| 107 |
+
GROUP BY hour
|
| 108 |
+
ORDER BY total_requests DESC
|
| 109 |
+
LIMIT 10
|
| 110 |
+
"""
|
| 111 |
+
|
| 112 |
+
df5 = pd.read_sql_query(query5, conn)
|
| 113 |
+
print(df5.to_string(index=False))
|
| 114 |
+
print()
|
| 115 |
+
|
| 116 |
+
# ==================== QUERY 6: Cross-Region Traffic Analysis ====================
|
| 117 |
+
print("="*100)
|
| 118 |
+
print("HIGH LATENCY REGION PAIRS (average > 100ms)")
|
| 119 |
+
print("="*100)
|
| 120 |
+
|
| 121 |
+
query6 = """
|
| 122 |
+
SELECT
|
| 123 |
+
region1,
|
| 124 |
+
region2,
|
| 125 |
+
ROUND(AVG(latency_ms), 2) as avg_latency_ms
|
| 126 |
+
FROM regional_latency
|
| 127 |
+
GROUP BY region1, region2
|
| 128 |
+
HAVING AVG(latency_ms) > 100
|
| 129 |
+
ORDER BY avg_latency_ms DESC
|
| 130 |
+
"""
|
| 131 |
+
|
| 132 |
+
df6 = pd.read_sql_query(query6, conn)
|
| 133 |
+
print(df6.to_string(index=False))
|
| 134 |
+
print()
|
| 135 |
+
|
| 136 |
+
# ==================== QUERY 7: Latency Critical Services ====================
|
| 137 |
+
print("="*100)
|
| 138 |
+
print("LATENCY CRITICAL SERVICES")
|
| 139 |
+
print("="*100)
|
| 140 |
+
|
| 141 |
+
query7 = """
|
| 142 |
+
SELECT
|
| 143 |
+
service_id,
|
| 144 |
+
service_name,
|
| 145 |
+
memory_mb,
|
| 146 |
+
traffic_volume_rps,
|
| 147 |
+
dependencies
|
| 148 |
+
FROM services
|
| 149 |
+
WHERE latency_critical = 1
|
| 150 |
+
ORDER BY traffic_volume_rps DESC
|
| 151 |
+
"""
|
| 152 |
+
|
| 153 |
+
df7 = pd.read_sql_query(query7, conn)
|
| 154 |
+
print(df7.to_string(index=False))
|
| 155 |
+
print()
|
| 156 |
+
|
| 157 |
+
# ==================== SUMMARY STATS ====================
|
| 158 |
+
print("="*100)
|
| 159 |
+
print("SUMMARY STATISTICS")
|
| 160 |
+
print("="*100)
|
| 161 |
+
|
| 162 |
+
query_summary = "SELECT COUNT(*) as total_services FROM services"
|
| 163 |
+
total_services = pd.read_sql_query(query_summary, conn).iloc[0, 0]
|
| 164 |
+
|
| 165 |
+
query_summary = "SELECT COUNT(DISTINCT region) as num_regions FROM traffic_patterns"
|
| 166 |
+
num_regions = pd.read_sql_query(query_summary, conn).iloc[0, 0]
|
| 167 |
+
|
| 168 |
+
query_summary = "SELECT SUM(requests) as total_traffic FROM traffic_patterns"
|
| 169 |
+
total_traffic = pd.read_sql_query(query_summary, conn).iloc[0, 0]
|
| 170 |
+
|
| 171 |
+
query_summary = "SELECT ROUND(AVG(latency_ms), 2) as avg_latency FROM regional_latency"
|
| 172 |
+
avg_latency = pd.read_sql_query(query_summary, conn).iloc[0, 0]
|
| 173 |
+
|
| 174 |
+
print(f"• Total Services: {total_services}")
|
| 175 |
+
print(f"• Total Regions: {num_regions}")
|
| 176 |
+
print(f"• Total Traffic Records: {total_traffic:,}")
|
| 177 |
+
print(f"• Average Cross-Region Latency: {avg_latency} ms")
|
| 178 |
+
print()
|
| 179 |
+
|
| 180 |
+
conn.close()
|
| 181 |
+
|
| 182 |
+
print("="*100)
|
| 183 |
+
print("✅ DATA EXPLORATION COMPLETE!")
|
| 184 |
+
print("="*100)
|
main.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def main():
|
| 2 |
+
print("Hello from resource-optimization-ml!")
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
if __name__ == "__main__":
|
| 6 |
+
main()
|
pyproject.toml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "resource-optimization-ml"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Add your description here"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
dependencies = []
|
results/ab_test_results.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"control_metrics": {
|
| 3 |
+
"strategy": "Control (Random)",
|
| 4 |
+
"total_placement_pairs": 439,
|
| 5 |
+
"total_requests": 14727372.815277778,
|
| 6 |
+
"avg_latency_ms": 114.30821763097148,
|
| 7 |
+
"total_cost": 963234.2061527779,
|
| 8 |
+
"redundancy_score": 2.9266666666666667,
|
| 9 |
+
"critical_services_latency_ms": 113.80035557003376
|
| 10 |
+
},
|
| 11 |
+
"treatment_metrics": {
|
| 12 |
+
"strategy": "Treatment (ML-Optimized)",
|
| 13 |
+
"total_placement_pairs": 378,
|
| 14 |
+
"total_requests": 15929494.539814815,
|
| 15 |
+
"avg_latency_ms": 108.68522063698082,
|
| 16 |
+
"total_cost": 902063.7020092593,
|
| 17 |
+
"redundancy_score": 2.52,
|
| 18 |
+
"critical_services_latency_ms": 104.26008331417714
|
| 19 |
+
},
|
| 20 |
+
"improvements": {
|
| 21 |
+
"latency_reduction_pct": 4.919153767355334,
|
| 22 |
+
"cost_reduction_pct": 6.350532793871361,
|
| 23 |
+
"critical_latency_reduction_pct": 8.310315946745126
|
| 24 |
+
},
|
| 25 |
+
"statistical_significance": {
|
| 26 |
+
"t_statistic": 6.493542664285135,
|
| 27 |
+
"p_value": 1.0549336552475258e-10,
|
| 28 |
+
"is_significant": true
|
| 29 |
+
}
|
| 30 |
+
}
|
results/control_placement.csv
ADDED
|
@@ -0,0 +1,440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
service_id,region,strategy
|
| 2 |
+
1,ap-northeast-1,control
|
| 3 |
+
1,us-west-2,control
|
| 4 |
+
1,eu-west-1,control
|
| 5 |
+
1,ap-southeast-1,control
|
| 6 |
+
2,ap-southeast-1,control
|
| 7 |
+
2,ap-northeast-1,control
|
| 8 |
+
3,ap-southeast-1,control
|
| 9 |
+
3,us-east-1,control
|
| 10 |
+
3,us-west-2,control
|
| 11 |
+
3,eu-west-1,control
|
| 12 |
+
4,ap-northeast-1,control
|
| 13 |
+
4,us-east-1,control
|
| 14 |
+
4,us-west-2,control
|
| 15 |
+
5,us-east-1,control
|
| 16 |
+
5,ap-northeast-1,control
|
| 17 |
+
6,us-west-2,control
|
| 18 |
+
6,ap-northeast-1,control
|
| 19 |
+
7,us-east-1,control
|
| 20 |
+
7,ap-northeast-1,control
|
| 21 |
+
7,us-west-2,control
|
| 22 |
+
8,us-west-2,control
|
| 23 |
+
8,ap-southeast-1,control
|
| 24 |
+
9,ap-northeast-1,control
|
| 25 |
+
9,us-east-1,control
|
| 26 |
+
9,ap-southeast-1,control
|
| 27 |
+
10,eu-west-1,control
|
| 28 |
+
10,ap-northeast-1,control
|
| 29 |
+
11,ap-northeast-1,control
|
| 30 |
+
11,us-east-1,control
|
| 31 |
+
12,us-east-1,control
|
| 32 |
+
12,us-west-2,control
|
| 33 |
+
13,ap-southeast-1,control
|
| 34 |
+
13,us-east-1,control
|
| 35 |
+
13,us-west-2,control
|
| 36 |
+
13,eu-west-1,control
|
| 37 |
+
14,us-east-1,control
|
| 38 |
+
14,us-west-2,control
|
| 39 |
+
14,ap-northeast-1,control
|
| 40 |
+
14,eu-west-1,control
|
| 41 |
+
15,ap-southeast-1,control
|
| 42 |
+
15,ap-northeast-1,control
|
| 43 |
+
16,us-east-1,control
|
| 44 |
+
16,ap-northeast-1,control
|
| 45 |
+
17,us-east-1,control
|
| 46 |
+
17,ap-northeast-1,control
|
| 47 |
+
17,ap-southeast-1,control
|
| 48 |
+
17,us-west-2,control
|
| 49 |
+
18,us-west-2,control
|
| 50 |
+
18,ap-northeast-1,control
|
| 51 |
+
18,ap-southeast-1,control
|
| 52 |
+
18,eu-west-1,control
|
| 53 |
+
19,us-east-1,control
|
| 54 |
+
19,us-west-2,control
|
| 55 |
+
19,ap-southeast-1,control
|
| 56 |
+
20,ap-northeast-1,control
|
| 57 |
+
20,us-west-2,control
|
| 58 |
+
20,eu-west-1,control
|
| 59 |
+
20,ap-southeast-1,control
|
| 60 |
+
21,ap-northeast-1,control
|
| 61 |
+
21,ap-southeast-1,control
|
| 62 |
+
21,us-west-2,control
|
| 63 |
+
22,eu-west-1,control
|
| 64 |
+
22,us-west-2,control
|
| 65 |
+
23,us-west-2,control
|
| 66 |
+
23,us-east-1,control
|
| 67 |
+
23,ap-southeast-1,control
|
| 68 |
+
24,us-east-1,control
|
| 69 |
+
24,ap-southeast-1,control
|
| 70 |
+
24,us-west-2,control
|
| 71 |
+
24,ap-northeast-1,control
|
| 72 |
+
25,eu-west-1,control
|
| 73 |
+
25,ap-northeast-1,control
|
| 74 |
+
26,us-east-1,control
|
| 75 |
+
26,ap-northeast-1,control
|
| 76 |
+
26,ap-southeast-1,control
|
| 77 |
+
27,ap-northeast-1,control
|
| 78 |
+
27,us-east-1,control
|
| 79 |
+
28,ap-northeast-1,control
|
| 80 |
+
28,ap-southeast-1,control
|
| 81 |
+
28,us-east-1,control
|
| 82 |
+
29,us-west-2,control
|
| 83 |
+
29,eu-west-1,control
|
| 84 |
+
30,eu-west-1,control
|
| 85 |
+
30,ap-southeast-1,control
|
| 86 |
+
31,ap-northeast-1,control
|
| 87 |
+
31,ap-southeast-1,control
|
| 88 |
+
31,us-east-1,control
|
| 89 |
+
31,us-west-2,control
|
| 90 |
+
32,ap-southeast-1,control
|
| 91 |
+
32,us-west-2,control
|
| 92 |
+
32,eu-west-1,control
|
| 93 |
+
33,ap-southeast-1,control
|
| 94 |
+
33,us-west-2,control
|
| 95 |
+
33,us-east-1,control
|
| 96 |
+
33,ap-northeast-1,control
|
| 97 |
+
34,eu-west-1,control
|
| 98 |
+
34,ap-northeast-1,control
|
| 99 |
+
35,ap-southeast-1,control
|
| 100 |
+
35,us-west-2,control
|
| 101 |
+
35,eu-west-1,control
|
| 102 |
+
35,us-east-1,control
|
| 103 |
+
36,eu-west-1,control
|
| 104 |
+
36,us-west-2,control
|
| 105 |
+
36,us-east-1,control
|
| 106 |
+
36,ap-southeast-1,control
|
| 107 |
+
37,us-west-2,control
|
| 108 |
+
37,us-east-1,control
|
| 109 |
+
37,eu-west-1,control
|
| 110 |
+
37,ap-southeast-1,control
|
| 111 |
+
38,us-east-1,control
|
| 112 |
+
38,ap-northeast-1,control
|
| 113 |
+
39,ap-northeast-1,control
|
| 114 |
+
39,us-west-2,control
|
| 115 |
+
39,eu-west-1,control
|
| 116 |
+
39,ap-southeast-1,control
|
| 117 |
+
40,ap-southeast-1,control
|
| 118 |
+
40,eu-west-1,control
|
| 119 |
+
40,us-east-1,control
|
| 120 |
+
40,us-west-2,control
|
| 121 |
+
41,eu-west-1,control
|
| 122 |
+
41,ap-northeast-1,control
|
| 123 |
+
41,us-west-2,control
|
| 124 |
+
42,ap-northeast-1,control
|
| 125 |
+
42,us-east-1,control
|
| 126 |
+
42,ap-southeast-1,control
|
| 127 |
+
43,ap-southeast-1,control
|
| 128 |
+
43,ap-northeast-1,control
|
| 129 |
+
43,us-east-1,control
|
| 130 |
+
44,ap-northeast-1,control
|
| 131 |
+
44,us-east-1,control
|
| 132 |
+
45,eu-west-1,control
|
| 133 |
+
45,us-west-2,control
|
| 134 |
+
45,ap-southeast-1,control
|
| 135 |
+
45,us-east-1,control
|
| 136 |
+
46,ap-northeast-1,control
|
| 137 |
+
46,eu-west-1,control
|
| 138 |
+
46,ap-southeast-1,control
|
| 139 |
+
47,ap-northeast-1,control
|
| 140 |
+
47,us-west-2,control
|
| 141 |
+
47,eu-west-1,control
|
| 142 |
+
48,us-west-2,control
|
| 143 |
+
48,ap-northeast-1,control
|
| 144 |
+
48,us-east-1,control
|
| 145 |
+
49,us-west-2,control
|
| 146 |
+
49,ap-southeast-1,control
|
| 147 |
+
49,eu-west-1,control
|
| 148 |
+
50,ap-southeast-1,control
|
| 149 |
+
50,ap-northeast-1,control
|
| 150 |
+
50,us-west-2,control
|
| 151 |
+
51,us-east-1,control
|
| 152 |
+
51,ap-southeast-1,control
|
| 153 |
+
51,ap-northeast-1,control
|
| 154 |
+
51,us-west-2,control
|
| 155 |
+
52,us-east-1,control
|
| 156 |
+
52,ap-northeast-1,control
|
| 157 |
+
53,us-east-1,control
|
| 158 |
+
53,ap-southeast-1,control
|
| 159 |
+
53,ap-northeast-1,control
|
| 160 |
+
53,eu-west-1,control
|
| 161 |
+
54,us-east-1,control
|
| 162 |
+
54,eu-west-1,control
|
| 163 |
+
55,us-west-2,control
|
| 164 |
+
55,us-east-1,control
|
| 165 |
+
56,ap-southeast-1,control
|
| 166 |
+
56,us-west-2,control
|
| 167 |
+
57,us-west-2,control
|
| 168 |
+
57,us-east-1,control
|
| 169 |
+
58,eu-west-1,control
|
| 170 |
+
58,ap-northeast-1,control
|
| 171 |
+
58,us-west-2,control
|
| 172 |
+
58,ap-southeast-1,control
|
| 173 |
+
59,eu-west-1,control
|
| 174 |
+
59,ap-southeast-1,control
|
| 175 |
+
60,ap-northeast-1,control
|
| 176 |
+
60,ap-southeast-1,control
|
| 177 |
+
60,us-east-1,control
|
| 178 |
+
60,eu-west-1,control
|
| 179 |
+
61,eu-west-1,control
|
| 180 |
+
61,us-west-2,control
|
| 181 |
+
61,us-east-1,control
|
| 182 |
+
61,ap-northeast-1,control
|
| 183 |
+
62,us-west-2,control
|
| 184 |
+
62,ap-southeast-1,control
|
| 185 |
+
63,us-east-1,control
|
| 186 |
+
63,ap-southeast-1,control
|
| 187 |
+
63,us-west-2,control
|
| 188 |
+
63,ap-northeast-1,control
|
| 189 |
+
64,us-west-2,control
|
| 190 |
+
64,ap-northeast-1,control
|
| 191 |
+
64,us-east-1,control
|
| 192 |
+
65,eu-west-1,control
|
| 193 |
+
65,us-east-1,control
|
| 194 |
+
66,us-west-2,control
|
| 195 |
+
66,ap-southeast-1,control
|
| 196 |
+
67,us-east-1,control
|
| 197 |
+
67,us-west-2,control
|
| 198 |
+
67,eu-west-1,control
|
| 199 |
+
68,eu-west-1,control
|
| 200 |
+
68,ap-southeast-1,control
|
| 201 |
+
68,us-east-1,control
|
| 202 |
+
69,eu-west-1,control
|
| 203 |
+
69,us-east-1,control
|
| 204 |
+
70,us-west-2,control
|
| 205 |
+
70,ap-southeast-1,control
|
| 206 |
+
70,us-east-1,control
|
| 207 |
+
71,ap-southeast-1,control
|
| 208 |
+
71,us-east-1,control
|
| 209 |
+
71,ap-northeast-1,control
|
| 210 |
+
71,us-west-2,control
|
| 211 |
+
72,ap-southeast-1,control
|
| 212 |
+
72,us-west-2,control
|
| 213 |
+
72,us-east-1,control
|
| 214 |
+
72,ap-northeast-1,control
|
| 215 |
+
73,us-west-2,control
|
| 216 |
+
73,eu-west-1,control
|
| 217 |
+
73,ap-southeast-1,control
|
| 218 |
+
73,ap-northeast-1,control
|
| 219 |
+
74,eu-west-1,control
|
| 220 |
+
74,ap-southeast-1,control
|
| 221 |
+
74,ap-northeast-1,control
|
| 222 |
+
74,us-east-1,control
|
| 223 |
+
75,ap-northeast-1,control
|
| 224 |
+
75,eu-west-1,control
|
| 225 |
+
75,ap-southeast-1,control
|
| 226 |
+
76,ap-northeast-1,control
|
| 227 |
+
76,us-west-2,control
|
| 228 |
+
76,us-east-1,control
|
| 229 |
+
76,ap-southeast-1,control
|
| 230 |
+
77,ap-northeast-1,control
|
| 231 |
+
77,us-west-2,control
|
| 232 |
+
77,us-east-1,control
|
| 233 |
+
78,ap-southeast-1,control
|
| 234 |
+
78,ap-northeast-1,control
|
| 235 |
+
79,us-west-2,control
|
| 236 |
+
79,us-east-1,control
|
| 237 |
+
80,ap-northeast-1,control
|
| 238 |
+
80,ap-southeast-1,control
|
| 239 |
+
80,us-west-2,control
|
| 240 |
+
81,ap-northeast-1,control
|
| 241 |
+
81,eu-west-1,control
|
| 242 |
+
81,us-west-2,control
|
| 243 |
+
82,ap-northeast-1,control
|
| 244 |
+
82,us-east-1,control
|
| 245 |
+
83,eu-west-1,control
|
| 246 |
+
83,ap-southeast-1,control
|
| 247 |
+
84,ap-southeast-1,control
|
| 248 |
+
84,eu-west-1,control
|
| 249 |
+
85,us-west-2,control
|
| 250 |
+
85,eu-west-1,control
|
| 251 |
+
85,ap-northeast-1,control
|
| 252 |
+
86,ap-southeast-1,control
|
| 253 |
+
86,ap-northeast-1,control
|
| 254 |
+
87,eu-west-1,control
|
| 255 |
+
87,ap-northeast-1,control
|
| 256 |
+
87,us-east-1,control
|
| 257 |
+
88,us-east-1,control
|
| 258 |
+
88,ap-northeast-1,control
|
| 259 |
+
88,eu-west-1,control
|
| 260 |
+
89,eu-west-1,control
|
| 261 |
+
89,ap-northeast-1,control
|
| 262 |
+
89,ap-southeast-1,control
|
| 263 |
+
89,us-west-2,control
|
| 264 |
+
90,ap-southeast-1,control
|
| 265 |
+
90,us-east-1,control
|
| 266 |
+
90,ap-northeast-1,control
|
| 267 |
+
91,eu-west-1,control
|
| 268 |
+
91,ap-northeast-1,control
|
| 269 |
+
91,us-west-2,control
|
| 270 |
+
91,us-east-1,control
|
| 271 |
+
92,ap-southeast-1,control
|
| 272 |
+
92,ap-northeast-1,control
|
| 273 |
+
93,ap-southeast-1,control
|
| 274 |
+
93,ap-northeast-1,control
|
| 275 |
+
93,eu-west-1,control
|
| 276 |
+
93,us-east-1,control
|
| 277 |
+
94,ap-northeast-1,control
|
| 278 |
+
94,eu-west-1,control
|
| 279 |
+
94,ap-southeast-1,control
|
| 280 |
+
94,us-west-2,control
|
| 281 |
+
95,us-east-1,control
|
| 282 |
+
95,ap-northeast-1,control
|
| 283 |
+
95,us-west-2,control
|
| 284 |
+
95,ap-southeast-1,control
|
| 285 |
+
96,us-east-1,control
|
| 286 |
+
96,eu-west-1,control
|
| 287 |
+
97,us-east-1,control
|
| 288 |
+
97,eu-west-1,control
|
| 289 |
+
98,eu-west-1,control
|
| 290 |
+
98,us-west-2,control
|
| 291 |
+
98,us-east-1,control
|
| 292 |
+
98,ap-northeast-1,control
|
| 293 |
+
99,us-east-1,control
|
| 294 |
+
99,us-west-2,control
|
| 295 |
+
99,eu-west-1,control
|
| 296 |
+
99,ap-southeast-1,control
|
| 297 |
+
100,us-east-1,control
|
| 298 |
+
100,us-west-2,control
|
| 299 |
+
100,eu-west-1,control
|
| 300 |
+
101,ap-northeast-1,control
|
| 301 |
+
101,eu-west-1,control
|
| 302 |
+
102,ap-northeast-1,control
|
| 303 |
+
102,eu-west-1,control
|
| 304 |
+
103,eu-west-1,control
|
| 305 |
+
103,ap-southeast-1,control
|
| 306 |
+
103,us-east-1,control
|
| 307 |
+
104,eu-west-1,control
|
| 308 |
+
104,ap-southeast-1,control
|
| 309 |
+
104,us-west-2,control
|
| 310 |
+
104,us-east-1,control
|
| 311 |
+
105,eu-west-1,control
|
| 312 |
+
105,ap-southeast-1,control
|
| 313 |
+
105,ap-northeast-1,control
|
| 314 |
+
105,us-west-2,control
|
| 315 |
+
106,us-west-2,control
|
| 316 |
+
106,eu-west-1,control
|
| 317 |
+
107,ap-southeast-1,control
|
| 318 |
+
107,eu-west-1,control
|
| 319 |
+
107,us-west-2,control
|
| 320 |
+
108,ap-southeast-1,control
|
| 321 |
+
108,ap-northeast-1,control
|
| 322 |
+
109,us-west-2,control
|
| 323 |
+
109,eu-west-1,control
|
| 324 |
+
110,us-west-2,control
|
| 325 |
+
110,eu-west-1,control
|
| 326 |
+
111,eu-west-1,control
|
| 327 |
+
111,us-west-2,control
|
| 328 |
+
112,us-west-2,control
|
| 329 |
+
112,us-east-1,control
|
| 330 |
+
113,us-west-2,control
|
| 331 |
+
113,ap-northeast-1,control
|
| 332 |
+
113,us-east-1,control
|
| 333 |
+
114,ap-northeast-1,control
|
| 334 |
+
114,ap-southeast-1,control
|
| 335 |
+
114,us-west-2,control
|
| 336 |
+
114,us-east-1,control
|
| 337 |
+
115,us-east-1,control
|
| 338 |
+
115,eu-west-1,control
|
| 339 |
+
116,ap-southeast-1,control
|
| 340 |
+
116,eu-west-1,control
|
| 341 |
+
117,ap-southeast-1,control
|
| 342 |
+
117,us-west-2,control
|
| 343 |
+
118,ap-southeast-1,control
|
| 344 |
+
118,ap-northeast-1,control
|
| 345 |
+
118,eu-west-1,control
|
| 346 |
+
118,us-east-1,control
|
| 347 |
+
119,eu-west-1,control
|
| 348 |
+
119,ap-northeast-1,control
|
| 349 |
+
119,ap-southeast-1,control
|
| 350 |
+
119,us-west-2,control
|
| 351 |
+
120,us-east-1,control
|
| 352 |
+
120,ap-southeast-1,control
|
| 353 |
+
120,ap-northeast-1,control
|
| 354 |
+
120,eu-west-1,control
|
| 355 |
+
121,ap-northeast-1,control
|
| 356 |
+
121,us-west-2,control
|
| 357 |
+
121,us-east-1,control
|
| 358 |
+
122,eu-west-1,control
|
| 359 |
+
122,ap-northeast-1,control
|
| 360 |
+
122,ap-southeast-1,control
|
| 361 |
+
122,us-west-2,control
|
| 362 |
+
123,eu-west-1,control
|
| 363 |
+
123,us-east-1,control
|
| 364 |
+
123,ap-northeast-1,control
|
| 365 |
+
124,us-west-2,control
|
| 366 |
+
124,us-east-1,control
|
| 367 |
+
125,ap-southeast-1,control
|
| 368 |
+
125,us-west-2,control
|
| 369 |
+
125,us-east-1,control
|
| 370 |
+
126,us-west-2,control
|
| 371 |
+
126,us-east-1,control
|
| 372 |
+
126,ap-northeast-1,control
|
| 373 |
+
127,us-west-2,control
|
| 374 |
+
127,ap-northeast-1,control
|
| 375 |
+
128,ap-northeast-1,control
|
| 376 |
+
128,eu-west-1,control
|
| 377 |
+
129,us-west-2,control
|
| 378 |
+
129,eu-west-1,control
|
| 379 |
+
129,ap-southeast-1,control
|
| 380 |
+
130,ap-southeast-1,control
|
| 381 |
+
130,us-west-2,control
|
| 382 |
+
130,us-east-1,control
|
| 383 |
+
130,ap-northeast-1,control
|
| 384 |
+
131,eu-west-1,control
|
| 385 |
+
131,us-west-2,control
|
| 386 |
+
131,us-east-1,control
|
| 387 |
+
131,ap-northeast-1,control
|
| 388 |
+
132,ap-northeast-1,control
|
| 389 |
+
132,ap-southeast-1,control
|
| 390 |
+
132,eu-west-1,control
|
| 391 |
+
133,us-west-2,control
|
| 392 |
+
133,ap-northeast-1,control
|
| 393 |
+
134,ap-southeast-1,control
|
| 394 |
+
134,us-west-2,control
|
| 395 |
+
135,us-east-1,control
|
| 396 |
+
135,eu-west-1,control
|
| 397 |
+
136,us-west-2,control
|
| 398 |
+
136,eu-west-1,control
|
| 399 |
+
136,ap-southeast-1,control
|
| 400 |
+
137,ap-southeast-1,control
|
| 401 |
+
137,us-east-1,control
|
| 402 |
+
137,ap-northeast-1,control
|
| 403 |
+
137,eu-west-1,control
|
| 404 |
+
138,ap-northeast-1,control
|
| 405 |
+
138,us-east-1,control
|
| 406 |
+
139,ap-northeast-1,control
|
| 407 |
+
139,us-east-1,control
|
| 408 |
+
140,us-west-2,control
|
| 409 |
+
140,us-east-1,control
|
| 410 |
+
140,ap-southeast-1,control
|
| 411 |
+
141,us-west-2,control
|
| 412 |
+
141,us-east-1,control
|
| 413 |
+
141,ap-southeast-1,control
|
| 414 |
+
142,eu-west-1,control
|
| 415 |
+
142,ap-southeast-1,control
|
| 416 |
+
142,ap-northeast-1,control
|
| 417 |
+
143,eu-west-1,control
|
| 418 |
+
143,us-east-1,control
|
| 419 |
+
143,ap-northeast-1,control
|
| 420 |
+
143,ap-southeast-1,control
|
| 421 |
+
144,us-east-1,control
|
| 422 |
+
144,us-west-2,control
|
| 423 |
+
144,ap-northeast-1,control
|
| 424 |
+
145,us-east-1,control
|
| 425 |
+
145,ap-southeast-1,control
|
| 426 |
+
146,ap-northeast-1,control
|
| 427 |
+
146,us-west-2,control
|
| 428 |
+
146,ap-southeast-1,control
|
| 429 |
+
147,us-east-1,control
|
| 430 |
+
147,ap-northeast-1,control
|
| 431 |
+
148,ap-southeast-1,control
|
| 432 |
+
148,us-east-1,control
|
| 433 |
+
148,ap-northeast-1,control
|
| 434 |
+
149,us-east-1,control
|
| 435 |
+
149,us-west-2,control
|
| 436 |
+
149,ap-northeast-1,control
|
| 437 |
+
150,ap-southeast-1,control
|
| 438 |
+
150,us-east-1,control
|
| 439 |
+
150,us-west-2,control
|
| 440 |
+
150,eu-west-1,control
|
results/treatment_placement.csv
ADDED
|
@@ -0,0 +1,379 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
service_id,region,strategy
|
| 2 |
+
1,us-east-1,treatment
|
| 3 |
+
1,us-west-2,treatment
|
| 4 |
+
1,eu-west-1,treatment
|
| 5 |
+
2,us-west-2,treatment
|
| 6 |
+
2,us-east-1,treatment
|
| 7 |
+
3,us-west-2,treatment
|
| 8 |
+
3,us-east-1,treatment
|
| 9 |
+
4,us-west-2,treatment
|
| 10 |
+
4,us-east-1,treatment
|
| 11 |
+
5,us-west-2,treatment
|
| 12 |
+
5,us-east-1,treatment
|
| 13 |
+
6,us-west-2,treatment
|
| 14 |
+
6,us-east-1,treatment
|
| 15 |
+
7,us-west-2,treatment
|
| 16 |
+
7,us-east-1,treatment
|
| 17 |
+
8,us-east-1,treatment
|
| 18 |
+
8,us-west-2,treatment
|
| 19 |
+
8,eu-west-1,treatment
|
| 20 |
+
9,us-west-2,treatment
|
| 21 |
+
9,us-east-1,treatment
|
| 22 |
+
10,us-east-1,treatment
|
| 23 |
+
10,us-west-2,treatment
|
| 24 |
+
10,eu-west-1,treatment
|
| 25 |
+
11,us-west-2,treatment
|
| 26 |
+
11,us-east-1,treatment
|
| 27 |
+
12,us-east-1,treatment
|
| 28 |
+
12,us-west-2,treatment
|
| 29 |
+
12,eu-west-1,treatment
|
| 30 |
+
13,us-east-1,treatment
|
| 31 |
+
13,us-west-2,treatment
|
| 32 |
+
13,eu-west-1,treatment
|
| 33 |
+
14,us-east-1,treatment
|
| 34 |
+
14,us-west-2,treatment
|
| 35 |
+
14,eu-west-1,treatment
|
| 36 |
+
15,us-west-2,treatment
|
| 37 |
+
15,us-east-1,treatment
|
| 38 |
+
16,us-east-1,treatment
|
| 39 |
+
16,us-west-2,treatment
|
| 40 |
+
16,eu-west-1,treatment
|
| 41 |
+
17,us-east-1,treatment
|
| 42 |
+
17,us-west-2,treatment
|
| 43 |
+
17,eu-west-1,treatment
|
| 44 |
+
18,us-east-1,treatment
|
| 45 |
+
18,us-west-2,treatment
|
| 46 |
+
18,eu-west-1,treatment
|
| 47 |
+
19,us-west-2,treatment
|
| 48 |
+
19,us-east-1,treatment
|
| 49 |
+
20,us-west-2,treatment
|
| 50 |
+
20,us-east-1,treatment
|
| 51 |
+
21,us-east-1,treatment
|
| 52 |
+
21,us-west-2,treatment
|
| 53 |
+
21,eu-west-1,treatment
|
| 54 |
+
22,us-west-2,treatment
|
| 55 |
+
22,us-east-1,treatment
|
| 56 |
+
23,us-west-2,treatment
|
| 57 |
+
23,us-east-1,treatment
|
| 58 |
+
24,us-west-2,treatment
|
| 59 |
+
24,us-east-1,treatment
|
| 60 |
+
25,us-east-1,treatment
|
| 61 |
+
25,us-west-2,treatment
|
| 62 |
+
25,eu-west-1,treatment
|
| 63 |
+
26,us-east-1,treatment
|
| 64 |
+
26,us-west-2,treatment
|
| 65 |
+
26,eu-west-1,treatment
|
| 66 |
+
27,us-east-1,treatment
|
| 67 |
+
27,us-west-2,treatment
|
| 68 |
+
27,eu-west-1,treatment
|
| 69 |
+
28,us-west-2,treatment
|
| 70 |
+
28,us-east-1,treatment
|
| 71 |
+
29,us-west-2,treatment
|
| 72 |
+
29,us-east-1,treatment
|
| 73 |
+
30,us-east-1,treatment
|
| 74 |
+
30,us-west-2,treatment
|
| 75 |
+
30,eu-west-1,treatment
|
| 76 |
+
31,us-east-1,treatment
|
| 77 |
+
31,us-west-2,treatment
|
| 78 |
+
31,eu-west-1,treatment
|
| 79 |
+
32,us-west-2,treatment
|
| 80 |
+
32,us-east-1,treatment
|
| 81 |
+
33,us-west-2,treatment
|
| 82 |
+
33,us-east-1,treatment
|
| 83 |
+
34,us-east-1,treatment
|
| 84 |
+
34,us-west-2,treatment
|
| 85 |
+
34,eu-west-1,treatment
|
| 86 |
+
35,us-west-2,treatment
|
| 87 |
+
35,us-east-1,treatment
|
| 88 |
+
36,us-east-1,treatment
|
| 89 |
+
36,us-west-2,treatment
|
| 90 |
+
36,eu-west-1,treatment
|
| 91 |
+
37,us-west-2,treatment
|
| 92 |
+
37,us-east-1,treatment
|
| 93 |
+
38,us-west-2,treatment
|
| 94 |
+
38,us-east-1,treatment
|
| 95 |
+
39,us-west-2,treatment
|
| 96 |
+
39,us-east-1,treatment
|
| 97 |
+
40,us-east-1,treatment
|
| 98 |
+
40,us-west-2,treatment
|
| 99 |
+
40,eu-west-1,treatment
|
| 100 |
+
41,us-east-1,treatment
|
| 101 |
+
41,us-west-2,treatment
|
| 102 |
+
41,eu-west-1,treatment
|
| 103 |
+
42,us-west-2,treatment
|
| 104 |
+
42,us-east-1,treatment
|
| 105 |
+
43,us-east-1,treatment
|
| 106 |
+
43,us-west-2,treatment
|
| 107 |
+
43,eu-west-1,treatment
|
| 108 |
+
44,us-west-2,treatment
|
| 109 |
+
44,us-east-1,treatment
|
| 110 |
+
45,us-west-2,treatment
|
| 111 |
+
45,us-east-1,treatment
|
| 112 |
+
46,us-west-2,treatment
|
| 113 |
+
46,us-east-1,treatment
|
| 114 |
+
47,us-west-2,treatment
|
| 115 |
+
47,us-east-1,treatment
|
| 116 |
+
48,us-west-2,treatment
|
| 117 |
+
48,us-east-1,treatment
|
| 118 |
+
49,us-west-2,treatment
|
| 119 |
+
49,us-east-1,treatment
|
| 120 |
+
50,us-west-2,treatment
|
| 121 |
+
50,us-east-1,treatment
|
| 122 |
+
51,us-west-2,treatment
|
| 123 |
+
51,us-east-1,treatment
|
| 124 |
+
52,us-east-1,treatment
|
| 125 |
+
52,us-west-2,treatment
|
| 126 |
+
52,eu-west-1,treatment
|
| 127 |
+
53,us-west-2,treatment
|
| 128 |
+
53,us-east-1,treatment
|
| 129 |
+
54,us-west-2,treatment
|
| 130 |
+
54,us-east-1,treatment
|
| 131 |
+
55,us-east-1,treatment
|
| 132 |
+
55,us-west-2,treatment
|
| 133 |
+
55,eu-west-1,treatment
|
| 134 |
+
56,us-west-2,treatment
|
| 135 |
+
56,us-east-1,treatment
|
| 136 |
+
57,us-west-2,treatment
|
| 137 |
+
57,us-east-1,treatment
|
| 138 |
+
58,us-west-2,treatment
|
| 139 |
+
58,us-east-1,treatment
|
| 140 |
+
59,us-east-1,treatment
|
| 141 |
+
59,us-west-2,treatment
|
| 142 |
+
59,eu-west-1,treatment
|
| 143 |
+
60,us-east-1,treatment
|
| 144 |
+
60,us-west-2,treatment
|
| 145 |
+
60,eu-west-1,treatment
|
| 146 |
+
61,us-east-1,treatment
|
| 147 |
+
61,us-west-2,treatment
|
| 148 |
+
61,eu-west-1,treatment
|
| 149 |
+
62,us-west-2,treatment
|
| 150 |
+
62,us-east-1,treatment
|
| 151 |
+
63,us-west-2,treatment
|
| 152 |
+
63,us-east-1,treatment
|
| 153 |
+
64,us-east-1,treatment
|
| 154 |
+
64,us-west-2,treatment
|
| 155 |
+
64,eu-west-1,treatment
|
| 156 |
+
65,us-west-2,treatment
|
| 157 |
+
65,us-east-1,treatment
|
| 158 |
+
66,us-west-2,treatment
|
| 159 |
+
66,us-east-1,treatment
|
| 160 |
+
67,us-east-1,treatment
|
| 161 |
+
67,us-west-2,treatment
|
| 162 |
+
67,eu-west-1,treatment
|
| 163 |
+
68,us-west-2,treatment
|
| 164 |
+
68,us-east-1,treatment
|
| 165 |
+
69,us-east-1,treatment
|
| 166 |
+
69,us-west-2,treatment
|
| 167 |
+
69,eu-west-1,treatment
|
| 168 |
+
70,us-west-2,treatment
|
| 169 |
+
70,us-east-1,treatment
|
| 170 |
+
71,us-west-2,treatment
|
| 171 |
+
71,us-east-1,treatment
|
| 172 |
+
72,us-west-2,treatment
|
| 173 |
+
72,us-east-1,treatment
|
| 174 |
+
73,us-east-1,treatment
|
| 175 |
+
73,us-west-2,treatment
|
| 176 |
+
73,eu-west-1,treatment
|
| 177 |
+
74,us-west-2,treatment
|
| 178 |
+
74,us-east-1,treatment
|
| 179 |
+
75,us-east-1,treatment
|
| 180 |
+
75,us-west-2,treatment
|
| 181 |
+
75,eu-west-1,treatment
|
| 182 |
+
76,us-east-1,treatment
|
| 183 |
+
76,us-west-2,treatment
|
| 184 |
+
76,eu-west-1,treatment
|
| 185 |
+
77,us-west-2,treatment
|
| 186 |
+
77,us-east-1,treatment
|
| 187 |
+
78,us-west-2,treatment
|
| 188 |
+
78,us-east-1,treatment
|
| 189 |
+
79,us-east-1,treatment
|
| 190 |
+
79,us-west-2,treatment
|
| 191 |
+
79,eu-west-1,treatment
|
| 192 |
+
80,us-west-2,treatment
|
| 193 |
+
80,us-east-1,treatment
|
| 194 |
+
81,us-east-1,treatment
|
| 195 |
+
81,us-west-2,treatment
|
| 196 |
+
81,eu-west-1,treatment
|
| 197 |
+
82,us-west-2,treatment
|
| 198 |
+
82,us-east-1,treatment
|
| 199 |
+
83,us-east-1,treatment
|
| 200 |
+
83,us-west-2,treatment
|
| 201 |
+
83,eu-west-1,treatment
|
| 202 |
+
84,us-east-1,treatment
|
| 203 |
+
84,us-west-2,treatment
|
| 204 |
+
84,eu-west-1,treatment
|
| 205 |
+
85,us-east-1,treatment
|
| 206 |
+
85,us-west-2,treatment
|
| 207 |
+
85,eu-west-1,treatment
|
| 208 |
+
86,us-east-1,treatment
|
| 209 |
+
86,us-west-2,treatment
|
| 210 |
+
86,eu-west-1,treatment
|
| 211 |
+
87,us-east-1,treatment
|
| 212 |
+
87,us-west-2,treatment
|
| 213 |
+
87,eu-west-1,treatment
|
| 214 |
+
88,us-east-1,treatment
|
| 215 |
+
88,us-west-2,treatment
|
| 216 |
+
88,eu-west-1,treatment
|
| 217 |
+
89,us-east-1,treatment
|
| 218 |
+
89,us-west-2,treatment
|
| 219 |
+
89,eu-west-1,treatment
|
| 220 |
+
90,us-west-2,treatment
|
| 221 |
+
90,us-east-1,treatment
|
| 222 |
+
91,us-east-1,treatment
|
| 223 |
+
91,us-west-2,treatment
|
| 224 |
+
91,eu-west-1,treatment
|
| 225 |
+
92,us-west-2,treatment
|
| 226 |
+
92,us-east-1,treatment
|
| 227 |
+
93,us-east-1,treatment
|
| 228 |
+
93,us-west-2,treatment
|
| 229 |
+
93,eu-west-1,treatment
|
| 230 |
+
94,us-east-1,treatment
|
| 231 |
+
94,us-west-2,treatment
|
| 232 |
+
94,eu-west-1,treatment
|
| 233 |
+
95,us-east-1,treatment
|
| 234 |
+
95,us-west-2,treatment
|
| 235 |
+
95,eu-west-1,treatment
|
| 236 |
+
96,us-west-2,treatment
|
| 237 |
+
96,us-east-1,treatment
|
| 238 |
+
97,us-west-2,treatment
|
| 239 |
+
97,us-east-1,treatment
|
| 240 |
+
98,us-west-2,treatment
|
| 241 |
+
98,us-east-1,treatment
|
| 242 |
+
99,us-east-1,treatment
|
| 243 |
+
99,us-west-2,treatment
|
| 244 |
+
99,eu-west-1,treatment
|
| 245 |
+
100,us-west-2,treatment
|
| 246 |
+
100,us-east-1,treatment
|
| 247 |
+
101,us-east-1,treatment
|
| 248 |
+
101,us-west-2,treatment
|
| 249 |
+
101,eu-west-1,treatment
|
| 250 |
+
102,us-east-1,treatment
|
| 251 |
+
102,us-west-2,treatment
|
| 252 |
+
102,eu-west-1,treatment
|
| 253 |
+
103,us-west-2,treatment
|
| 254 |
+
103,us-east-1,treatment
|
| 255 |
+
104,us-west-2,treatment
|
| 256 |
+
104,us-east-1,treatment
|
| 257 |
+
105,us-east-1,treatment
|
| 258 |
+
105,us-west-2,treatment
|
| 259 |
+
105,eu-west-1,treatment
|
| 260 |
+
106,us-east-1,treatment
|
| 261 |
+
106,us-west-2,treatment
|
| 262 |
+
106,eu-west-1,treatment
|
| 263 |
+
107,us-east-1,treatment
|
| 264 |
+
107,us-west-2,treatment
|
| 265 |
+
107,eu-west-1,treatment
|
| 266 |
+
108,us-west-2,treatment
|
| 267 |
+
108,us-east-1,treatment
|
| 268 |
+
109,us-west-2,treatment
|
| 269 |
+
109,us-east-1,treatment
|
| 270 |
+
110,us-west-2,treatment
|
| 271 |
+
110,us-east-1,treatment
|
| 272 |
+
111,us-west-2,treatment
|
| 273 |
+
111,us-east-1,treatment
|
| 274 |
+
112,us-east-1,treatment
|
| 275 |
+
112,us-west-2,treatment
|
| 276 |
+
112,eu-west-1,treatment
|
| 277 |
+
113,us-east-1,treatment
|
| 278 |
+
113,us-west-2,treatment
|
| 279 |
+
113,eu-west-1,treatment
|
| 280 |
+
114,us-east-1,treatment
|
| 281 |
+
114,us-west-2,treatment
|
| 282 |
+
114,eu-west-1,treatment
|
| 283 |
+
115,us-east-1,treatment
|
| 284 |
+
115,us-west-2,treatment
|
| 285 |
+
115,eu-west-1,treatment
|
| 286 |
+
116,us-east-1,treatment
|
| 287 |
+
116,us-west-2,treatment
|
| 288 |
+
116,eu-west-1,treatment
|
| 289 |
+
117,us-west-2,treatment
|
| 290 |
+
117,us-east-1,treatment
|
| 291 |
+
118,us-east-1,treatment
|
| 292 |
+
118,us-west-2,treatment
|
| 293 |
+
118,eu-west-1,treatment
|
| 294 |
+
119,us-east-1,treatment
|
| 295 |
+
119,us-west-2,treatment
|
| 296 |
+
119,eu-west-1,treatment
|
| 297 |
+
120,us-east-1,treatment
|
| 298 |
+
120,us-west-2,treatment
|
| 299 |
+
120,eu-west-1,treatment
|
| 300 |
+
121,us-east-1,treatment
|
| 301 |
+
121,us-west-2,treatment
|
| 302 |
+
121,eu-west-1,treatment
|
| 303 |
+
122,us-east-1,treatment
|
| 304 |
+
122,us-west-2,treatment
|
| 305 |
+
122,eu-west-1,treatment
|
| 306 |
+
123,us-east-1,treatment
|
| 307 |
+
123,us-west-2,treatment
|
| 308 |
+
123,eu-west-1,treatment
|
| 309 |
+
124,us-east-1,treatment
|
| 310 |
+
124,us-west-2,treatment
|
| 311 |
+
124,eu-west-1,treatment
|
| 312 |
+
125,us-west-2,treatment
|
| 313 |
+
125,us-east-1,treatment
|
| 314 |
+
126,us-west-2,treatment
|
| 315 |
+
126,us-east-1,treatment
|
| 316 |
+
127,us-east-1,treatment
|
| 317 |
+
127,us-west-2,treatment
|
| 318 |
+
127,eu-west-1,treatment
|
| 319 |
+
128,us-west-2,treatment
|
| 320 |
+
128,us-east-1,treatment
|
| 321 |
+
129,us-east-1,treatment
|
| 322 |
+
129,us-west-2,treatment
|
| 323 |
+
129,eu-west-1,treatment
|
| 324 |
+
130,us-west-2,treatment
|
| 325 |
+
130,us-east-1,treatment
|
| 326 |
+
131,us-east-1,treatment
|
| 327 |
+
131,us-west-2,treatment
|
| 328 |
+
131,eu-west-1,treatment
|
| 329 |
+
132,us-east-1,treatment
|
| 330 |
+
132,us-west-2,treatment
|
| 331 |
+
132,eu-west-1,treatment
|
| 332 |
+
133,us-east-1,treatment
|
| 333 |
+
133,us-west-2,treatment
|
| 334 |
+
133,eu-west-1,treatment
|
| 335 |
+
134,us-east-1,treatment
|
| 336 |
+
134,us-west-2,treatment
|
| 337 |
+
134,eu-west-1,treatment
|
| 338 |
+
135,us-east-1,treatment
|
| 339 |
+
135,us-west-2,treatment
|
| 340 |
+
135,eu-west-1,treatment
|
| 341 |
+
136,us-east-1,treatment
|
| 342 |
+
136,us-west-2,treatment
|
| 343 |
+
136,eu-west-1,treatment
|
| 344 |
+
137,us-east-1,treatment
|
| 345 |
+
137,us-west-2,treatment
|
| 346 |
+
137,eu-west-1,treatment
|
| 347 |
+
138,us-west-2,treatment
|
| 348 |
+
138,us-east-1,treatment
|
| 349 |
+
139,us-east-1,treatment
|
| 350 |
+
139,us-west-2,treatment
|
| 351 |
+
139,eu-west-1,treatment
|
| 352 |
+
140,us-east-1,treatment
|
| 353 |
+
140,us-west-2,treatment
|
| 354 |
+
140,eu-west-1,treatment
|
| 355 |
+
141,us-east-1,treatment
|
| 356 |
+
141,us-west-2,treatment
|
| 357 |
+
141,eu-west-1,treatment
|
| 358 |
+
142,us-west-2,treatment
|
| 359 |
+
142,us-east-1,treatment
|
| 360 |
+
143,us-west-2,treatment
|
| 361 |
+
143,us-east-1,treatment
|
| 362 |
+
144,us-west-2,treatment
|
| 363 |
+
144,us-east-1,treatment
|
| 364 |
+
145,us-west-2,treatment
|
| 365 |
+
145,us-east-1,treatment
|
| 366 |
+
146,us-east-1,treatment
|
| 367 |
+
146,us-west-2,treatment
|
| 368 |
+
146,eu-west-1,treatment
|
| 369 |
+
147,us-east-1,treatment
|
| 370 |
+
147,us-west-2,treatment
|
| 371 |
+
147,eu-west-1,treatment
|
| 372 |
+
148,us-east-1,treatment
|
| 373 |
+
148,us-west-2,treatment
|
| 374 |
+
148,eu-west-1,treatment
|
| 375 |
+
149,us-east-1,treatment
|
| 376 |
+
149,us-west-2,treatment
|
| 377 |
+
149,eu-west-1,treatment
|
| 378 |
+
150,us-west-2,treatment
|
| 379 |
+
150,us-east-1,treatment
|
setup_database.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
print("Setting up SQLite Database...\n")
|
| 6 |
+
|
| 7 |
+
# Create/connect to database
|
| 8 |
+
db_path = 'resource_optimization.db'
|
| 9 |
+
conn = sqlite3.connect(db_path)
|
| 10 |
+
cursor = conn.cursor()
|
| 11 |
+
|
| 12 |
+
print(f"Connected to database: {db_path}\n")
|
| 13 |
+
|
| 14 |
+
# ==================== Load Services ====================
|
| 15 |
+
print("Loading services.csv...")
|
| 16 |
+
services_df = pd.read_csv('data/services.csv')
|
| 17 |
+
services_df.to_sql('services', conn, if_exists='replace', index=False)
|
| 18 |
+
print(f"Loaded {len(services_df)} services\n")
|
| 19 |
+
|
| 20 |
+
# ==================== Load Regional Latency ====================
|
| 21 |
+
print("Loading regional_latency.csv...")
|
| 22 |
+
latency_df = pd.read_csv('data/regional_latency.csv')
|
| 23 |
+
latency_df['timestamp'] = pd.to_datetime(latency_df['timestamp'])
|
| 24 |
+
latency_df.to_sql('regional_latency', conn, if_exists='replace', index=False)
|
| 25 |
+
print(f"Loaded {len(latency_df)} latency records\n")
|
| 26 |
+
|
| 27 |
+
# ==================== Load Traffic Patterns ====================
|
| 28 |
+
print("Loading traffic_patterns.csv...")
|
| 29 |
+
traffic_df = pd.read_csv('data/traffic_patterns.csv')
|
| 30 |
+
traffic_df['timestamp'] = pd.to_datetime(traffic_df['timestamp'])
|
| 31 |
+
traffic_df.to_sql('traffic_patterns', conn, if_exists='replace', index=False)
|
| 32 |
+
print(f"Loaded {len(traffic_df)} traffic records\n")
|
| 33 |
+
|
| 34 |
+
# ==================== Load Service Placement ====================
|
| 35 |
+
print("Loading service_placement.csv...")
|
| 36 |
+
placement_df = pd.read_csv('data/service_placement.csv')
|
| 37 |
+
placement_df['timestamp'] = pd.to_datetime(placement_df['timestamp'])
|
| 38 |
+
placement_df.to_sql('service_placement', conn, if_exists='replace', index=False)
|
| 39 |
+
print(f"Loaded {len(placement_df)} placement records\n")
|
| 40 |
+
|
| 41 |
+
# ==================== Create Indexes (for faster queries) ====================
|
| 42 |
+
print("Creating indexes for faster queries...")
|
| 43 |
+
cursor.execute('CREATE INDEX IF NOT EXISTS idx_service_id ON services(service_id)')
|
| 44 |
+
cursor.execute('CREATE INDEX IF NOT EXISTS idx_service_placement_service ON service_placement(service_id)')
|
| 45 |
+
cursor.execute('CREATE INDEX IF NOT EXISTS idx_traffic_service ON traffic_patterns(service_id)')
|
| 46 |
+
cursor.execute('CREATE INDEX IF NOT EXISTS idx_latency_regions ON regional_latency(region1, region2)')
|
| 47 |
+
print("Indexes created\n")
|
| 48 |
+
|
| 49 |
+
conn.commit()
|
| 50 |
+
|
| 51 |
+
# ==================== Verify Data ====================
|
| 52 |
+
print("="*60)
|
| 53 |
+
print("DATABASE SETUP COMPLETE!")
|
| 54 |
+
print("="*60)
|
| 55 |
+
|
| 56 |
+
# Show table info
|
| 57 |
+
tables = cursor.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
|
| 58 |
+
print(f"\nTables in database ({len(tables)}):")
|
| 59 |
+
for table in tables:
|
| 60 |
+
count = cursor.execute(f"SELECT COUNT(*) FROM {table[0]}").fetchone()[0]
|
| 61 |
+
print(f" • {table[0]}: {count:,} rows")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
conn.close()
|
train_models.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from sklearn.model_selection import train_test_split, cross_val_score
|
| 5 |
+
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
| 6 |
+
from sklearn.ensemble import RandomForestClassifier
|
| 7 |
+
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
|
| 8 |
+
import xgboost as xgb
|
| 9 |
+
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
import seaborn as sns
|
| 12 |
+
import joblib
|
| 13 |
+
import warnings
|
| 14 |
+
warnings.filterwarnings('ignore')
|
| 15 |
+
|
| 16 |
+
print("Training ML Models\n")
|
| 17 |
+
|
| 18 |
+
# ==================== LOAD DATA ====================
|
| 19 |
+
print("="*70)
|
| 20 |
+
print("Loading Data from Database")
|
| 21 |
+
print("="*70)
|
| 22 |
+
|
| 23 |
+
conn = sqlite3.connect('resource_optimization.db')
|
| 24 |
+
|
| 25 |
+
# Load all tables
|
| 26 |
+
services = pd.read_sql_query("SELECT * FROM services", conn)
|
| 27 |
+
latency = pd.read_sql_query("SELECT * FROM regional_latency", conn)
|
| 28 |
+
traffic = pd.read_sql_query("SELECT * FROM traffic_patterns", conn)
|
| 29 |
+
placement = pd.read_sql_query("SELECT * FROM service_placement", conn)
|
| 30 |
+
|
| 31 |
+
print(f"Loaded {len(services)} services")
|
| 32 |
+
print(f"Loaded {len(latency)} latency records")
|
| 33 |
+
print(f"Loaded {len(traffic)} traffic records")
|
| 34 |
+
print(f"Loaded {len(placement)} placement records\n")
|
| 35 |
+
|
| 36 |
+
# ==================== FEATURE ENGINEERING ====================
|
| 37 |
+
print("="*70)
|
| 38 |
+
print("Feature Engineering")
|
| 39 |
+
print("="*70)
|
| 40 |
+
|
| 41 |
+
# Create a feature matrix from placement data
|
| 42 |
+
placement['timestamp'] = pd.to_datetime(placement['timestamp'])
|
| 43 |
+
traffic['timestamp'] = pd.to_datetime(traffic['timestamp'])
|
| 44 |
+
|
| 45 |
+
# Aggregate traffic by service and region
|
| 46 |
+
traffic_agg = traffic.groupby(['service_id', 'region']).agg({
|
| 47 |
+
'requests': ['mean', 'std', 'max'],
|
| 48 |
+
'hour': 'count' # number of hours in dataset
|
| 49 |
+
}).reset_index()
|
| 50 |
+
|
| 51 |
+
traffic_agg.columns = ['service_id', 'region', 'avg_requests', 'std_requests', 'max_requests', 'num_hours']
|
| 52 |
+
traffic_agg['cv_requests'] = traffic_agg['std_requests'] / (traffic_agg['avg_requests'] + 1) # coefficient of variation
|
| 53 |
+
|
| 54 |
+
# Aggregate latency by region pair
|
| 55 |
+
latency_agg = latency.groupby(['region1', 'region2']).agg({
|
| 56 |
+
'latency_ms': ['mean', 'std']
|
| 57 |
+
}).reset_index()
|
| 58 |
+
latency_agg.columns = ['region1', 'region2', 'avg_latency', 'std_latency']
|
| 59 |
+
|
| 60 |
+
# Create training dataset for MODEL 1 (Latency Prediction)
|
| 61 |
+
print("\nBuilding training dataset for latency prediction...")
|
| 62 |
+
|
| 63 |
+
# Merge placement with service info and traffic
|
| 64 |
+
training_data = placement.merge(services[['service_id', 'memory_mb', 'cpu_cores', 'latency_critical', 'dependencies']],
|
| 65 |
+
on='service_id', how='left')
|
| 66 |
+
training_data = training_data.merge(traffic_agg,
|
| 67 |
+
left_on=['service_id', 'region'],
|
| 68 |
+
right_on=['service_id', 'region'],
|
| 69 |
+
how='left')
|
| 70 |
+
|
| 71 |
+
# Merge with latency info (use region to all other regions as features)
|
| 72 |
+
# For simplicity, we'll add the average latency from this region to all others
|
| 73 |
+
region_latency_avg = latency.groupby('region1')['latency_ms'].mean().reset_index()
|
| 74 |
+
region_latency_avg.columns = ['region', 'avg_outbound_latency']
|
| 75 |
+
training_data = training_data.merge(region_latency_avg, on='region', how='left')
|
| 76 |
+
|
| 77 |
+
# Fill missing values
|
| 78 |
+
training_data = training_data.fillna(0)
|
| 79 |
+
|
| 80 |
+
print(f"Created training dataset with {len(training_data)} rows and {training_data.shape[1]} columns")
|
| 81 |
+
|
| 82 |
+
# ==================== MODEL 1: LATENCY PREDICTION (XGBoost Regression) ====================
|
| 83 |
+
print("\n" + "="*70)
|
| 84 |
+
print("MODEL 1: LATENCY PREDICTION (XGBoost Regression)")
|
| 85 |
+
print("="*70)
|
| 86 |
+
|
| 87 |
+
# Features for latency prediction
|
| 88 |
+
feature_cols_latency = ['memory_mb', 'cpu_cores', 'dependencies', 'avg_requests',
|
| 89 |
+
'std_requests', 'max_requests', 'cv_requests', 'avg_outbound_latency', 'instances']
|
| 90 |
+
|
| 91 |
+
X_latency = training_data[feature_cols_latency].fillna(0)
|
| 92 |
+
y_latency = training_data['avg_latency_ms']
|
| 93 |
+
|
| 94 |
+
# Remove any rows with NaN or infinite values
|
| 95 |
+
mask = ~(X_latency.isna().any(axis=1) | np.isinf(X_latency.values).any(axis=1) | y_latency.isna())
|
| 96 |
+
X_latency = X_latency[mask]
|
| 97 |
+
y_latency = y_latency[mask]
|
| 98 |
+
|
| 99 |
+
X_train_lat, X_test_lat, y_train_lat, y_test_lat = train_test_split(
|
| 100 |
+
X_latency, y_latency, test_size=0.2, random_state=42
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
print(f"Training set: {len(X_train_lat)}, Test set: {len(X_test_lat)}")
|
| 104 |
+
|
| 105 |
+
# Scale features
|
| 106 |
+
scaler_latency = StandardScaler()
|
| 107 |
+
X_train_lat_scaled = scaler_latency.fit_transform(X_train_lat)
|
| 108 |
+
X_test_lat_scaled = scaler_latency.transform(X_test_lat)
|
| 109 |
+
|
| 110 |
+
# Train XGBoost
|
| 111 |
+
model_xgb = xgb.XGBRegressor(
|
| 112 |
+
n_estimators=100,
|
| 113 |
+
max_depth=5,
|
| 114 |
+
learning_rate=0.1,
|
| 115 |
+
random_state=42,
|
| 116 |
+
verbosity=0
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
model_xgb.fit(X_train_lat_scaled, y_train_lat)
|
| 120 |
+
|
| 121 |
+
# Evaluate
|
| 122 |
+
y_pred_lat = model_xgb.predict(X_test_lat_scaled)
|
| 123 |
+
mse = mean_squared_error(y_test_lat, y_pred_lat)
|
| 124 |
+
rmse = np.sqrt(mse)
|
| 125 |
+
mae = mean_absolute_error(y_test_lat, y_pred_lat)
|
| 126 |
+
r2 = r2_score(y_test_lat, y_pred_lat)
|
| 127 |
+
|
| 128 |
+
print(f"\nModel trained!")
|
| 129 |
+
print(f" RMSE: {rmse:.4f} ms")
|
| 130 |
+
print(f" MAE: {mae:.4f} ms")
|
| 131 |
+
print(f" R²: {r2:.4f}")
|
| 132 |
+
|
| 133 |
+
# Feature importance
|
| 134 |
+
feature_importance = pd.DataFrame({
|
| 135 |
+
'feature': feature_cols_latency,
|
| 136 |
+
'importance': model_xgb.feature_importances_
|
| 137 |
+
}).sort_values('importance', ascending=False)
|
| 138 |
+
|
| 139 |
+
print(f"\nTop 5 Important Features:")
|
| 140 |
+
print(feature_importance.head())
|
| 141 |
+
|
| 142 |
+
# Save model
|
| 143 |
+
joblib.dump(model_xgb, 'models/xgboost_latency_model.pkl')
|
| 144 |
+
joblib.dump(scaler_latency, 'models/scaler_latency.pkl')
|
| 145 |
+
print(f"Saved to models/xgboost_latency_model.pkl")
|
| 146 |
+
|
| 147 |
+
# ==================== MODEL 2: PLACEMENT STRATEGY (Classification) ====================
|
| 148 |
+
print("\n" + "="*70)
|
| 149 |
+
print("MODEL 2: PLACEMENT STRATEGY (Classification)")
|
| 150 |
+
print("="*70)
|
| 151 |
+
|
| 152 |
+
# Create classification target: single-region (0) vs multi-region (1)
|
| 153 |
+
placement_counts = placement.groupby('service_id')['region'].nunique().reset_index()
|
| 154 |
+
placement_counts.columns = ['service_id', 'num_regions']
|
| 155 |
+
placement_counts['strategy'] = (placement_counts['num_regions'] > 1).astype(int)
|
| 156 |
+
|
| 157 |
+
# Merge with service features
|
| 158 |
+
classification_data = services.merge(placement_counts, on='service_id', how='left')
|
| 159 |
+
|
| 160 |
+
X_class = classification_data[['memory_mb', 'cpu_cores', 'latency_critical', 'traffic_volume_rps', 'dependencies']]
|
| 161 |
+
y_class = classification_data['strategy']
|
| 162 |
+
|
| 163 |
+
print(f"Class distribution: {y_class.value_counts().to_dict()}")
|
| 164 |
+
|
| 165 |
+
# Check if we have both classes
|
| 166 |
+
if len(y_class.unique()) > 1:
|
| 167 |
+
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
|
| 168 |
+
X_class, y_class, test_size=0.2, random_state=42, stratify=y_class
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
print(f"Training set: {len(X_train_cls)}, Test set: {len(X_test_cls)}")
|
| 172 |
+
|
| 173 |
+
# Scale features
|
| 174 |
+
scaler_class = StandardScaler()
|
| 175 |
+
X_train_cls_scaled = scaler_class.fit_transform(X_train_cls)
|
| 176 |
+
X_test_cls_scaled = scaler_class.transform(X_test_cls)
|
| 177 |
+
|
| 178 |
+
# Train classifier
|
| 179 |
+
model_rf = RandomForestClassifier(
|
| 180 |
+
n_estimators=100,
|
| 181 |
+
max_depth=5,
|
| 182 |
+
random_state=42,
|
| 183 |
+
class_weight='balanced'
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
model_rf.fit(X_train_cls_scaled, y_train_cls)
|
| 187 |
+
|
| 188 |
+
# Evaluate
|
| 189 |
+
y_pred_cls = model_rf.predict(X_test_cls_scaled)
|
| 190 |
+
accuracy = accuracy_score(y_test_cls, y_pred_cls)
|
| 191 |
+
|
| 192 |
+
print(f"\nModel trained!")
|
| 193 |
+
print(f" Accuracy: {accuracy:.4f}")
|
| 194 |
+
print(f"\nClassification Report:")
|
| 195 |
+
print(classification_report(y_test_cls, y_pred_cls, labels=[0, 1], target_names=['Single-Region', 'Multi-Region']))
|
| 196 |
+
else:
|
| 197 |
+
print(f"\nWARNING: Only one class found in data (all services are multi-region)")
|
| 198 |
+
print(f" Creating a synthetic binary target for demonstration...")
|
| 199 |
+
|
| 200 |
+
# Create synthetic target based on threshold of traffic volume
|
| 201 |
+
threshold = X_class['traffic_volume_rps'].median()
|
| 202 |
+
y_class = (X_class['traffic_volume_rps'] > threshold).astype(int)
|
| 203 |
+
|
| 204 |
+
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
|
| 205 |
+
X_class, y_class, test_size=0.2, random_state=42, stratify=y_class
|
| 206 |
+
)
|
| 207 |
+
|
| 208 |
+
print(f"New class distribution (high vs low traffic): {y_class.value_counts().to_dict()}")
|
| 209 |
+
print(f"Training set: {len(X_train_cls)}, Test set: {len(X_test_cls)}")
|
| 210 |
+
|
| 211 |
+
# Scale features
|
| 212 |
+
scaler_class = StandardScaler()
|
| 213 |
+
X_train_cls_scaled = scaler_class.fit_transform(X_train_cls)
|
| 214 |
+
X_test_cls_scaled = scaler_class.transform(X_test_cls)
|
| 215 |
+
|
| 216 |
+
# Train classifier
|
| 217 |
+
model_rf = RandomForestClassifier(
|
| 218 |
+
n_estimators=100,
|
| 219 |
+
max_depth=5,
|
| 220 |
+
random_state=42,
|
| 221 |
+
class_weight='balanced'
|
| 222 |
+
)
|
| 223 |
+
|
| 224 |
+
model_rf.fit(X_train_cls_scaled, y_train_cls)
|
| 225 |
+
|
| 226 |
+
# Evaluate
|
| 227 |
+
y_pred_cls = model_rf.predict(X_test_cls_scaled)
|
| 228 |
+
accuracy = accuracy_score(y_test_cls, y_pred_cls)
|
| 229 |
+
|
| 230 |
+
print(f"\nModel trained!")
|
| 231 |
+
print(f" Accuracy: {accuracy:.4f}")
|
| 232 |
+
print(f"\nClassification Report (High vs Low Traffic Services):")
|
| 233 |
+
print(classification_report(y_test_cls, y_pred_cls, labels=[0, 1], target_names=['Low Traffic', 'High Traffic']))
|
| 234 |
+
|
| 235 |
+
# Feature importance
|
| 236 |
+
feature_importance_cls = pd.DataFrame({
|
| 237 |
+
'feature': X_class.columns,
|
| 238 |
+
'importance': model_rf.feature_importances_
|
| 239 |
+
}).sort_values('importance', ascending=False)
|
| 240 |
+
|
| 241 |
+
print(f"\nTop Features for Placement Strategy:")
|
| 242 |
+
print(feature_importance_cls)
|
| 243 |
+
|
| 244 |
+
# Save model
|
| 245 |
+
joblib.dump(model_rf, 'models/random_forest_placement_model.pkl')
|
| 246 |
+
joblib.dump(scaler_class, 'models/scaler_classification.pkl')
|
| 247 |
+
print(f"Saved to models/random_forest_placement_model.pkl")
|
| 248 |
+
|
| 249 |
+
# ==================== SAVE FEATURE IMPORTANCE ====================
|
| 250 |
+
print("\n" + "="*70)
|
| 251 |
+
print("Saving Feature Importance")
|
| 252 |
+
print("="*70)
|
| 253 |
+
|
| 254 |
+
feature_importance.to_csv('models/feature_importance_latency.csv', index=False)
|
| 255 |
+
feature_importance_cls.to_csv('models/feature_importance_placement.csv', index=False)
|
| 256 |
+
print("Feature importance saved")
|
| 257 |
+
|
| 258 |
+
# ==================== SUMMARY ====================
|
| 259 |
+
print("\n" + "="*70)
|
| 260 |
+
print("MODEL TRAINING COMPLETE!")
|
| 261 |
+
print("="*70)
|
| 262 |
+
print(f"\nModels saved in 'models/' folder:")
|
| 263 |
+
print(f" • xgboost_latency_model.pkl")
|
| 264 |
+
print(f" • random_forest_placement_model.pkl")
|
| 265 |
+
print(f" • scaler_latency.pkl")
|
| 266 |
+
print(f" • scaler_classification.pkl")
|
| 267 |
+
print(f" • feature_importance_latency.csv")
|
| 268 |
+
print(f" • feature_importance_placement.csv")
|
| 269 |
+
|
| 270 |
+
print(f"\nModel Performance Summary:")
|
| 271 |
+
print(f" XGBoost (Latency Prediction)")
|
| 272 |
+
print(f" - RMSE: {rmse:.4f} ms")
|
| 273 |
+
print(f" - R²: {r2:.4f}")
|
| 274 |
+
print(f" Random Forest (Placement Strategy)")
|
| 275 |
+
print(f" - Accuracy: {accuracy:.4f}")
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
conn.close()
|
uv.lock
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version = 1
|
| 2 |
+
revision = 3
|
| 3 |
+
requires-python = ">=3.12"
|
| 4 |
+
|
| 5 |
+
[[package]]
|
| 6 |
+
name = "resource-optimization-ml"
|
| 7 |
+
version = "0.1.0"
|
| 8 |
+
source = { virtual = "." }
|