import streamlit as st
import pandas as pd
import numpy as np
import json
import plotly.graph_objects as go
import plotly.express as px
st.set_page_config(page_title="Resource Optimization ML", layout="wide", initial_sidebar_state="expanded")
# ==================== LOAD DATA ====================
@st.cache_data
def load_ab_results():
    """Load the A/B test metrics exported by the offline pipeline."""
    with open('results/ab_test_results.json', 'r') as f:
        return json.load(f)
@st.cache_data
def load_sample_data():
    """Synthesize sample data for visualization.

    The real datasets come from the project's data-generation scripts; the
    dashboard is JSON-only, so comparable synthetic data is generated here.
    """
    np.random.seed(42)  # fixed seed so the synthetic data is stable across reruns
    # Synthetic service catalog matching the shape of the real inventory
services_data = {
'service_id': list(range(1, 151)),
'service_name': [f"service-{i}" for i in range(1, 151)],
'memory_mb': np.random.choice([256, 512, 1024, 2048, 4096], 150),
'cpu_cores': np.random.choice([0.5, 1, 2, 4], 150),
'traffic_volume_rps': np.random.randint(1000, 100000, 150),
'latency_critical': np.random.choice([True, False], 150, p=[0.3, 0.7])
}
services = pd.DataFrame(services_data)
# Create sample latency data
regions = ['us-east-1', 'us-west-2', 'eu-west-1', 'ap-southeast-1', 'ap-northeast-1']
latency_matrix = {
('us-east-1', 'us-west-2'): (60, 80),
('us-east-1', 'eu-west-1'): (90, 110),
('us-east-1', 'ap-southeast-1'): (180, 220),
('us-east-1', 'ap-northeast-1'): (150, 190),
('us-west-2', 'eu-west-1'): (130, 160),
('us-west-2', 'ap-southeast-1'): (140, 170),
('us-west-2', 'ap-northeast-1'): (110, 140),
('eu-west-1', 'ap-southeast-1'): (200, 250),
('eu-west-1', 'ap-northeast-1'): (180, 230),
('ap-southeast-1', 'ap-northeast-1'): (50, 80),
}
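    # Expand the unordered pair ranges above into a full symmetric matrix;
    # intra-region latency is pinned at ~2 ms.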
latency_data = []
for r1 in regions:
for r2 in regions:
if r1 == r2:
latency_data.append({'region1': r1, 'region2': r2, 'latency_ms': 2})
elif (r1, r2) in latency_matrix:
min_lat, max_lat = latency_matrix[(r1, r2)]
latency_data.append({'region1': r1, 'region2': r2, 'latency_ms': np.random.uniform(min_lat, max_lat)})
elif (r2, r1) in latency_matrix:
min_lat, max_lat = latency_matrix[(r2, r1)]
latency_data.append({'region1': r1, 'region2': r2, 'latency_ms': np.random.uniform(min_lat, max_lat)})
latency = pd.DataFrame(latency_data)
return services, latency
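# Fail fast with a readable message when the exported results file is
# missing (assumption: results/ab_test_results.json is written by this
# repo's A/B test script; adjust the path if yours differs).
from pathlib import Path
if not Path('results/ab_test_results.json').exists():
    st.error("results/ab_test_results.json not found. Run the A/B test pipeline first.")
    st.stop()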
# Load all data
ab_results = load_ab_results()
services, latency = load_sample_data()
# ==================== SIDEBAR ====================
st.sidebar.title("πŸ“Š Navigation")
page = st.sidebar.radio(
"Select a page:",
["πŸ“ˆ Overview", "🎯 A/B Test Results", "πŸ—ΊοΈ Regional Analysis", "ℹ️ About"]
)
# ==================== PAGE 1: OVERVIEW ====================
if page == "πŸ“ˆ Overview":
st.title("πŸš€ Resource Optimization ML Pipeline")
st.markdown("""
This project demonstrates an **end-to-end ML solution** for optimizing service placement
across AWS regions. The goal: reduce latency and costs while maintaining service reliability.
""")
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("Total Services", len(services))
with col2:
st.metric("AWS Regions", 5)
with col3:
st.metric("Dataset Size", "1.6M+ records")
with col4:
st.metric("Models Trained", 2)
st.divider()
# Service Distribution
col1, col2 = st.columns(2)
with col1:
st.subheader("Services by Memory Requirements")
memory_dist = services['memory_mb'].value_counts().sort_index()
fig = px.bar(
x=memory_dist.index,
y=memory_dist.values,
labels={'x': 'Memory (MB)', 'y': 'Count'},
color=memory_dist.values,
color_continuous_scale='Viridis'
)
st.plotly_chart(fig, width='stretch')
with col2:
st.subheader("Latency Critical vs Non-Critical")
critical_dist = services['latency_critical'].value_counts()
        fig = px.pie(
            values=critical_dist.values,
            # Derive labels from the index so they cannot drift out of sync with the counts
            names=critical_dist.index.map({True: 'Latency Critical', False: 'Non-Critical'}),
            color_discrete_sequence=['#636EFA', '#EF553B']
        )
st.plotly_chart(fig, width='stretch')
st.divider()
st.subheader("Traffic Volume by Service (Top 10)")
top_services = services.nlargest(10, 'traffic_volume_rps')[['service_name', 'traffic_volume_rps']]
fig = px.bar(
top_services,
x='traffic_volume_rps',
y='service_name',
orientation='h',
labels={'traffic_volume_rps': 'Requests/Second', 'service_name': 'Service'},
color='traffic_volume_rps',
color_continuous_scale='Blues'
)
st.plotly_chart(fig, width='stretch')
# ==================== PAGE 2: A/B TEST RESULTS ====================
elif page == "🎯 A/B Test Results":
st.title("A/B Test: Random vs ML-Optimized Placement")
st.markdown("""
Comparing a **random placement strategy** (control) against an **ML-optimized strategy** (treatment).
""")
control = ab_results['control_metrics']
treatment = ab_results['treatment_metrics']
improvements = ab_results['improvements']
sig = ab_results['statistical_significance']
# Key Metrics Comparison
col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric(
            "Latency Reduction",
            f"{improvements['latency_reduction_pct']:.2f}%",
            delta="Lower is better",
            delta_color="off"  # the delta slot carries a note, not a numeric change
        )
    with col2:
        st.metric(
            "Cost Savings",
            f"{improvements['cost_reduction_pct']:.2f}%",
            delta="Lower is better",
            delta_color="off"
        )
    with col3:
        st.metric(
            "Critical Service Latency",
            f"{improvements['critical_latency_reduction_pct']:.2f}%",
            delta="Lower is better",
            delta_color="off"
        )
    with col4:
        is_sig = "✅ YES" if sig['is_significant'] else "❌ NO"
        st.metric(
            "Statistically Significant?",
            is_sig,
            delta=f"p-value: {sig['p_value']:.6f}",
            delta_color="off"
        )
st.divider()
# Detailed Comparison Table
st.subheader("Detailed Metrics Comparison")
comparison_data = {
'Metric': [
'Average Latency (ms)',
'Total Cost ($)',
'Placement Pairs',
'Redundancy Score',
'Critical Service Latency (ms)'
],
'Control (Random)': [
f"{control['avg_latency_ms']:.2f}",
f"{control['total_cost']:.2f}",
f"{control['total_placement_pairs']}",
f"{control['redundancy_score']:.2f}",
f"{control['critical_services_latency_ms']:.2f}"
],
'Treatment (ML-Optimized)': [
f"{treatment['avg_latency_ms']:.2f}",
f"{treatment['total_cost']:.2f}",
f"{treatment['total_placement_pairs']}",
f"{treatment['redundancy_score']:.2f}",
f"{treatment['critical_services_latency_ms']:.2f}"
]
}
comparison_df = pd.DataFrame(comparison_data)
st.dataframe(comparison_df, width='stretch')
st.divider()
# Visual Comparison
col1, col2 = st.columns(2)
with col1:
st.subheader("Latency Comparison")
        latency_data = {
            'Strategy': ['Control<br>(Random)', 'Treatment<br>(ML-Optimized)'],  # Plotly line breaks use <br>, not \n
            'Average Latency (ms)': [control['avg_latency_ms'], treatment['avg_latency_ms']]
        }
        fig = px.bar(
            latency_data,
            x='Strategy',
            y='Average Latency (ms)',
            color='Strategy',  # one trace per strategy so each bar gets its own color
            color_discrete_sequence=['#EF553B', '#00CC96'],
            text='Average Latency (ms)'
        )
        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
        fig.update_layout(showlegend=False)  # the x-axis labels already identify the bars
st.plotly_chart(fig, width='stretch')
with col2:
st.subheader("Cost Comparison")
        cost_data = {
            'Strategy': ['Control<br>(Random)', 'Treatment<br>(ML-Optimized)'],  # Plotly line breaks use <br>, not \n
            'Total Cost ($)': [control['total_cost'], treatment['total_cost']]
        }
        fig = px.bar(
            cost_data,
            x='Strategy',
            y='Total Cost ($)',
            color='Strategy',  # one trace per strategy so each bar gets its own color
            color_discrete_sequence=['#EF553B', '#00CC96'],
            text='Total Cost ($)'
        )
        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
        fig.update_layout(showlegend=False)  # the x-axis labels already identify the bars
st.plotly_chart(fig, width='stretch')
st.divider()
# Statistical Details
st.subheader("πŸ“Š Statistical Significance Test")
st.write(f"""
- **Test Type**: Independent t-test
- **t-statistic**: {sig['t_statistic']:.4f}
- **p-value**: {sig['p_value']:.10f}
- **Result**: {'βœ… **STATISTICALLY SIGNIFICANT**' if sig['is_significant'] else '❌ Not significant'} (Ξ± = 0.05)
*The improvement in latency is statistically significant, meaning it's unlikely to be due to random chance.*
""")
# ==================== PAGE 3: REGIONAL ANALYSIS ====================
elif page == "πŸ—ΊοΈ Regional Analysis":
st.title("Regional Latency Analysis")
# Latency heatmap
st.subheader("Average Cross-Region Latency (ms)")
latency_pivot = latency.pivot_table(
values='latency_ms',
index='region1',
columns='region2',
aggfunc='mean'
)
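    # Reversed RdYlGn colorscale: low latency renders green, high latency red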
fig = go.Figure(data=go.Heatmap(
z=latency_pivot.values,
x=latency_pivot.columns,
y=latency_pivot.index,
colorscale='RdYlGn_r',
text=np.round(latency_pivot.values, 1),
texttemplate='%{text} ms',
textfont={"size": 10}
))
fig.update_layout(title="Latency Heatmap", xaxis_title="To Region", yaxis_title="From Region")
st.plotly_chart(fig, width='stretch')
st.divider()
# Regional statistics
st.subheader("Regional Statistics")
    # Exclude same-region pairs (~2 ms) so self-latency doesn't skew the stats
    cross_region = latency[latency['region1'] != latency['region2']]
    latency_stats = cross_region.groupby('region1').agg({
        'latency_ms': ['mean', 'min', 'max', 'std']
    }).round(2)
    latency_stats.columns = ['Avg Latency (ms)', 'Min (ms)', 'Max (ms)', 'Std Dev (ms)']
st.dataframe(latency_stats, width='stretch')
# ==================== PAGE 4: ABOUT ====================
elif page == "ℹ️ About":
st.title("About This Project")
st.markdown("""
## 🎯 Problem Statement
Amazon's Region Flexibility Engineering team needs to optimize service placement across
AWS regions to:
- **Reduce latency** for end users
- **Lower costs** by avoiding expensive regions
- **Maintain reliability** with appropriate redundancy
- **Support rapid global expansion**
## πŸ› οΈ Solution Architecture
### 1. Data Pipeline
- **Sources**: Service metadata, traffic patterns, regional latency, placement history
- **Processing**: SQL queries + Pandas for feature engineering (sketched below)
- **Scale**: 150+ services, 5 regions, 1.6M+ traffic records
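
A minimal sketch of the feature-engineering step (the table and column names here are illustrative, not the repo's exact schema):

```python
import sqlite3
import pandas as pd

conn = sqlite3.connect("data/services.db")  # hypothetical path
query = (
    "SELECT s.service_id, s.memory_mb, s.cpu_cores, t.requests_per_sec "
    "FROM services s JOIN traffic t ON t.service_id = s.service_id"
)
features = pd.read_sql(query, conn)
```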
### 2. ML Models
**Model 1: Latency Prediction (XGBoost)**
- Predicts service latency for a given placement
- Features: Memory, CPU, traffic patterns, outbound latency
- Performance: RMSE = 28.7ms (training sketch below)
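
A rough shape of the training step (the hyperparameters and toy data below are assumptions for illustration):

```python
import numpy as np
from xgboost import XGBRegressor

# Toy stand-ins for the real feature matrix: memory, CPU, traffic, outbound latency
X_train = np.random.rand(200, 4)
y_train = 20 + 80 * np.random.rand(200)  # latency targets in ms

latency_model = XGBRegressor(n_estimators=300, max_depth=6, learning_rate=0.1)
latency_model.fit(X_train, y_train)
```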
**Model 2: Placement Strategy (Random Forest)**
- Classifies services as high/low traffic
- Determines optimal number of regions per service
- Accuracy: 100% on test set (sketch below)
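
And a comparable sketch for the placement classifier (toy data, assumed hyperparameters):

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier

X = np.random.rand(150, 4)           # per-service features
y = (X[:, 2] > 0.5).astype(int)      # pretend column 2 is normalized traffic volume

clf = RandomForestClassifier(n_estimators=200, random_state=42)
clf.fit(X, y)
```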
### 3. A/B Testing Framework
- **Control**: Random service placement (baseline)
- **Treatment**: ML-optimized placement
- **Results**: 5.25% latency reduction, 4.92% cost savings, statistically significant (p < 0.001); the test itself is sketched below
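
The significance check reduces to an independent two-sample t-test; here is a minimal sketch with toy samples standing in for the pipeline's latency measurements:

```python
import numpy as np
from scipy import stats

control = np.random.normal(120, 15, 1000)    # toy control latencies (ms)
treatment = np.random.normal(114, 15, 1000)  # toy treatment latencies (ms)

t_stat, p_value = stats.ttest_ind(control, treatment)
is_significant = p_value < 0.05
```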
## πŸ“Š Key Metrics
| Metric | Result |
|--------|--------|
| Latency Reduction | 5.25% |
| Cost Savings | 4.92% |
| Critical Service Improvement | 9.30% |
| Statistical Significance | p < 0.001 βœ… |
| Placement Efficiency | 378 vs 452 pairs (-16%) |
## πŸ’» Tech Stack
- **Data**: SQLite, Pandas, NumPy
- **ML**: scikit-learn, XGBoost
- **Statistics**: SciPy (t-tests, significance)
- **Visualization**: Plotly, Streamlit
- **Deployment**: Hugging Face Spaces
## πŸ“š How to Use
1. **Overview**: See project summary and data distribution
2. **A/B Test Results**: Detailed comparison of strategies with statistical validation
3. **Regional Analysis**: Explore latency patterns across AWS regions
## πŸš€ Next Steps for Production
- Integrate with real AWS CloudWatch metrics
- Deploy as automated recommendation engine
- Create feedback loop for model retraining
- Build alerting system for anomalies
- Extend to multi-cloud (GCP, Azure)
## πŸ“‚ Project Repository
**GitHub**: [resource-optimization-ml](https://github.com/aankitdas/resource-optimization-ml)
---
**Built with Python | ML | Data Engineering | Cloud Architecture**
""")