import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import math
from plotly.subplots import make_subplots
import json
from datetime import datetime

# Configure Streamlit for better performance
st.set_page_config(
    page_title="Katonic Multitenant Infrastructure Calculator",
    page_icon="🚀",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Cloud provider and On-Premise pricing data (per hour in USD)
CLOUD_PRICING = {
    'On-Premise': {
        'name': 'On-Premise Datacenter',
        'cost_per_node_hour': 0.192,  # ~50% of cloud (amortized hardware + power + cooling over 3 years)
        'managed_k8s_cost': 0.05,  # Self-managed K8s operational cost (admin time, monitoring tools)
        'description': 'Dell PowerEdge R640 / HPE DL360 equivalent',
        'specs': '8 vCPUs, 32GB RAM',
        'vectordb_node': {
            'instance_type': 'Dell PowerEdge R740 / HPE DL380 equivalent',
            'cost_per_hour': 0.384,  # ~50% of cloud (high-memory server amortized)
            'specs': '16 vCPUs, 64GB RAM'
        },
        'jump_host': {
            'instance_type': 'Dell PowerEdge R440 / HPE DL20 equivalent',
            'cost_per_hour': 0.048,  # ~50% of cloud (small server amortized)
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'Network_Infrastructure': {'cost_per_hour': 0.020, 'description': 'Switches, routers, firewalls (amortized)'},
            'Storage_SAN': {'cost_per_gb_month': 0.05, 'description': 'SAN/NAS storage (1TB base, amortized)'},
            'Hardware_Load_Balancer': {'cost_per_hour': 0.010, 'description': 'F5/Citrix ADC (amortized)'},
            'Power_Cooling': {'cost_per_hour': 0.030, 'description': 'Datacenter power (0.1kW/server) and cooling'},
            'Datacenter_Space': {'cost_per_hour': 0.015, 'description': 'Rack space and facilities costs'},
            'Maintenance_Support': {'cost_per_hour': 0.025, 'description': 'Hardware maintenance and vendor support contracts'}
        },
        'gpu_pricing_multiplier': 0.55,  # On-prem GPU costs are ~55% of cloud (hardware amortization + power)
        'notes': 'Costs include: hardware amortization (3-year lifecycle), power (~$0.10/kWh), cooling (1:1 ratio), rack space, network infrastructure, storage, and maintenance. Assumes enterprise datacenter with N+1 redundancy. Does NOT include: initial capex, datacenter construction, staff salaries (covered in K8s management cost).'
    },
    'AWS': {
        'name': 'Amazon EKS',
        'cost_per_node_hour': 0.384,
        'managed_k8s_cost': 0.10,
        'description': 'm5.2xlarge instances',
        'specs': '8 vCPUs, 32GB RAM',
        'vectordb_node': {
            'instance_type': 'm5.4xlarge',
            'cost_per_hour': 0.768,
            'specs': '16 vCPUs, 64GB RAM'
        },
        'jump_host': {
            'instance_type': 'm5.large',
            'cost_per_hour': 0.096,
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'VPC': {'cost_per_hour': 0.0, 'description': 'Virtual Private Cloud (Free)'},
            'EBS': {'cost_per_gb_month': 0.10, 'description': 'Elastic Block Store (1TB expandable)'},
            'ELB': {'cost_per_hour': 0.025, 'description': 'Elastic Load Balancer'},
            'EIP': {'cost_per_hour': 0.005, 'description': 'Elastic IP Address'}
        }
    },
    'Azure': {
        'name': 'Azure Kubernetes Service',
        'cost_per_node_hour': 0.384,
        'managed_k8s_cost': 0.0,
        'description': 'Standard_D8s_v3 instances',
        'specs': '8 vCPUs, 32GB RAM',
        'vectordb_node': {
            'instance_type': 'Standard_D16s_v3',
            'cost_per_hour': 0.768,
            'specs': '16 vCPUs, 64GB RAM'
        },
        'jump_host': {
            'instance_type': 'Standard_D2s_v3',
            'cost_per_hour': 0.096,
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'VNet': {'cost_per_hour': 0.0, 'description': 'Virtual Network (Free)'},
            'Managed_Disks': {'cost_per_gb_month': 0.10, 'description': 'Managed Disks (1TB expandable)'},
            'Load_Balancer': {'cost_per_hour': 0.025, 'description': 'Azure Load Balancer'},
            'Public_IP': {'cost_per_hour': 0.005, 'description': 'Public IP Address'}
        }
    },
    'GCP': {
        'name': 'Google Kubernetes Engine',
        'cost_per_node_hour': 0.379,
        'managed_k8s_cost': 0.10,
        'description': 'n1-standard-8 instances',
        'specs': '8 vCPUs, 30GB RAM',
        'vectordb_node': {
            'instance_type': 'n1-standard-16',
            'cost_per_hour': 0.758,
            'specs': '16 vCPUs, 60GB RAM'
        },
        'jump_host': {
            'instance_type': 'e2-medium',
            'cost_per_hour': 0.067,
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'VPC': {'cost_per_hour': 0.0, 'description': 'Virtual Private Cloud (Free)'},
            'Persistent_Disk': {'cost_per_gb_month': 0.10, 'description': 'Persistent Disk (1TB expandable)'},
            'Load_Balancer': {'cost_per_hour': 0.025, 'description': 'Cloud Load Balancing'},
            'Static_IP': {'cost_per_hour': 0.004, 'description': 'Static External IP'},
            'Cloud_Storage': {'cost_per_gb_month': 0.020, 'description': 'GCS Bucket (Optional)'},
            'Filestore': {'cost_per_gb_month': 0.20, 'description': 'Filestore (depends on usage)'}
        }
    }
}

# Production-grade model specifications
MODELS = {
    "Llama 4 Maverick": {
        "params": 400, 
        "active_params": 17,
        "memory_per_param": 2, 
        "max_context": 1000000, 
        "base_tps": 4200,
        "org": "Meta",
        "license": "Open-weight",
        "notes": "Multimodal MoE; 1M context; text, image, code, reasoning"
    },
    "Llama 4 Scout": {
        "params": 109, 
        "active_params": 17,
        "memory_per_param": 2, 
        "max_context": 10000000, 
        "base_tps": 4500,
        "org": "Meta",
        "license": "Open-weight", 
        "notes": "Multimodal MoE; 10M context; efficient for long-form tasks"
    },
    "Llama 3.3 70B": {
        "params": 70,
        "active_params": 70, 
        "memory_per_param": 2, 
        "max_context": 128000, 
        "base_tps": 1800,
        "org": "Meta",
        "license": "Community (open)",
        "notes": "Multilingual; matches Llama 3.1 405B performance"
    },
    "Qwen2 110B": {
        "params": 110,
        "active_params": 110,
        "memory_per_param": 2, 
        "max_context": 128000, 
        "base_tps": 1200,
        "org": "Alibaba/Qwen",
        "license": "Apache 2.0",
        "notes": "Multilingual; top-tier reasoning and coding"
    },
    "DeepSeek-VL 110B": {
        "params": 110,
        "active_params": 110,
        "memory_per_param": 2, 
        "max_context": 128000, 
        "base_tps": 1100,
        "org": "DeepSeek AI",
        "license": "MIT",
        "notes": "Multimodal (vision+language); GPT-4V alternative"
    },
    "Mixtral 8x22B": {
        "params": 141,
        "active_params": 39, 
        "memory_per_param": 2, 
        "max_context": 65536, 
        "base_tps": 2800,
        "org": "Mistral AI",
        "license": "Apache 2.0",
        "notes": "Sparse MoE; efficiency leader among MoE models"
    }
}

GPUS = {
    "H200 141GB": {
        "memory": 141, 
        "compute": 9.0, 
        "tps_min": 5486, 
        "tps_max": 18690,
        "efficiency_tier": "Flagship+",
        "pricing": {
            "aws": 15.70,
            "azure": 12.29,
            "gcp": "NA",
            "on-premise": 8.64  # 55% of AWS price (hardware amortization + power)
        }
    },
    "H100 80GB": {
        "memory": 80, 
        "compute": 9.0, 
        "tps_min": 2400, 
        "tps_max": 14000,
        "efficiency_tier": "Flagship",
        "pricing": {
            "aws": 6.01,
            "azure": 6.98,
            "gcp": 11.06,
            "on-premise": 3.31  # 55% of AWS price
        }
    },
    "A100 80GB": {
        "memory": 80, 
        "compute": 8.0, 
        "tps_min": 1100, 
        "tps_max": 2000,
        "efficiency_tier": "Excellent",
        "pricing": {
            "aws": 3.43,
            "azure": 3.67,
            "gcp": 2.48,
            "on-premise": 1.89  # 55% of AWS price
        }
    },
    "A100 40GB": {
        "memory": 40, 
        "compute": 8.0, 
        "tps_min": 1000, 
        "tps_max": 1800,
        "efficiency_tier": "Good",
        "pricing": {
            "aws": 2.75,
            "azure": 3.67,
            "gcp": 1.46,
            "on-premise": 1.51  # 55% of AWS price
        }
    },
    "L40S": {
        "memory": 48, 
        "compute": 8.9, 
        "tps_min": 4000, 
        "tps_max": 4768,
        "efficiency_tier": "Very Good",
        "pricing": {
            "aws": 1.67,
            "azure": "NA",
            "gcp": "NA",
            "on-premise": 0.92  # 55% of AWS price
        }
    }
}

@st.cache_data(show_spinner=False, ttl=300)
def calculate_detailed_infrastructure(num_tenants, apps_per_tenant):
    """Calculate detailed infrastructure requirements with node type breakdown - CACHED"""
    # Standard node specs (8 vCPUs, 32GB RAM)
    cores_per_node = 8
    ram_per_node = 32
    
    # VectorDB node specs (16 vCPUs, 64GB RAM) - Updated as per requirement
    vectordb_cores_per_node = 16
    vectordb_ram_per_node = 64
    
    # Base infrastructure
    base_platform_nodes = 2
    
    # Per tenant requirements
    platform_nodes_per_tenant = 1
    compute_nodes_per_tenant = 1
    vectordb_nodes_per_tenant = 1  # Using 64GB RAM nodes for VectorDB
    
    # Calculate deployment nodes based on apps per tenant
    # Every 4 apps need 1 deployment node
    deploy_nodes_per_tenant = math.ceil(apps_per_tenant / 4)
    
    # Calculate totals
    total_platform_nodes = base_platform_nodes + (platform_nodes_per_tenant * num_tenants)
    total_compute_nodes = compute_nodes_per_tenant * num_tenants
    total_deploy_nodes = deploy_nodes_per_tenant * num_tenants
    total_vectordb_nodes = vectordb_nodes_per_tenant * num_tenants
    
    # Total standard nodes (excluding VectorDB which uses different specs)
    total_standard_nodes = total_platform_nodes + total_compute_nodes + total_deploy_nodes
    total_nodes = total_standard_nodes + total_vectordb_nodes
    
    # Resource calculations
    total_cpu = (total_standard_nodes * cores_per_node) + (total_vectordb_nodes * vectordb_cores_per_node)
    total_ram = (total_standard_nodes * ram_per_node) + (total_vectordb_nodes * vectordb_ram_per_node)
    
    # Applications capacity
    total_apps = num_tenants * apps_per_tenant
    
    return {
        'node_breakdown': {
            'Platform Nodes': {
                'base': base_platform_nodes,
                'tenant': platform_nodes_per_tenant * num_tenants,
                'total': total_platform_nodes,
                'cores': total_platform_nodes * cores_per_node,
                'ram': total_platform_nodes * ram_per_node,
                'purpose': 'Tenancy Manager + Tenant platform services',
                'node_type': 'Standard (8 vCPU, 32GB RAM)'
            },
            'Compute Nodes': {
                'base': 0,
                'tenant': total_compute_nodes,
                'total': total_compute_nodes,
                'cores': total_compute_nodes * cores_per_node,
                'ram': total_compute_nodes * ram_per_node,
                'purpose': 'Computational workloads',
                'node_type': 'Standard (8 vCPU, 32GB RAM)'
            },
            'Deploy Nodes': {
                'base': 0,
                'tenant': total_deploy_nodes,
                'total': total_deploy_nodes,
                'cores': total_deploy_nodes * cores_per_node,
                'ram': total_deploy_nodes * ram_per_node,
                'purpose': f'Application deployment ({deploy_nodes_per_tenant} node(s) per {apps_per_tenant} apps)',
                'node_type': 'Standard (8 vCPU, 32GB RAM)'
            },
            'VectorDB Nodes': {
                'base': 0,
                'tenant': total_vectordb_nodes,
                'total': total_vectordb_nodes,
                'cores': total_vectordb_nodes * vectordb_cores_per_node,
                'ram': total_vectordb_nodes * vectordb_ram_per_node,
                'purpose': 'Vector database operations (high memory)',
                'node_type': 'High-Memory (16 vCPU, 64GB RAM)'
            }
        },
        'totals': {
            'total_nodes': total_nodes,
            'total_standard_nodes': total_standard_nodes,
            'total_vectordb_nodes': total_vectordb_nodes,
            'total_cpu': total_cpu,
            'total_ram': total_ram,
            'total_apps': total_apps,
            'deploy_nodes_per_tenant': deploy_nodes_per_tenant
        },
        'specs': {
            'cores_per_node': cores_per_node,
            'ram_per_node': ram_per_node,
            'vectordb_cores_per_node': vectordb_cores_per_node,
            'vectordb_ram_per_node': vectordb_ram_per_node
        }
    }

def calculate_model_memory_requirements(model_params, active_params, precision_bytes):
    """Calculate memory requirements for model inference"""
    model_memory = model_params * precision_bytes
    overhead = model_memory * 0.25
    kv_cache = model_memory * 0.1
    total_memory = model_memory + overhead + kv_cache
    return total_memory

def calculate_model_tps_on_gpu(model_base_tps, model_params, active_params, gpu_spec):
    """Calculate actual TPS for a specific model on a specific GPU"""
    effective_params = active_params
    reference_params = 70
    param_scaling = (reference_params / effective_params) ** 0.7
    
    gpu_tps_min = gpu_spec["tps_min"]
    gpu_tps_max = gpu_spec["tps_max"]
    
    actual_tps_min = gpu_tps_min * param_scaling
    actual_tps_max = gpu_tps_max * param_scaling
    
    estimated_tps = actual_tps_min + (actual_tps_max - actual_tps_min) * 0.3
    return estimated_tps, actual_tps_min, actual_tps_max

def calculate_gpu_node_configurations(total_gpus_needed, gpu_memory_gb, gpu_spec):
    """Calculate GPU node configurations based on standard cloud GPU node sizes"""
    configurations = []
    
    # Standard GPU node configurations: 1, 2, 4, 8 GPUs per node
    standard_configs = [1, 2, 4, 8]
    
    # Minimum GPUs per node based on memory requirements
    min_gpus_per_node = math.ceil(gpu_memory_gb / gpu_spec["memory"])
    
    for gpus_per_node in standard_configs:
        # Skip configurations that can't fit the model memory requirement
        if gpus_per_node < min_gpus_per_node:
            continue
            
        num_nodes = math.ceil(total_gpus_needed / gpus_per_node)
        total_gpus_allocated = num_nodes * gpus_per_node
        gpu_utilization = (total_gpus_needed / total_gpus_allocated) * 100
        gpu_waste = total_gpus_allocated - total_gpus_needed
        
        configurations.append({
            'gpus_per_node': gpus_per_node,
            'num_nodes': num_nodes,
            'total_gpus_allocated': total_gpus_allocated,
            'total_gpus_needed': total_gpus_needed,
            'utilization': gpu_utilization,
            'gpu_waste': gpu_waste,
            'meets_memory_req': gpus_per_node >= min_gpus_per_node,
            'memory_utilization': (gpu_memory_gb / (gpus_per_node * gpu_spec["memory"])) * 100
        })
    
    # If no configurations work (shouldn't happen with proper validation), add all configs
    if not configurations:
        for gpus_per_node in standard_configs:
            num_nodes = math.ceil(total_gpus_needed / gpus_per_node)
            total_gpus_allocated = num_nodes * gpus_per_node
            gpu_utilization = (total_gpus_needed / total_gpus_allocated) * 100
            gpu_waste = total_gpus_allocated - total_gpus_needed
            
            configurations.append({
                'gpus_per_node': gpus_per_node,
                'num_nodes': num_nodes,
                'total_gpus_allocated': total_gpus_allocated,
                'total_gpus_needed': total_gpus_needed,
                'utilization': gpu_utilization,
                'gpu_waste': gpu_waste,
                'meets_memory_req': gpus_per_node >= min_gpus_per_node,
                'memory_utilization': (gpu_memory_gb / (gpus_per_node * gpu_spec["memory"])) * 100
            })
    
    # Sort by utilization (descending) and then by total GPUs (ascending)
    configurations.sort(key=lambda x: (-x['utilization'], x['total_gpus_allocated']))
    return configurations, min_gpus_per_node

@st.cache_data(show_spinner=False, ttl=300)
def calculate_gpu_requirements(conversations_per_minute, tokens_per_conversation, model_spec, gpu_spec, precision_bytes):
    """Calculate GPU requirements for LLM inference with proper node configurations - CACHED"""
    # Calculate throughput requirements
    required_tps = (conversations_per_minute * tokens_per_conversation) / 60
    
    # Calculate memory requirements
    model_memory_gb = calculate_model_memory_requirements(
        model_spec["params"], model_spec["active_params"], precision_bytes
    )
    
    # Calculate model performance on GPU
    estimated_tps, tps_min, tps_max = calculate_model_tps_on_gpu(
        model_spec["base_tps"], model_spec["params"], model_spec["active_params"], gpu_spec
    )
    
    # Calculate basic GPU requirements
    gpus_needed_memory = math.ceil(model_memory_gb / gpu_spec["memory"])
    gpus_needed_throughput = math.ceil(required_tps / estimated_tps)
    total_gpus_needed = max(gpus_needed_memory, gpus_needed_throughput, 1)
    
    # Calculate proper GPU node configurations
    gpu_configs, min_gpus_per_node = calculate_gpu_node_configurations(
        total_gpus_needed, model_memory_gb, gpu_spec
    )
    
    # Use the best (most efficient) configuration
    best_config = gpu_configs[0] if gpu_configs else None
    actual_gpus_allocated = best_config['total_gpus_allocated'] if best_config else total_gpus_needed
    
    return {
        'gpus_needed_memory': gpus_needed_memory,
        'gpus_needed_throughput': gpus_needed_throughput,
        'total_gpus_needed': total_gpus_needed,
        'actual_gpus_allocated': actual_gpus_allocated,
        'gpu_configurations': gpu_configs,
        'best_config': best_config,
        'min_gpus_per_node': min_gpus_per_node,
        'model_memory_gb': model_memory_gb,
        'required_tps': required_tps,
        'estimated_tps': estimated_tps,
        'tps_range': (tps_min, tps_max),
        'total_system_tps': estimated_tps * actual_gpus_allocated,
        'max_conversations_per_minute': (estimated_tps * actual_gpus_allocated * 60) / tokens_per_conversation,
        'bottleneck': 'Memory' if gpus_needed_memory >= gpus_needed_throughput else 'Throughput'
    }

def is_gpu_available_for_provider(provider, gpu_spec):
    """Check if GPU is actually available for a provider (not N/A and has valid pricing)"""
    gpu_pricing = gpu_spec.get("pricing", {})
    provider_key = provider.lower()
    
    if provider_key not in gpu_pricing:
        return False
    
    price = gpu_pricing[provider_key]
    return price != "NA" and isinstance(price, (int, float)) and price > 0

def get_available_providers_for_gpu(gpu_spec):
    """Get list of providers that actually have the selected GPU available"""
    available_providers = []
    for provider in CLOUD_PRICING.keys():
        if is_gpu_available_for_provider(provider, gpu_spec):
            available_providers.append(provider)
    return available_providers

def create_downloadable_cost_report(all_costs, infrastructure, gpu_requirements, model_spec, gpu_spec, selected_model, selected_gpu, num_tenants, apps_per_tenant, conversations_per_minute, tokens_per_conversation, precision, time_period):
    """Create a comprehensive cost report for download"""
    report_data = {
        'report_metadata': {
            'generated_at': datetime.now().isoformat(),
            'configuration': {
                'tenants': num_tenants,
                'apps_per_tenant': apps_per_tenant,
                'total_apps': num_tenants * apps_per_tenant,
                'model': selected_model,
                'gpu': selected_gpu,
                'precision': precision,
                'conversations_per_minute': conversations_per_minute,
                'tokens_per_conversation': tokens_per_conversation,
                'time_period': time_period
            }
        },
        'infrastructure_summary': {
            'platform_nodes': infrastructure['totals']['total_standard_nodes'],
            'vectordb_nodes': infrastructure['totals']['total_vectordb_nodes'],
            'total_nodes': infrastructure['totals']['total_nodes'],
            'gpu_nodes': gpu_requirements['total_gpus_needed'],
            'total_cpu_cores': infrastructure['totals']['total_cpu'],
            'total_ram_gb': infrastructure['totals']['total_ram'],
            'total_gpu_memory_gb': gpu_requirements['total_gpus_needed'] * gpu_spec['memory'],
            'max_conversations_per_minute': gpu_requirements['max_conversations_per_minute']
        },
        'cost_breakdown_by_provider': {}
    }
    
    # Add cost breakdown for each provider
    for provider, costs in all_costs.items():
        provider_available = is_gpu_available_for_provider(provider, gpu_spec)
        
        report_data['cost_breakdown_by_provider'][provider] = {
            'gpu_available': provider_available,
            'platform_costs': {
                'kubernetes_nodes': costs['platform_costs']['total_node_cost'],
                'vectordb_nodes': costs['platform_costs']['vectordb_node_cost'],
                'jump_host': costs['platform_costs']['jump_host_cost'],
                'additional_services': costs['platform_costs']['additional_services_cost'],
                'k8s_management': costs['platform_costs']['k8s_management_cost'],
                'platform_total': costs['platform_costs']['platform_total']
            },
            'gpu_costs': {
                'gpu_count': costs['gpu_costs']['gpu_count'],
                'gpu_cost_per_hour': costs['gpu_costs']['gpu_cost_per_hour'],
                'total_gpu_cost': costs['gpu_costs']['total_gpu_cost'] if provider_available else 'N/A'
            },
            'totals': {
                'platform_cost': costs['totals']['platform_cost'],
                'gpu_cost': costs['totals']['gpu_cost'] if provider_available else 'N/A',
                'total_cost': costs['totals']['total_cost'] if provider_available else 'N/A',
                'cost_per_hour': costs['totals']['cost_per_hour'] if provider_available else 'N/A',
                'cost_per_day': costs['totals']['cost_per_day'] if provider_available else 'N/A'
            },
            'service_details': costs['platform_costs']['service_costs']
        }
    
    return report_data

def format_cost_for_display(cost, available=True):
    """Format cost for display, handling N/A cases"""
    if not available or cost == 'N/A':
        return 'N/A'
    return f"${cost:.2f}"

def calculate_detailed_costs(provider, infrastructure, gpu_requirements, gpu_spec, days=30):
    """Calculate detailed costs for both platform and GPU infrastructure"""
    pricing = CLOUD_PRICING[provider]
    hours = days * 24
    
    # Platform infrastructure costs
    node_costs = {}
    total_standard_node_cost = 0
    total_vectordb_node_cost = 0
    
    for node_type, details in infrastructure['node_breakdown'].items():
        if node_type == 'VectorDB Nodes':
            # Use special pricing for VectorDB nodes
            node_cost = details['total'] * pricing['vectordb_node']['cost_per_hour'] * hours
            total_vectordb_node_cost = node_cost
        else:
            # Use standard pricing for other nodes
            node_cost = details['total'] * pricing['cost_per_node_hour'] * hours
            total_standard_node_cost += node_cost
        
        node_costs[node_type] = {
            'count': details['total'],
            'cost': node_cost,
            'cores': details['cores'],
            'ram': details['ram'],
            'node_type': details.get('node_type', 'Standard')
        }
    
    total_node_cost = total_standard_node_cost + total_vectordb_node_cost
    
    # Jump Host cost
    jump_host_cost = pricing['jump_host']['cost_per_hour'] * hours
    
    # Additional services costs
    additional_services_cost = 0
    service_costs = {}
    
    for service, details in pricing['additional_services'].items():
        if 'cost_per_hour' in details:
            service_cost = details['cost_per_hour'] * hours
        elif 'cost_per_gb_month' in details:
            if 'storage' in service.lower() or 'disk' in service.lower() or 'ebs' in service.lower() or 'san' in service.lower():
                service_cost = details['cost_per_gb_month'] * 1024 * (days / 30)
            else:
                service_cost = 0
        else:
            service_cost = 0
        
        service_costs[service] = service_cost
        additional_services_cost += service_cost
    
    # Kubernetes management cost
    k8s_management_cost = pricing['managed_k8s_cost'] * hours
    
    # GPU costs - properly handle N/A cases
    gpu_pricing = gpu_spec.get("pricing", {})
    gpu_available = is_gpu_available_for_provider(provider, gpu_spec)
    gpu_cost_per_hour = 0
    gpu_cost = 0
    
    if gpu_available:
        gpu_cost_per_hour = gpu_pricing[provider.lower()]
        gpu_cost = gpu_requirements['actual_gpus_allocated'] * gpu_cost_per_hour * hours
    
    # Total costs
    platform_cost = total_node_cost + jump_host_cost + additional_services_cost + k8s_management_cost
    total_cost = platform_cost + gpu_cost if gpu_available else None  # None for N/A cases
    
    return {
        'platform_costs': {
            'node_costs': node_costs,
            'total_node_cost': total_node_cost,
            'vectordb_node_cost': total_vectordb_node_cost,
            'jump_host_cost': jump_host_cost,
            'service_costs': service_costs,
            'additional_services_cost': additional_services_cost,
            'k8s_management_cost': k8s_management_cost,
            'platform_total': platform_cost
        },
        'gpu_costs': {
            'gpu_count': gpu_requirements['actual_gpus_allocated'],
            'gpu_cost_per_hour': gpu_cost_per_hour,
            'total_gpu_cost': gpu_cost,
            'gpu_available': gpu_available
        },
        'totals': {
            'platform_cost': platform_cost,
            'gpu_cost': gpu_cost,
            'total_cost': total_cost,
            'cost_per_hour': total_cost / hours if total_cost is not None else None,
            'cost_per_day': total_cost / days if total_cost is not None else None,
            'gpu_available': gpu_available
        }
    }

def create_comprehensive_dashboard():
    st.set_page_config(
        page_title="Katonic Multitenant Infrastructure Calculator",
        page_icon="🚀",
        layout="wide"
    )
    
    st.title("🚀 Katonic Multitenant Infrastructure Calculator")
    st.markdown("**Comprehensive infrastructure planning for multi-tenant LLMOPS platforms with GPU-accelerated LLM inference**")
    
    # Sidebar Configuration
    with st.sidebar:
        st.header("🔧 Configuration")
        
        # Platform Configuration
        st.subheader("Platform Settings")
        num_tenants = st.slider(
            "Number of Tenants", 
            min_value=1, 
            max_value=20, 
            value=3,
            help="Each tenant requires dedicated platform, compute, deploy, and VectorDB nodes"
        )
        
        apps_per_tenant = st.number_input(
            "Apps per Tenant",
            min_value=1,
            max_value=50,
            value=4,
            step=1,
            help="Number of applications per tenant. Every 4 apps require 1 deployment node"
        )
        
        # Cloud Provider Pricing Configuration
        st.subheader("Cloud Provider Pricing (Optional)")
        
        # AWS Pricing
        with st.expander("☁️ Customize AWS Costs", expanded=False):
            st.markdown("**Adjust AWS pricing (per hour in USD)**")
            
            st.markdown("##### Compute Nodes")
            aws_standard_node = st.number_input(
                "m5.2xlarge (8 vCPU, 32GB)",
                min_value=0.01,
                max_value=2.00,
                value=0.384,
                step=0.01,
                format="%.3f",
                key="aws_standard",
                help="Default: $0.384/hr"
            )
            
            aws_vectordb_node = st.number_input(
                "m5.4xlarge (16 vCPU, 64GB)",
                min_value=0.01,
                max_value=4.00,
                value=0.768,
                step=0.01,
                format="%.3f",
                key="aws_vectordb",
                help="Default: $0.768/hr"
            )
            
            aws_jump_host = st.number_input(
                "m5.large (2 vCPU, 8GB)",
                min_value=0.01,
                max_value=0.50,
                value=0.096,
                step=0.01,
                format="%.3f",
                key="aws_jump",
                help="Default: $0.096/hr"
            )
            
            aws_k8s_management = st.number_input(
                "EKS Management Cost",
                min_value=0.0,
                max_value=0.50,
                value=0.10,
                step=0.01,
                format="%.3f",
                key="aws_k8s",
                help="Default: $0.10/hr"
            )
            
            st.markdown("##### GPU Pricing")
            col1, col2 = st.columns(2)
            with col1:
                aws_h200 = st.number_input("H200 141GB", value=15.70, step=0.10, format="%.2f", key="aws_h200")
                aws_h100 = st.number_input("H100 80GB", value=6.01, step=0.10, format="%.2f", key="aws_h100")
                aws_a100_80 = st.number_input("A100 80GB", value=3.43, step=0.10, format="%.2f", key="aws_a100_80")
            with col2:
                aws_a100_40 = st.number_input("A100 40GB", value=2.75, step=0.10, format="%.2f", key="aws_a100_40")
                aws_l40s = st.number_input("L40S", value=1.67, step=0.10, format="%.2f", key="aws_l40s")
        
        # Azure Pricing
        with st.expander("☁️ Customize Azure Costs", expanded=False):
            st.markdown("**Adjust Azure pricing (per hour in USD)**")
            
            st.markdown("##### Compute Nodes")
            azure_standard_node = st.number_input(
                "Standard_D8s_v3 (8 vCPU, 32GB)",
                min_value=0.01,
                max_value=2.00,
                value=0.384,
                step=0.01,
                format="%.3f",
                key="azure_standard",
                help="Default: $0.384/hr"
            )
            
            azure_vectordb_node = st.number_input(
                "Standard_D16s_v3 (16 vCPU, 64GB)",
                min_value=0.01,
                max_value=4.00,
                value=0.768,
                step=0.01,
                format="%.3f",
                key="azure_vectordb",
                help="Default: $0.768/hr"
            )
            
            azure_jump_host = st.number_input(
                "Standard_D2s_v3 (2 vCPU, 8GB)",
                min_value=0.01,
                max_value=0.50,
                value=0.096,
                step=0.01,
                format="%.3f",
                key="azure_jump",
                help="Default: $0.096/hr"
            )
            
            azure_k8s_management = st.number_input(
                "AKS Management Cost",
                min_value=0.0,
                max_value=0.50,
                value=0.0,
                step=0.01,
                format="%.3f",
                key="azure_k8s",
                help="Default: $0.00/hr (Free tier)"
            )
            
            st.markdown("##### GPU Pricing")
            col1, col2 = st.columns(2)
            with col1:
                azure_h200 = st.number_input("H200 141GB", value=12.29, step=0.10, format="%.2f", key="azure_h200")
                azure_h100 = st.number_input("H100 80GB", value=6.98, step=0.10, format="%.2f", key="azure_h100")
                azure_a100_80 = st.number_input("A100 80GB", value=3.67, step=0.10, format="%.2f", key="azure_a100_80")
            with col2:
                azure_a100_40 = st.number_input("A100 40GB", value=3.67, step=0.10, format="%.2f", key="azure_a100_40")
        
        # GCP Pricing
        with st.expander("☁️ Customize GCP Costs", expanded=False):
            st.markdown("**Adjust GCP pricing (per hour in USD)**")
            
            st.markdown("##### Compute Nodes")
            gcp_standard_node = st.number_input(
                "n1-standard-8 (8 vCPU, 30GB)",
                min_value=0.01,
                max_value=2.00,
                value=0.379,
                step=0.01,
                format="%.3f",
                key="gcp_standard",
                help="Default: $0.379/hr"
            )
            
            gcp_vectordb_node = st.number_input(
                "n1-standard-16 (16 vCPU, 60GB)",
                min_value=0.01,
                max_value=4.00,
                value=0.758,
                step=0.01,
                format="%.3f",
                key="gcp_vectordb",
                help="Default: $0.758/hr"
            )
            
            gcp_jump_host = st.number_input(
                "e2-medium (2 vCPU, 8GB)",
                min_value=0.01,
                max_value=0.50,
                value=0.067,
                step=0.01,
                format="%.3f",
                key="gcp_jump",
                help="Default: $0.067/hr"
            )
            
            gcp_k8s_management = st.number_input(
                "GKE Management Cost",
                min_value=0.0,
                max_value=0.50,
                value=0.10,
                step=0.01,
                format="%.3f",
                key="gcp_k8s",
                help="Default: $0.10/hr"
            )
            
            st.markdown("##### GPU Pricing")
            col1, col2 = st.columns(2)
            with col1:
                gcp_h100 = st.number_input("H100 80GB", value=11.06, step=0.10, format="%.2f", key="gcp_h100")
                gcp_a100_80 = st.number_input("A100 80GB", value=2.48, step=0.10, format="%.2f", key="gcp_a100_80")
            with col2:
                gcp_a100_40 = st.number_input("A100 40GB", value=1.46, step=0.10, format="%.2f", key="gcp_a100_40")
        
        # On-Premise Pricing
        with st.expander("🏢 Customize On-Premise Costs", expanded=False):
            st.markdown("**Adjust on-premise costs based on your infrastructure**")
            
            st.markdown("##### Compute Nodes (per hour)")
            onprem_standard_node = st.number_input(
                "Standard Node (8 vCPU, 32GB)",
                min_value=0.01,
                max_value=1.00,
                value=0.192,
                step=0.01,
                format="%.3f",
                key="onprem_standard",
                help="Cost per hour for standard compute nodes (default: $0.192)"
            )
            
            onprem_vectordb_node = st.number_input(
                "VectorDB Node (16 vCPU, 64GB)",
                min_value=0.01,
                max_value=2.00,
                value=0.384,
                step=0.01,
                format="%.3f",
                key="onprem_vectordb",
                help="Cost per hour for high-memory VectorDB nodes (default: $0.384)"
            )
            
            onprem_jump_host = st.number_input(
                "Jump Host (2 vCPU, 8GB)",
                min_value=0.01,
                max_value=0.50,
                value=0.048,
                step=0.01,
                format="%.3f",
                key="onprem_jump",
                help="Cost per hour for jump host (default: $0.048)"
            )
            
            st.markdown("##### GPU Pricing Multiplier")
            onprem_gpu_multiplier = st.slider(
                "GPU Cost Multiplier (% of AWS)",
                min_value=30,
                max_value=100,
                value=55,
                step=5,
                key="onprem_gpu_mult",
                help="Percentage of AWS GPU pricing for on-premise (default: 55%)"
            ) / 100
            
            st.markdown("##### Additional Services (per hour)")
            onprem_network = st.number_input(
                "Network Infrastructure",
                min_value=0.0,
                max_value=0.10,
                value=0.020,
                step=0.005,
                format="%.3f",
                key="onprem_network",
                help="Switches, routers, firewalls (default: $0.020)"
            )
            
            onprem_storage_per_gb = st.number_input(
                "Storage (per GB per month)",
                min_value=0.01,
                max_value=0.20,
                value=0.05,
                step=0.01,
                format="%.3f",
                key="onprem_storage",
                help="SAN/NAS storage cost (default: $0.05/GB/month)"
            )
            
            onprem_load_balancer = st.number_input(
                "Hardware Load Balancer",
                min_value=0.0,
                max_value=0.05,
                value=0.010,
                step=0.005,
                format="%.3f",
                key="onprem_lb",
                help="Load balancer amortized cost (default: $0.010)"
            )
            
            onprem_power_cooling = st.number_input(
                "Power & Cooling",
                min_value=0.01,
                max_value=0.10,
                value=0.030,
                step=0.005,
                format="%.3f",
                key="onprem_power",
                help="Datacenter power and cooling (default: $0.030)"
            )
            
            onprem_datacenter_space = st.number_input(
                "Datacenter Space",
                min_value=0.0,
                max_value=0.05,
                value=0.015,
                step=0.005,
                format="%.3f",
                key="onprem_space",
                help="Rack space and facilities (default: $0.015)"
            )
            
            onprem_maintenance = st.number_input(
                "Maintenance & Support",
                min_value=0.0,
                max_value=0.10,
                value=0.025,
                step=0.005,
                format="%.3f",
                key="onprem_maint",
                help="Hardware maintenance contracts (default: $0.025)"
            )
            
            onprem_k8s_management = st.number_input(
                "K8s Management Cost",
                min_value=0.0,
                max_value=0.20,
                value=0.05,
                step=0.01,
                format="%.3f",
                key="onprem_k8s",
                help="Self-managed K8s operational cost (default: $0.05)"
            )
        
        # Reset button
        if st.button("🔄 Reset All Pricing to Defaults", type="secondary"):
            st.rerun()
        
        # LLM Configuration
        st.subheader("LLM Settings")
        selected_model = st.selectbox(
            "Select LLM Model",
            list(MODELS.keys()),
            index=2,  # Default to Llama 3.3 70B
            help="Choose the LLM model for inference workloads"
        )
        
        selected_gpu = st.selectbox(
            "Select GPU Type",
            list(GPUS.keys()),
            index=1,  # Default to H100 80GB
            help="GPU type for LLM inference nodes"
        )
        
        precision = st.selectbox(
            "Model Precision",
            ["FP16", "INT8", "INT4"],
            index=0,  # Default to FP16
            help="Model precision affects memory usage and quality"
        )
        
        # Workload Configuration
        st.subheader("Workload Settings")
        conversations_per_minute = st.number_input(
            "Conversations per Minute",
            min_value=1,
            max_value=5000,
            value=200,
            step=10,
            help="Expected conversation throughput across all tenants"
        )
        
        tokens_per_conversation = st.number_input(
            "Tokens per Conversation",
            min_value=500,
            max_value=20000,
            value=2000,
            step=100,
            help="Average tokens per conversation (input + output)"
        )
        
        # Time period
        time_period = st.selectbox(
            "Cost Calculation Period",
            ["Monthly (30 days)", "Weekly (7 days)", "Daily (1 day)", "Hourly"],
            index=0
        )
        
        days_map = {
            "Monthly (30 days)": 30,
            "Weekly (7 days)": 7,
            "Daily (1 day)": 1,
            "Hourly": 1/24
        }
        days = days_map[time_period]
    
    # Calculate all requirements
    infrastructure = calculate_detailed_infrastructure(num_tenants, apps_per_tenant)
    
    # Apply custom pricing - create modified copies to avoid global state issues
    def apply_custom_pricing():
        """Apply user-configured pricing to global dictionaries"""
        # Update AWS pricing with user-configured values
        CLOUD_PRICING['AWS']['cost_per_node_hour'] = aws_standard_node
        CLOUD_PRICING['AWS']['vectordb_node']['cost_per_hour'] = aws_vectordb_node
        CLOUD_PRICING['AWS']['jump_host']['cost_per_hour'] = aws_jump_host
        CLOUD_PRICING['AWS']['managed_k8s_cost'] = aws_k8s_management
        
        # Update AWS GPU pricing
        GPUS["H200 141GB"]["pricing"]["aws"] = aws_h200
        GPUS["H100 80GB"]["pricing"]["aws"] = aws_h100
        GPUS["A100 80GB"]["pricing"]["aws"] = aws_a100_80
        GPUS["A100 40GB"]["pricing"]["aws"] = aws_a100_40
        GPUS["L40S"]["pricing"]["aws"] = aws_l40s
        
        # Update Azure pricing with user-configured values
        CLOUD_PRICING['Azure']['cost_per_node_hour'] = azure_standard_node
        CLOUD_PRICING['Azure']['vectordb_node']['cost_per_hour'] = azure_vectordb_node
        CLOUD_PRICING['Azure']['jump_host']['cost_per_hour'] = azure_jump_host
        CLOUD_PRICING['Azure']['managed_k8s_cost'] = azure_k8s_management
        
        # Update Azure GPU pricing
        GPUS["H200 141GB"]["pricing"]["azure"] = azure_h200
        GPUS["H100 80GB"]["pricing"]["azure"] = azure_h100
        GPUS["A100 80GB"]["pricing"]["azure"] = azure_a100_80
        GPUS["A100 40GB"]["pricing"]["azure"] = azure_a100_40
        
        # Update GCP pricing with user-configured values
        CLOUD_PRICING['GCP']['cost_per_node_hour'] = gcp_standard_node
        CLOUD_PRICING['GCP']['vectordb_node']['cost_per_hour'] = gcp_vectordb_node
        CLOUD_PRICING['GCP']['jump_host']['cost_per_hour'] = gcp_jump_host
        CLOUD_PRICING['GCP']['managed_k8s_cost'] = gcp_k8s_management
        
        # Update GCP GPU pricing
        GPUS["H100 80GB"]["pricing"]["gcp"] = gcp_h100
        GPUS["A100 80GB"]["pricing"]["gcp"] = gcp_a100_80
        GPUS["A100 40GB"]["pricing"]["gcp"] = gcp_a100_40
        
        # Update On-Premise pricing with user-configured values
        CLOUD_PRICING['On-Premise']['cost_per_node_hour'] = onprem_standard_node
        CLOUD_PRICING['On-Premise']['vectordb_node']['cost_per_hour'] = onprem_vectordb_node
        CLOUD_PRICING['On-Premise']['jump_host']['cost_per_hour'] = onprem_jump_host
        CLOUD_PRICING['On-Premise']['managed_k8s_cost'] = onprem_k8s_management
        
        # Update on-premise additional services
        CLOUD_PRICING['On-Premise']['additional_services'] = {
            'Network_Infrastructure': {'cost_per_hour': onprem_network, 'description': 'Switches, routers, firewalls (amortized)'},
            'Storage_SAN': {'cost_per_gb_month': onprem_storage_per_gb, 'description': 'SAN/NAS storage (1TB base, amortized)'},
            'Hardware_Load_Balancer': {'cost_per_hour': onprem_load_balancer, 'description': 'F5/Citrix ADC (amortized)'},
            'Power_Cooling': {'cost_per_hour': onprem_power_cooling, 'description': 'Datacenter power and cooling'},
            'Datacenter_Space': {'cost_per_hour': onprem_datacenter_space, 'description': 'Rack space and facilities costs'},
            'Maintenance_Support': {'cost_per_hour': onprem_maintenance, 'description': 'Hardware maintenance and vendor support contracts'}
        }
        
        # Update on-premise GPU pricing based on AWS prices and multiplier
        for gpu_name in GPUS.keys():
            if 'aws' in GPUS[gpu_name]['pricing'] and GPUS[gpu_name]['pricing']['aws'] != 'NA':
                aws_price = GPUS[gpu_name]['pricing']['aws']
                GPUS[gpu_name]['pricing']['on-premise'] = round(aws_price * onprem_gpu_multiplier, 2)
    
    # Apply all custom pricing
    apply_custom_pricing()
    
    precision_bytes = {
        "FP16": 2,
        "INT8": 1,
        "INT4": 0.5
    }[precision]
    
    model_spec = MODELS[selected_model]
    gpu_spec = GPUS[selected_gpu]
    
    gpu_requirements = calculate_gpu_requirements(
        conversations_per_minute, tokens_per_conversation, 
        model_spec, gpu_spec, precision_bytes
    )
    
    # Main Dashboard
    st.header("📊 Infrastructure Overview")
    
    st.markdown("---")  # Visual separator
    
    # Row 1: Core Metrics - Use 4 columns for better spacing
    col1, col2, col3, col4 = st.columns(4)
    
    with col1:
        st.metric(
            label="🏢 Total Tenants", 
            value=f"{num_tenants}",
            help="Number of tenant environments"
        )
    
    with col2:
        st.metric(
            label="📦 Apps per Tenant",
            value=f"{apps_per_tenant}",
            help=f"Total applications: {infrastructure['totals']['total_apps']}"
        )
    
    with col3:
        st.metric(
            label="🖥️ Worker Nodes", 
            value=f"{infrastructure['totals']['total_nodes']}",
            help=f"Standard: {infrastructure['totals']['total_standard_nodes']}, VectorDB: {infrastructure['totals']['total_vectordb_nodes']}"
        )
    
    with col4:
        gpu_display = f"{gpu_requirements['actual_gpus_allocated']} GPUs"
        if gpu_requirements['best_config']:
            gpu_detail = f"({gpu_requirements['best_config']['num_nodes']} nodes)"
        else:
            gpu_detail = ""
        st.metric(
            label="🎮 GPU Resources", 
            value=gpu_display,
            delta=gpu_detail,
            help=f"Configuration: {gpu_requirements['best_config']['num_nodes']}×{gpu_requirements['best_config']['gpus_per_node']} GPUs" if gpu_requirements['best_config'] else "GPU allocation"
        )
    
    # Row 2: Performance Metrics
    col1, col2, col3, col4 = st.columns(4)
    
    with col1:
        st.metric(
            label="💬 Target Load", 
            value=f"{conversations_per_minute}",
            delta="conv/min",
            help="Target conversation throughput"
        )
    
    with col2:
        st.metric(
            label="📈 Max Capacity", 
            value=f"{gpu_requirements['max_conversations_per_minute']:.0f}",
            delta="conv/min",
            help="Maximum system capacity"
        )
    
    with col3:
        capacity_headroom = gpu_requirements['max_conversations_per_minute'] - conversations_per_minute
        headroom_percentage = (capacity_headroom / gpu_requirements['max_conversations_per_minute']) * 100
        st.metric(
            label="📊 Capacity Headroom", 
            value=f"{headroom_percentage:.1f}%",
            delta=f"{capacity_headroom:.0f} conv/min available",
            help="Available capacity beyond current target load"
        )
    
    with col4:
        bottleneck_icon = "💾" if gpu_requirements['bottleneck'] == 'Memory' else "⚡"
        st.metric(
            label=f"{bottleneck_icon} Bottleneck", 
            value=gpu_requirements['bottleneck'],
            help="Primary system constraint"
        )
    
    st.markdown("---")  # Visual separator
    
    # Create tabs for detailed views
    tab1, tab2, tab3, tab4, tab5 = st.tabs([
        "🗏 Platform Infrastructure", 
        "🖥️ GPU Requirements", 
        "💰 Cost Analysis", 
        "📈 Performance Analysis",
        "🔧 Technical Specifications"
    ])
    
    with tab1:
        st.subheader("Platform Infrastructure Breakdown")
        
        # Show deployment node scaling info
        st.info(f"📦 **Deployment Node Scaling**: {infrastructure['totals']['deploy_nodes_per_tenant']} deployment node(s) per tenant for {apps_per_tenant} apps (1 node per 4 apps)")
        
        # Platform nodes breakdown
        breakdown_data = []
        for node_type, details in infrastructure['node_breakdown'].items():
            per_tenant_value = details['tenant'] // num_tenants if num_tenants > 0 and details['tenant'] > 0 else 0
            breakdown_data.append({
                'Node Type': node_type,
                'Base': details['base'] if details['base'] > 0 else '-',
                'Per Tenant': per_tenant_value if per_tenant_value > 0 else '-',
                'Total': details['total'],
                'CPU': details['cores'],
                'RAM (GB)': details['ram'],
                'VM Type': details.get('node_type', 'Standard'),
                'Purpose': details['purpose']
            })
        
        breakdown_df = pd.DataFrame(breakdown_data)
        
        # Use column configuration for better display
        st.dataframe(
            breakdown_df, 
            use_container_width=True,
            hide_index=True,
            column_config={
                "Node Type": st.column_config.TextColumn("Node Type", width="medium"),
                "Base": st.column_config.TextColumn("Base", width="small"),
                "Per Tenant": st.column_config.TextColumn("Per Tenant", width="small"),
                "Total": st.column_config.NumberColumn("Total", width="small"),
                "CPU": st.column_config.NumberColumn("CPU", width="small"),
                "RAM (GB)": st.column_config.NumberColumn("RAM (GB)", width="small"),
                "VM Type": st.column_config.TextColumn("VM Type", width="medium"),
                "Purpose": st.column_config.TextColumn("Purpose", width="large")
            }
        )
        
        # Visual breakdown
        col1, col2 = st.columns(2)
        
        with col1:
            # Node distribution pie chart
            node_counts = {node_type: details['total'] 
                          for node_type, details in infrastructure['node_breakdown'].items() 
                          if details['total'] > 0}
            
            fig_nodes = px.pie(
                values=list(node_counts.values()),
                names=list(node_counts.keys()),
                title="Platform Node Distribution"
            )
            st.plotly_chart(fig_nodes, use_container_width=True)
        
        with col2:
            # Resource distribution
            resource_data = []
            for node_type, details in infrastructure['node_breakdown'].items():
                if details['total'] > 0:
                    resource_data.extend([
                        {'Node Type': node_type, 'Resource': 'CPU Cores', 'Amount': details['cores']},
                        {'Node Type': node_type, 'Resource': 'RAM (GB)', 'Amount': details['ram']}
                    ])
            
            resource_df = pd.DataFrame(resource_data)
            fig_resources = px.bar(
                resource_df,
                x='Node Type',
                y='Amount',
                color='Resource',
                title='Resource Distribution by Node Type',
                barmode='group'
            )
            st.plotly_chart(fig_resources, use_container_width=True)
        
        # Node type distribution
        st.subheader("Node Type Distribution")
        col1, col2 = st.columns(2)
        with col1:
            st.metric(
                "Standard Nodes (8 vCPU, 32GB RAM)",
                infrastructure['totals']['total_standard_nodes'],
                help="Platform, Compute, and Deploy nodes"
            )
        with col2:
            st.metric(
                "High-Memory Nodes (16 vCPU, 64GB RAM)",
                infrastructure['totals']['total_vectordb_nodes'],
                help="VectorDB nodes with higher memory capacity"
            )
    
    with tab2:
        st.subheader("GPU Requirements Analysis")
        
        # GPU requirements metrics
        col1, col2, col3, col4 = st.columns(4)
        
        with col1:
            st.metric(
                "Memory-based GPUs", 
                gpu_requirements['gpus_needed_memory'],
                help="GPUs needed to fit model in memory"
            )
        
        with col2:
            st.metric(
                "Throughput-based GPUs", 
                gpu_requirements['gpus_needed_throughput'],
                help="GPUs needed for required throughput"
            )
        
        with col3:
            st.metric(
                "Logical GPUs Needed", 
                gpu_requirements['total_gpus_needed'],
                help="Minimum GPUs needed (before node configuration)"
            )
        
        with col4:
            st.metric(
                "Actual GPUs Allocated", 
                gpu_requirements['actual_gpus_allocated'],
                help="GPUs allocated based on standard node configurations",
                delta=gpu_requirements['actual_gpus_allocated'] - gpu_requirements['total_gpus_needed']
            )
        
        # GPU Node Configuration Analysis
        st.subheader("🖥️ GPU Node Configuration Options")
        
        if gpu_requirements['gpu_configurations']:
            # Display configuration options in a table
            config_data = []
            for config in gpu_requirements['gpu_configurations']:
                efficiency_score = f"{config['utilization']:.1f}%"
                memory_compatible = "✅" if config['meets_memory_req'] else "❌"
                
                config_data.append({
                    "GPUs/Node": config['gpus_per_node'],
                    "Mem": memory_compatible,
                    "Nodes": config['num_nodes'],
                    "Total GPUs": config['total_gpus_allocated'],
                    "GPU Util": efficiency_score,
                    "Waste": config['gpu_waste'],
                    "Mem Util": f"{config['memory_utilization']:.1f}%"
                })
            
            config_df = pd.DataFrame(config_data)
            st.dataframe(
                config_df, 
                use_container_width=True,
                hide_index=True,
                column_config={
                    "GPUs/Node": st.column_config.NumberColumn("GPUs/Node", width="small"),
                    "Mem": st.column_config.TextColumn("Mem ✓", width="small"),
                    "Nodes": st.column_config.NumberColumn("Nodes", width="small"),
                    "Total GPUs": st.column_config.NumberColumn("Total GPUs", width="small"),
                    "GPU Util": st.column_config.TextColumn("GPU Util", width="small"),
                    "Waste": st.column_config.NumberColumn("Waste", width="small"),
                    "Mem Util": st.column_config.TextColumn("Mem Util", width="small")
                }
            )
            
            # Highlight the recommended configuration
            if gpu_requirements['best_config']:
                best = gpu_requirements['best_config']
                st.success(f"💡 **Recommended Configuration**: {best['num_nodes']} nodes × {best['gpus_per_node']} GPUs = {best['total_gpus_allocated']} total GPUs ({best['utilization']:.1f}% utilization)")
            
            # Show minimum requirement info
            st.info(f"**Memory Constraint**: Minimum {gpu_requirements['min_gpus_per_node']} GPUs per node required to fit {gpu_requirements['model_memory_gb']:.1f}GB model in {gpu_spec['memory']}GB GPU memory")
        
        # GPU configuration visualization
        col1, col2 = st.columns(2)
        
        with col1:
            # Node configuration comparison
            if gpu_requirements['gpu_configurations']:
                config_chart_data = pd.DataFrame(gpu_requirements['gpu_configurations'])
                fig_configs = px.bar(
                    config_chart_data,
                    x='gpus_per_node',
                    y='utilization',
                    title='GPU Utilization by Node Configuration',
                    labels={'gpus_per_node': 'GPUs per Node', 'utilization': 'Utilization (%)'}
                )
                st.plotly_chart(fig_configs, use_container_width=True)
        
        with col2:
            # GPU allocation vs requirement
            allocation_data = pd.DataFrame({
                'Metric': ['Required GPUs', 'Allocated GPUs'],
                'Count': [gpu_requirements['total_gpus_needed'], gpu_requirements['actual_gpus_allocated']]
            })
            
            fig_allocation = px.bar(
                allocation_data,
                x='Metric',
                y='Count',
                title='GPU Allocation vs Requirement',
                color='Metric'
            )
            st.plotly_chart(fig_allocation, use_container_width=True)
        
        # Model and GPU specifications
        st.subheader("🔧 Model & GPU Specifications")
        
        # GPU configuration table
        gpu_config_data = [{
            'Model': selected_model,
            'Parameters': f"{model_spec['params']}B ({model_spec['active_params']}B active)" if model_spec['params'] != model_spec['active_params'] else f"{model_spec['params']}B",
            'Model Memory Required': f"{gpu_requirements['model_memory_gb']:.1f} GB",
            'GPU Type': selected_gpu,
            'GPU Memory per Unit': f"{gpu_spec['memory']} GB",
            'GPUs Required (Logic)': gpu_requirements['total_gpus_needed'],
            'GPUs Allocated (Actual)': gpu_requirements['actual_gpus_allocated'],
            'GPU Nodes': f"{gpu_requirements['best_config']['num_nodes']} nodes × {gpu_requirements['best_config']['gpus_per_node']} GPUs" if gpu_requirements['best_config'] else 'N/A',
            'Total GPU Memory': f"{gpu_requirements['actual_gpus_allocated'] * gpu_spec['memory']} GB",
            'Memory Utilization': f"{(gpu_requirements['model_memory_gb'] / (gpu_requirements['actual_gpus_allocated'] * gpu_spec['memory']) * 100):.1f}%",
            'Precision': precision
        }]
        
        gpu_config_df = pd.DataFrame(gpu_config_data)
        st.dataframe(gpu_config_df, use_container_width=True)
        
        # Performance metrics
        col1, col2 = st.columns(2)
        
        with col1:
            # TPS comparison
            tps_data = pd.DataFrame({
                'Metric': ['Required TPS', 'Single GPU TPS', 'Total System TPS'],
                'Value': [
                    gpu_requirements['required_tps'],
                    gpu_requirements['estimated_tps'],
                    gpu_requirements['total_system_tps']
                ]
            })
            
            fig_tps = px.bar(
                tps_data,
                x='Metric',
                y='Value',
                title='Tokens Per Second Analysis',
                color='Metric'
            )
            st.plotly_chart(fig_tps, use_container_width=True)
        
        with col2:
            # Capacity utilization
            utilization_data = pd.DataFrame({
                'Metric': ['Required Capacity', 'Available Capacity'],
                'Conversations/Min': [
                    conversations_per_minute,
                    gpu_requirements['max_conversations_per_minute']
                ]
            })
            
            fig_capacity = px.bar(
                utilization_data,
                x='Metric',
                y='Conversations/Min',
                title='Conversation Capacity Analysis',
                color='Metric'
            )
            st.plotly_chart(fig_capacity, use_container_width=True)
    
    with tab3:
        st.subheader("Comprehensive Cost Analysis")
        
        # Show customization status for all providers
        default_values = {
            'aws': {'standard': 0.384, 'vectordb': 0.768, 'jump': 0.096, 'k8s': 0.10},
            'azure': {'standard': 0.384, 'vectordb': 0.768, 'jump': 0.096, 'k8s': 0.0},
            'gcp': {'standard': 0.379, 'vectordb': 0.758, 'jump': 0.067, 'k8s': 0.10},
            'onprem': {'standard': 0.192, 'vectordb': 0.384, 'jump': 0.048, 'gpu_mult': 0.55, 'k8s': 0.05}
        }
        
        customizations = []
        
        # Check AWS customizations
        if (aws_standard_node != default_values['aws']['standard'] or 
            aws_vectordb_node != default_values['aws']['vectordb'] or
            aws_k8s_management != default_values['aws']['k8s']):
            customizations.append("AWS")
        
        # Check Azure customizations
        if (azure_standard_node != default_values['azure']['standard'] or 
            azure_vectordb_node != default_values['azure']['vectordb'] or
            azure_k8s_management != default_values['azure']['k8s']):
            customizations.append("Azure")
        
        # Check GCP customizations
        if (gcp_standard_node != default_values['gcp']['standard'] or 
            gcp_vectordb_node != default_values['gcp']['vectordb'] or
            gcp_k8s_management != default_values['gcp']['k8s']):
            customizations.append("GCP")
        
        # Check On-Premise customizations
        if (onprem_standard_node != default_values['onprem']['standard'] or
            onprem_vectordb_node != default_values['onprem']['vectordb'] or
            onprem_gpu_multiplier != default_values['onprem']['gpu_mult'] or
            onprem_k8s_management != default_values['onprem']['k8s']):
            customizations.append("On-Premise")
        
        if customizations:
            st.warning(f"""
            **✏️ Custom Pricing Active for: {', '.join(customizations)}**
            
            Using user-configured pricing instead of defaults. View details in Technical Specifications tab or adjust in sidebar.
            """)
        
        # Add info box about cost models
        st.info("""
        **💡 Cost Model Information**: 
        - **Cloud Providers (AWS/Azure/GCP)**: Pay-as-you-go pricing with per-hour compute and GPU costs
        - **On-Premise**: Hardware amortized over 3-year lifecycle + operating costs (power, cooling, maintenance)
        - **Customization**: All pricing values can be adjusted in the sidebar to match your actual costs
        
        **🔧 Customize:** Use the sidebar "Cloud Provider Pricing" sections to adjust costs
        """)
        
        # Calculate costs for all providers
        all_costs = {}
        for provider in CLOUD_PRICING.keys():
            all_costs[provider] = calculate_detailed_costs(
                provider, infrastructure, gpu_requirements, gpu_spec, days
            )
        
        # Cost comparison table
        cost_comparison_data = []
        for provider, costs in all_costs.items():
            gpu_available = costs['totals']['gpu_available']
            cost_comparison_data.append({
                'Provider': provider,
                'GPU': '✅' if gpu_available else '❌',
                'Platform': f"${costs['totals']['platform_cost']:.2f}",
                'GPU Cost': format_cost_for_display(costs['totals']['gpu_cost'], gpu_available),
                'Total': format_cost_for_display(costs['totals']['total_cost'], gpu_available),
                'Per Hour': format_cost_for_display(costs['totals']['cost_per_hour'], gpu_available),
                'Per Day': format_cost_for_display(costs['totals']['cost_per_day'], gpu_available),
                'Total_Numeric': costs['totals']['total_cost'] if gpu_available else None,
                'GPU_Available': gpu_available
            })
        
        cost_df = pd.DataFrame(cost_comparison_data)
        display_cost_df = cost_df.drop(['Total_Numeric', 'GPU_Available'], axis=1)
        
        st.dataframe(
            display_cost_df, 
            use_container_width=True,
            hide_index=True,
            column_config={
                "Provider": st.column_config.TextColumn("Provider", width="medium"),
                "GPU": st.column_config.TextColumn("GPU ✓", width="small"),
                "Platform": st.column_config.TextColumn("Platform Cost", width="medium"),
                "GPU Cost": st.column_config.TextColumn("GPU Cost", width="medium"),
                "Total": st.column_config.TextColumn("Total Cost", width="medium"),
                "Per Hour": st.column_config.TextColumn("$/Hour", width="medium"),
                "Per Day": st.column_config.TextColumn("$/Day", width="medium")
            }
        )
        
        # Add download button for cost report
        report_data = create_downloadable_cost_report(
            all_costs, infrastructure, gpu_requirements, model_spec, gpu_spec,
            selected_model, selected_gpu, num_tenants, apps_per_tenant, conversations_per_minute,
            tokens_per_conversation, precision, time_period
        )
        
        st.download_button(
            label="📥 Download Complete Cost Report (JSON)",
            data=json.dumps(report_data, indent=2),
            file_name=f"llmops_cost_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
            mime="application/json",
            help="Download comprehensive cost analysis with all services and configurations"
        )
        
        # Create CSV version for easier viewing
        csv_data = []
        for provider, provider_data in report_data['cost_breakdown_by_provider'].items():
            if provider_data['gpu_available']:
                csv_data.append({
                    'Provider': provider,
                    'GPU_Available': 'Yes',
                    'Platform_Nodes': infrastructure['totals']['total_standard_nodes'],
                    'VectorDB_Nodes': infrastructure['totals']['total_vectordb_nodes'],
                    'GPU_Nodes': gpu_requirements['total_gpus_needed'],
                    'Kubernetes_Nodes_Cost': provider_data['platform_costs']['kubernetes_nodes'],
                    'VectorDB_Nodes_Cost': provider_data['platform_costs']['vectordb_nodes'],
                    'Jump_Host_Cost': provider_data['platform_costs']['jump_host'],
                    'Additional_Services_Cost': provider_data['platform_costs']['additional_services'],
                    'K8s_Management_Cost': provider_data['platform_costs']['k8s_management'],
                    'Total_Platform_Cost': provider_data['platform_costs']['platform_total'],
                    'GPU_Cost_Per_Hour': provider_data['gpu_costs']['gpu_cost_per_hour'],
                    'Total_GPU_Cost': provider_data['gpu_costs']['total_gpu_cost'],
                    'Total_Infrastructure_Cost': provider_data['totals']['total_cost'],
                    'Cost_Per_Hour': provider_data['totals']['cost_per_hour'],
                    'Cost_Per_Day': provider_data['totals']['cost_per_day']
                })
            else:
                csv_data.append({
                    'Provider': provider,
                    'GPU_Available': 'No',
                    'Platform_Nodes': infrastructure['totals']['total_standard_nodes'],
                    'VectorDB_Nodes': infrastructure['totals']['total_vectordb_nodes'],
                    'GPU_Nodes': 'N/A',
                    'Kubernetes_Nodes_Cost': provider_data['platform_costs']['kubernetes_nodes'],
                    'VectorDB_Nodes_Cost': provider_data['platform_costs']['vectordb_nodes'],
                    'Jump_Host_Cost': provider_data['platform_costs']['jump_host'],
                    'Additional_Services_Cost': provider_data['platform_costs']['additional_services'],
                    'K8s_Management_Cost': provider_data['platform_costs']['k8s_management'],
                    'Total_Platform_Cost': provider_data['platform_costs']['platform_total'],
                    'GPU_Cost_Per_Hour': 'N/A',
                    'Total_GPU_Cost': 'N/A',
                    'Total_Infrastructure_Cost': 'N/A',
                    'Cost_Per_Hour': 'N/A',
                    'Cost_Per_Day': 'N/A'
                })
        
        csv_df = pd.DataFrame(csv_data)
        csv_string = csv_df.to_csv(index=False)
        
        st.download_button(
            label="📊 Download Cost Summary (CSV)",
            data=csv_string,
            file_name=f"llmops_cost_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
            mime="text/csv",
            help="Download cost summary in CSV format for spreadsheet analysis"
        )
        
        # Cost breakdown visualization - only for providers with GPU available
        available_providers_data = cost_df[cost_df['GPU_Available'] == True]
        
        col1, col2 = st.columns(2)
        
        with col1:
            # Provider comparison - only available providers
            if not available_providers_data.empty:
                fig_provider_comparison = px.bar(
                    available_providers_data,
                    x='Provider',
                    y='Total_Numeric',
                    title=f'Total Cost Comparison ({time_period}) - All Deployment Options',
                    labels={'Total_Numeric': 'Total Cost (USD)'},
                    color='Provider'
                )
                st.plotly_chart(fig_provider_comparison, use_container_width=True)
            else:
                st.warning("⚠️ No providers have the selected GPU available for cost comparison")
        
        with col2:
            # Cost breakdown for selected provider (cheapest available)
            available_providers = get_available_providers_for_gpu(gpu_spec)
            
            if available_providers:
                cheapest_provider = min(available_providers, 
                                      key=lambda x: all_costs[x]['totals']['total_cost'])
                cheapest_costs = all_costs[cheapest_provider]
                
                breakdown_values = [
                    cheapest_costs['totals']['platform_cost'],
                    cheapest_costs['totals']['gpu_cost']
                ]
                breakdown_labels = ['Platform Infrastructure', 'GPU Infrastructure']
                
                fig_breakdown = px.pie(
                    values=breakdown_values,
                    names=breakdown_labels,
                    title=f'{cheapest_provider} - Cost Breakdown'
                )
                st.plotly_chart(fig_breakdown, use_container_width=True)
            else:
                st.warning("⚠️ No providers have the selected GPU available")
        
        # Detailed cost breakdown for cheapest available provider
        available_providers = get_available_providers_for_gpu(gpu_spec)
        
        if available_providers:
            cheapest_provider = min(available_providers, 
                                  key=lambda x: all_costs[x]['totals']['total_cost'])
            
            st.subheader(f"💡 Most Cost-Effective Option: {cheapest_provider}")
            
            if cheapest_provider == 'On-Premise':
                st.success(f"✅ **On-Premise deployment offers the lowest cost** with {selected_gpu}")
                st.info("💰 **Note**: On-premise costs assume 3-year hardware amortization. Initial capex and datacenter setup costs are not included in hourly rates.")
            else:
                st.info(f"✅ **{selected_gpu} is available on {cheapest_provider}**")
            
            cheapest_costs = all_costs[cheapest_provider]
            
            col1, col2, col3 = st.columns(3)
            
            with col1:
                st.metric(
                    "Platform Infrastructure",
                    f"${cheapest_costs['totals']['platform_cost']:.2f}",
                    help="Kubernetes nodes (including VectorDB), networking, storage, management"
                )
            
            with col2:
                st.metric(
                    "GPU Infrastructure", 
                    f"${cheapest_costs['totals']['gpu_cost']:.2f}",
                    help=f"{gpu_requirements['total_gpus_needed']} x {selected_gpu}"
                )
            
            with col3:
                # Calculate savings compared to most expensive available provider
                if len(available_providers) > 1:
                    most_expensive_available = max(available_providers, 
                                                 key=lambda x: all_costs[x]['totals']['total_cost'])
                    savings = all_costs[most_expensive_available]['totals']['total_cost'] - cheapest_costs['totals']['total_cost']
                    savings_pct = (savings / all_costs[most_expensive_available]['totals']['total_cost']) * 100
                    st.metric(
                        "Potential Savings",
                        f"${savings:.2f}",
                        help=f"Savings compared to {most_expensive_available} ({savings_pct:.1f}%)"
                    )
                else:
                    st.metric(
                        "Provider Status",
                        "Only Option",
                        help="This is the only provider with the selected GPU available"
                    )
            
            # Cloud vs On-Premise comparison if both are available
            if 'On-Premise' in available_providers and len(available_providers) > 1:
                st.subheader("☁️ Cloud vs 🏢 On-Premise Comparison")
                
                onprem_cost = all_costs['On-Premise']['totals']['total_cost']
                cloud_providers = [p for p in available_providers if p != 'On-Premise']
                
                comparison_data = []
                for provider in ['On-Premise'] + cloud_providers:
                    comparison_data.append({
                        'Deployment Type': 'On-Premise' if provider == 'On-Premise' else 'Cloud',
                        'Provider': provider,
                        'Total Cost': all_costs[provider]['totals']['total_cost'],
                        'Platform Cost': all_costs[provider]['totals']['platform_cost'],
                        'GPU Cost': all_costs[provider]['totals']['gpu_cost']
                    })
                
                comp_df = pd.DataFrame(comparison_data)
                
                # Create grouped bar chart
                fig_comparison = go.Figure()
                fig_comparison.add_trace(go.Bar(
                    name='Platform Cost',
                    x=comp_df['Provider'],
                    y=comp_df['Platform Cost'],
                    marker_color='lightblue'
                ))
                fig_comparison.add_trace(go.Bar(
                    name='GPU Cost',
                    x=comp_df['Provider'],
                    y=comp_df['GPU Cost'],
                    marker_color='orange'
                ))
                
                fig_comparison.update_layout(
                    title='Cost Breakdown: On-Premise vs Cloud',
                    xaxis_title='Provider',
                    yaxis_title='Cost (USD)',
                    barmode='stack'
                )
                st.plotly_chart(fig_comparison, use_container_width=True)
                
                # Calculate average cloud cost
                avg_cloud_cost = sum([all_costs[p]['totals']['total_cost'] for p in cloud_providers]) / len(cloud_providers)
                cloud_savings = avg_cloud_cost - onprem_cost
                cloud_savings_pct = (cloud_savings / avg_cloud_cost) * 100
                
                if cloud_savings > 0:
                    st.success(f"💰 **On-Premise Savings**: ${cloud_savings:.2f} ({cloud_savings_pct:.1f}%) compared to average cloud cost over {time_period}")
                else:
                    st.info(f"☁️ **Cloud is more cost-effective** for this configuration over {time_period}")
                
        else:
            st.error(f"❌ **No Providers Available**: The selected GPU ({selected_gpu}) is not available on any deployment option")
            st.warning("**Recommendation**: Please select a different GPU model that is available")
            
            # Show which GPUs are available on which providers
            st.subheader("🔍 GPU Availability by Provider")
            availability_data = []
            for gpu_name, gpu_data in GPUS.items():
                available_on = get_available_providers_for_gpu(gpu_data)
                availability_data.append({
                    'GPU Model': gpu_name,
                    'Memory': f"{gpu_data['memory']} GB",
                    'Available On': ', '.join(available_on) if available_on else 'None',
                    'Deployment Options': len(available_on)
                })
            
            availability_df = pd.DataFrame(availability_data)
            availability_df = availability_df.sort_values('Deployment Options', ascending=False)
            st.dataframe(availability_df, use_container_width=True)
    
    with tab4:
        st.subheader("Performance Analysis & Scaling")
        
        # Performance metrics
        col1, col2, col3 = st.columns(3)
        
        with col1:
            st.metric(
                "Total System TPS",
                f"{gpu_requirements['total_system_tps']:.0f}",
                help="Combined throughput of all GPUs"
            )
        
        with col2:
            st.metric(
                "Conversation Capacity",
                f"{gpu_requirements['max_conversations_per_minute']:.0f}/min",
                help="Maximum conversations the system can handle"
            )
        
        with col3:
            capacity_headroom = gpu_requirements['max_conversations_per_minute'] - conversations_per_minute
            headroom_percentage = (capacity_headroom / gpu_requirements['max_conversations_per_minute']) * 100
            st.metric(
                "Capacity Headroom",
                f"{headroom_percentage:.1f}%",
                delta=f"{capacity_headroom:.0f} conv/min available",
                help="Available capacity beyond current target load"
            )
        
        # Scaling analysis
        st.subheader("Scaling Analysis")
        
        # Create scaling scenarios
        scaling_scenarios = [0.5, 1.0, 1.5, 2.0, 3.0, 5.0]
        scaling_data = []
        
        for multiplier in scaling_scenarios:
            scaled_conversations = int(conversations_per_minute * multiplier)
            scaled_gpu_reqs = calculate_gpu_requirements(
                scaled_conversations, tokens_per_conversation,
                model_spec, gpu_spec, precision_bytes
            )
            
            scaling_data.append({
                'Load Multiplier': f"{multiplier}x",
                'Conversations/Min': scaled_conversations,
                'Logical GPUs': scaled_gpu_reqs['total_gpus_needed'],
                'Allocated GPUs': scaled_gpu_reqs['actual_gpus_allocated'],
                'GPU Nodes': f"{scaled_gpu_reqs['best_config']['num_nodes']}×{scaled_gpu_reqs['best_config']['gpus_per_node']}" if scaled_gpu_reqs['best_config'] else 'N/A',
                'System Capacity': f"{scaled_gpu_reqs['max_conversations_per_minute']:.0f}",
                'Headroom %': f"{((scaled_gpu_reqs['max_conversations_per_minute'] - scaled_conversations) / scaled_gpu_reqs['max_conversations_per_minute'] * 100):.1f}%"
            })
        
        scaling_df = pd.DataFrame(scaling_data)
        
        st.dataframe(
            scaling_df, 
            use_container_width=True,
            hide_index=True,
            column_config={
                "Load Multiplier": st.column_config.TextColumn("Load", width="small"),
                "Conversations/Min": st.column_config.NumberColumn("Conv/Min", width="small"),
                "Logical GPUs": st.column_config.NumberColumn("Logical", width="small"),
                "Allocated GPUs": st.column_config.NumberColumn("Allocated", width="small"),
                "GPU Nodes": st.column_config.TextColumn("GPU Nodes", width="medium"),
                "System Capacity": st.column_config.TextColumn("Capacity", width="medium"),
                "Headroom %": st.column_config.TextColumn("Headroom %", width="small")
            }
        )
        
        # Scaling visualization
        fig_scaling = go.Figure()
        
        # Add lines for both logical and allocated GPUs
        fig_scaling.add_trace(go.Scatter(
            x=[float(x.replace('x', '')) for x in scaling_df['Load Multiplier']],
            y=scaling_df['Logical GPUs'].astype(int),
            mode='lines+markers',
            name='Logical GPUs Required',
            line=dict(color='blue', dash='dash')
        ))
        
        fig_scaling.add_trace(go.Scatter(
            x=[float(x.replace('x', '')) for x in scaling_df['Load Multiplier']],
            y=scaling_df['Allocated GPUs'].astype(int),
            mode='lines+markers',
            name='Allocated GPUs (Actual)',
            line=dict(color='red')
        ))
        
        fig_scaling.update_layout(
            title='GPU Scaling Requirements (Logical vs Allocated)',
            xaxis_title='Load Multiplier',
            yaxis_title='Number of GPUs'
        )
        st.plotly_chart(fig_scaling, use_container_width=True)
        
        # Application scaling analysis
        st.subheader("Application Scaling Analysis")
        
        app_scaling_scenarios = [4, 8, 12, 16, 20, 24, 32, 40]
        app_scaling_data = []
        
        for apps in app_scaling_scenarios:
            deploy_nodes = math.ceil(apps / 4)
            app_scaling_data.append({
                'Apps per Tenant': apps,
                'Total Apps': apps * num_tenants,
                'Deploy Nodes per Tenant': deploy_nodes,
                'Total Deploy Nodes': deploy_nodes * num_tenants,
                'Deploy Node Ratio': f"1:{4 if apps >= 4 else apps}"
            })
        
        app_scaling_df = pd.DataFrame(app_scaling_data)
        
        st.dataframe(
            app_scaling_df, 
            use_container_width=True,
            hide_index=True,
            column_config={
                "Apps per Tenant": st.column_config.NumberColumn("Apps/Tenant", width="small"),
                "Total Apps": st.column_config.NumberColumn("Total Apps", width="small"),
                "Deploy Nodes per Tenant": st.column_config.NumberColumn("Deploy/Tenant", width="small"),
                "Total Deploy Nodes": st.column_config.NumberColumn("Total Deploy", width="medium"),
                "Deploy Node Ratio": st.column_config.TextColumn("Ratio", width="small")
            }
        )
        
        # App scaling visualization
        fig_app_scaling = px.line(
            app_scaling_df,
            x='Apps per Tenant',
            y='Total Deploy Nodes',
            title='Deployment Nodes Scaling with Application Count',
            markers=True
        )
        st.plotly_chart(fig_app_scaling, use_container_width=True)
    
    with tab5:
        st.subheader("Technical Specifications")
        
        # Model specifications
        st.markdown("### 🤖 LLM Model Specifications")
        model_specs_data = [{
            'Property': 'Model Name',
            'Value': selected_model
        }, {
            'Property': 'Organization',
            'Value': model_spec['org']
        }, {
            'Property': 'Total Parameters',
            'Value': f"{model_spec['params']}B"
        }, {
            'Property': 'Active Parameters',
            'Value': f"{model_spec['active_params']}B"
        }, {
            'Property': 'Max Context Length',
            'Value': f"{model_spec['max_context']:,} tokens"
        }, {
            'Property': 'Base TPS',
            'Value': f"{model_spec['base_tps']:,}"
        }, {
            'Property': 'License',
            'Value': model_spec['license']
        }, {
            'Property': 'Architecture Type',
            'Value': 'Mixture of Experts (MoE)' if model_spec['params'] != model_spec['active_params'] else 'Dense Model'
        }]
        
        model_specs_df = pd.DataFrame(model_specs_data)
        st.dataframe(model_specs_df, use_container_width=True)
        
        # GPU specifications
        st.markdown("### 🖥️ GPU Specifications")
        gpu_specs_data = [{
            'Property': 'GPU Model',
            'Value': selected_gpu
        }, {
            'Property': 'Memory Capacity',
            'Value': f"{gpu_spec['memory']} GB"
        }, {
            'Property': 'Compute Capability',
            'Value': gpu_spec['compute']
        }, {
            'Property': 'TPS Range',
            'Value': f"{gpu_spec['tps_min']:,} - {gpu_spec['tps_max']:,}"
        }, {
            'Property': 'Efficiency Tier',
            'Value': gpu_spec['efficiency_tier']
        }, {
            'Property': 'Model Precision',
            'Value': precision
        }]
        
        gpu_specs_df = pd.DataFrame(gpu_specs_data)
        st.dataframe(gpu_specs_df, use_container_width=True)
        
        # Platform specifications
        st.markdown("### 🗏 Platform Infrastructure Specifications")
        platform_specs_data = [{
            'Component': 'Standard K8s Nodes',
            'Specification': f"{infrastructure['totals']['total_standard_nodes']} nodes × 8 vCPUs × 32GB RAM"
        }, {
            'Component': 'VectorDB Nodes',
            'Specification': f"{infrastructure['totals']['total_vectordb_nodes']} nodes × 16 vCPUs × 64GB RAM"
        }, {
            'Component': 'GPU Nodes',
            'Specification': f"{gpu_requirements['actual_gpus_allocated']} × {selected_gpu} ({gpu_requirements['best_config']['num_nodes']} nodes × {gpu_requirements['best_config']['gpus_per_node']} GPUs)" if gpu_requirements['best_config'] else f"{gpu_requirements['total_gpus_needed']} × {selected_gpu}"
        }, {
            'Component': 'Total CPU Cores',
            'Specification': f"{infrastructure['totals']['total_cpu']} cores"
        }, {
            'Component': 'Total RAM',
            'Specification': f"{infrastructure['totals']['total_ram']} GB"
        }, {
            'Component': 'Total GPU Memory',
            'Specification': f"{gpu_requirements['actual_gpus_allocated'] * gpu_spec['memory']} GB"
        }, {
            'Component': 'Applications per Tenant',
            'Specification': f"{apps_per_tenant} apps × {num_tenants} tenants = {infrastructure['totals']['total_apps']} total apps"
        }, {
            'Component': 'Deployment Nodes per Tenant',
            'Specification': f"{infrastructure['totals']['deploy_nodes_per_tenant']} node(s) (1 node per 4 apps)"
        }]
        
        platform_specs_df = pd.DataFrame(platform_specs_data)
        st.dataframe(platform_specs_df, use_container_width=True)
        
        # Provider Pricing Configuration Summary
        st.markdown("### 💰 Provider Pricing Configuration")
        
        # Create tabs for each provider
        price_tab1, price_tab2, price_tab3, price_tab4 = st.tabs(["AWS", "Azure", "GCP", "On-Premise"])
        
        with price_tab1:
            aws_config_data = [{
                'Cost Component': 'Standard Compute Node',
                'Specification': 'm5.2xlarge (8 vCPU, 32GB)',
                'Cost per Hour': f"${aws_standard_node:.3f}",
                'Status': '✏️ Custom' if aws_standard_node != 0.384 else '✅ Default'
            }, {
                'Cost Component': 'VectorDB Node',
                'Specification': 'm5.4xlarge (16 vCPU, 64GB)',
                'Cost per Hour': f"${aws_vectordb_node:.3f}",
                'Status': '✏️ Custom' if aws_vectordb_node != 0.768 else '✅ Default'
            }, {
                'Cost Component': 'Jump Host',
                'Specification': 'm5.large (2 vCPU, 8GB)',
                'Cost per Hour': f"${aws_jump_host:.3f}",
                'Status': '✏️ Custom' if aws_jump_host != 0.096 else '✅ Default'
            }, {
                'Cost Component': 'EKS Management',
                'Specification': 'Managed Kubernetes',
                'Cost per Hour': f"${aws_k8s_management:.3f}",
                'Status': '✏️ Custom' if aws_k8s_management != 0.10 else '✅ Default'
            }, {
                'Cost Component': 'H200 141GB GPU',
                'Specification': 'Flagship+ GPU',
                'Cost per Hour': f"${aws_h200:.2f}",
                'Status': '✏️ Custom' if aws_h200 != 15.70 else '✅ Default'
            }, {
                'Cost Component': 'H100 80GB GPU',
                'Specification': 'Flagship GPU',
                'Cost per Hour': f"${aws_h100:.2f}",
                'Status': '✏️ Custom' if aws_h100 != 6.01 else '✅ Default'
            }, {
                'Cost Component': 'A100 80GB GPU',
                'Specification': 'Excellent GPU',
                'Cost per Hour': f"${aws_a100_80:.2f}",
                'Status': '✏️ Custom' if aws_a100_80 != 3.43 else '✅ Default'
            }, {
                'Cost Component': 'A100 40GB GPU',
                'Specification': 'Good GPU',
                'Cost per Hour': f"${aws_a100_40:.2f}",
                'Status': '✏️ Custom' if aws_a100_40 != 2.75 else '✅ Default'
            }, {
                'Cost Component': 'L40S GPU',
                'Specification': 'Very Good GPU',
                'Cost per Hour': f"${aws_l40s:.2f}",
                'Status': '✏️ Custom' if aws_l40s != 1.67 else '✅ Default'
            }]
            aws_config_df = pd.DataFrame(aws_config_data)
            st.dataframe(aws_config_df, use_container_width=True)
        
        with price_tab2:
            azure_config_data = [{
                'Cost Component': 'Standard Compute Node',
                'Specification': 'Standard_D8s_v3 (8 vCPU, 32GB)',
                'Cost per Hour': f"${azure_standard_node:.3f}",
                'Status': '✏️ Custom' if azure_standard_node != 0.384 else '✅ Default'
            }, {
                'Cost Component': 'VectorDB Node',
                'Specification': 'Standard_D16s_v3 (16 vCPU, 64GB)',
                'Cost per Hour': f"${azure_vectordb_node:.3f}",
                'Status': '✏️ Custom' if azure_vectordb_node != 0.768 else '✅ Default'
            }, {
                'Cost Component': 'Jump Host',
                'Specification': 'Standard_D2s_v3 (2 vCPU, 8GB)',
                'Cost per Hour': f"${azure_jump_host:.3f}",
                'Status': '✏️ Custom' if azure_jump_host != 0.096 else '✅ Default'
            }, {
                'Cost Component': 'AKS Management',
                'Specification': 'Managed Kubernetes (Free)',
                'Cost per Hour': f"${azure_k8s_management:.3f}",
                'Status': '✏️ Custom' if azure_k8s_management != 0.0 else '✅ Default'
            }, {
                'Cost Component': 'H200 141GB GPU',
                'Specification': 'Flagship+ GPU',
                'Cost per Hour': f"${azure_h200:.2f}",
                'Status': '✏️ Custom' if azure_h200 != 12.29 else '✅ Default'
            }, {
                'Cost Component': 'H100 80GB GPU',
                'Specification': 'Flagship GPU',
                'Cost per Hour': f"${azure_h100:.2f}",
                'Status': '✏️ Custom' if azure_h100 != 6.98 else '✅ Default'
            }, {
                'Cost Component': 'A100 80GB GPU',
                'Specification': 'Excellent GPU',
                'Cost per Hour': f"${azure_a100_80:.2f}",
                'Status': '✏️ Custom' if azure_a100_80 != 3.67 else '✅ Default'
            }, {
                'Cost Component': 'A100 40GB GPU',
                'Specification': 'Good GPU',
                'Cost per Hour': f"${azure_a100_40:.2f}",
                'Status': '✏️ Custom' if azure_a100_40 != 3.67 else '✅ Default'
            }]
            azure_config_df = pd.DataFrame(azure_config_data)
            st.dataframe(azure_config_df, use_container_width=True)
        
        with price_tab3:
            gcp_config_data = [{
                'Cost Component': 'Standard Compute Node',
                'Specification': 'n1-standard-8 (8 vCPU, 30GB)',
                'Cost per Hour': f"${gcp_standard_node:.3f}",
                'Status': '✏️ Custom' if gcp_standard_node != 0.379 else '✅ Default'
            }, {
                'Cost Component': 'VectorDB Node',
                'Specification': 'n1-standard-16 (16 vCPU, 60GB)',
                'Cost per Hour': f"${gcp_vectordb_node:.3f}",
                'Status': '✏️ Custom' if gcp_vectordb_node != 0.758 else '✅ Default'
            }, {
                'Cost Component': 'Jump Host',
                'Specification': 'e2-medium (2 vCPU, 8GB)',
                'Cost per Hour': f"${gcp_jump_host:.3f}",
                'Status': '✏️ Custom' if gcp_jump_host != 0.067 else '✅ Default'
            }, {
                'Cost Component': 'GKE Management',
                'Specification': 'Managed Kubernetes',
                'Cost per Hour': f"${gcp_k8s_management:.3f}",
                'Status': '✏️ Custom' if gcp_k8s_management != 0.10 else '✅ Default'
            }, {
                'Cost Component': 'H100 80GB GPU',
                'Specification': 'Flagship GPU',
                'Cost per Hour': f"${gcp_h100:.2f}",
                'Status': '✏️ Custom' if gcp_h100 != 11.06 else '✅ Default'
            }, {
                'Cost Component': 'A100 80GB GPU',
                'Specification': 'Excellent GPU',
                'Cost per Hour': f"${gcp_a100_80:.2f}",
                'Status': '✏️ Custom' if gcp_a100_80 != 2.48 else '✅ Default'
            }, {
                'Cost Component': 'A100 40GB GPU',
                'Specification': 'Good GPU',
                'Cost per Hour': f"${gcp_a100_40:.2f}",
                'Status': '✏️ Custom' if gcp_a100_40 != 1.46 else '✅ Default'
            }]
            gcp_config_df = pd.DataFrame(gcp_config_data)
            st.dataframe(gcp_config_df, use_container_width=True)
        
        with price_tab4:
            onprem_config_data = [{
                'Cost Component': 'Standard Compute Node',
                'Specification': '8 vCPU, 32GB RAM',
                'Cost per Hour': f"${onprem_standard_node:.3f}",
                'Status': '✏️ Custom' if onprem_standard_node != 0.192 else '✅ Default'
            }, {
                'Cost Component': 'VectorDB Node',
                'Specification': '16 vCPU, 64GB RAM',
                'Cost per Hour': f"${onprem_vectordb_node:.3f}",
                'Status': '✏️ Custom' if onprem_vectordb_node != 0.384 else '✅ Default'
            }, {
                'Cost Component': 'Jump Host',
                'Specification': '2 vCPU, 8GB RAM',
                'Cost per Hour': f"${onprem_jump_host:.3f}",
                'Status': '✏️ Custom' if onprem_jump_host != 0.048 else '✅ Default'
            }, {
                'Cost Component': 'GPU Pricing',
                'Specification': f'{onprem_gpu_multiplier*100:.0f}% of AWS pricing',
                'Cost per Hour': f"${GPUS[selected_gpu]['pricing']['on-premise']:.2f} (for {selected_gpu})",
                'Status': '✏️ Custom' if onprem_gpu_multiplier != 0.55 else '✅ Default'
            }, {
                'Cost Component': 'K8s Management',
                'Specification': 'Self-managed operational cost',
                'Cost per Hour': f"${onprem_k8s_management:.3f}",
                'Status': '✏️ Custom' if onprem_k8s_management != 0.05 else '✅ Default'
            }, {
                'Cost Component': 'Network Infrastructure',
                'Specification': 'Switches, routers, firewalls',
                'Cost per Hour': f"${onprem_network:.3f}",
                'Status': '✏️ Custom' if onprem_network != 0.020 else '✅ Default'
            }, {
                'Cost Component': 'Storage SAN/NAS',
                'Specification': 'Per GB per month',
                'Cost per Hour': f"${onprem_storage_per_gb:.3f}/GB/month",
                'Status': '✏️ Custom' if onprem_storage_per_gb != 0.05 else '✅ Default'
            }, {
                'Cost Component': 'Hardware Load Balancer',
                'Specification': 'F5/Citrix ADC amortized',
                'Cost per Hour': f"${onprem_load_balancer:.3f}",
                'Status': '✏️ Custom' if onprem_load_balancer != 0.010 else '✅ Default'
            }, {
                'Cost Component': 'Power & Cooling',
                'Specification': 'Datacenter utilities',
                'Cost per Hour': f"${onprem_power_cooling:.3f}",
                'Status': '✏️ Custom' if onprem_power_cooling != 0.030 else '✅ Default'
            }, {
                'Cost Component': 'Datacenter Space',
                'Specification': 'Rack space and facilities',
                'Cost per Hour': f"${onprem_datacenter_space:.3f}",
                'Status': '✏️ Custom' if onprem_datacenter_space != 0.015 else '✅ Default'
            }, {
                'Cost Component': 'Maintenance & Support',
                'Specification': 'Vendor support contracts',
                'Cost per Hour': f"${onprem_maintenance:.3f}",
                'Status': '✏️ Custom' if onprem_maintenance != 0.025 else '✅ Default'
            }]
            onprem_config_df = pd.DataFrame(onprem_config_data)
            st.dataframe(onprem_config_df, use_container_width=True)
        
        st.markdown("""
        **💡 Configuration Tips:**
        - Adjust pricing in the sidebar under "Cloud Provider Pricing (Optional)"
        - Default values based on public pricing as of 2024/2025
        - Customize based on your actual contract rates, discounts, or negotiated pricing
        - All calculations update automatically when values are changed
        - Click "🔄 Reset All Pricing to Defaults" in sidebar to restore original values
        """)
        
        # VM Types Summary
        st.markdown("### 🖥️ Deployment Options Summary")
        
        deployment_options_data = []
        for provider in CLOUD_PRICING.keys():
            pricing = CLOUD_PRICING[provider]
            deployment_options_data.append({
                'Provider': provider,
                'Standard Node': pricing['description'],
                'VectorDB Node': pricing['vectordb_node']['instance_type'],
                'Jump Host': pricing['jump_host']['instance_type'],
                'Managed K8s': pricing['name']
            })
        
        deployment_df = pd.DataFrame(deployment_options_data)
        st.dataframe(deployment_df, use_container_width=True)
    
    # Recommendations section
    st.header("💡 Recommendations & Insights")
    
    col1, col2 = st.columns(2)
    
    with col1:
        st.subheader("🎯 Performance Recommendations")
        
        if gpu_requirements['bottleneck'] == 'Memory':
            st.info("💾 **Memory-bound workload**: Consider using INT8 or INT4 quantization to reduce memory requirements")
        else:
            st.info("⚡ **Throughput-bound workload**: Current memory is sufficient, focus on GPU count for throughput")
        
        capacity_headroom = gpu_requirements['max_conversations_per_minute'] - conversations_per_minute
        headroom_percentage = (capacity_headroom / gpu_requirements['max_conversations_per_minute']) * 100
        
        if headroom_percentage < 20:
            st.warning(f"🚨 **Low headroom** ({headroom_percentage:.1f}%): System near capacity. Consider adding more GPUs or optimizing workload distribution")
        elif headroom_percentage > 70:
            st.success(f"✅ **High headroom** ({headroom_percentage:.1f}%): System has significant capacity for growth")
        else:
            st.info(f"📊 **Balanced headroom** ({headroom_percentage:.1f}%): Good balance between capacity and resource efficiency")
        
        # Application deployment recommendations
        if apps_per_tenant > 12:
            st.warning(f"📦 **High app density**: {apps_per_tenant} apps per tenant requires {infrastructure['totals']['deploy_nodes_per_tenant']} deployment nodes. Consider application consolidation")
        elif apps_per_tenant <= 4:
            st.success(f"✅ **Efficient deployment**: Only 1 deployment node needed for {apps_per_tenant} apps per tenant")
        else:
            st.info(f"📊 **Moderate app density**: {infrastructure['totals']['deploy_nodes_per_tenant']} deployment nodes for {apps_per_tenant} apps per tenant")
    
    with col2:
        st.subheader("💰 Cost Optimization")
        
        available_providers = get_available_providers_for_gpu(gpu_spec)
        
        if len(available_providers) >= 2:
            cheapest_provider = min(available_providers, 
                                  key=lambda x: all_costs[x]['totals']['total_cost'])
            most_expensive_provider = max(available_providers, 
                                        key=lambda x: all_costs[x]['totals']['total_cost'])
            
            savings = all_costs[most_expensive_provider]['totals']['total_cost'] - all_costs[cheapest_provider]['totals']['total_cost']
            savings_percentage = (savings / all_costs[most_expensive_provider]['totals']['total_cost']) * 100
            
            if cheapest_provider == 'On-Premise':
                st.success(f"💡 **Recommended Option**: On-Premise Deployment")
                st.info(f"💰 **Cost Advantage**: ${savings:.2f} ({savings_percentage:.1f}%) savings compared to {most_expensive_provider}")
                st.warning("⚠️ **Consider**: Initial capex, datacenter readiness, and operational expertise for on-premise")
            else:
                st.success(f"💡 **Recommended Provider**: {cheapest_provider}")
                st.info(f"💰 **Potential Savings**: ${savings:.2f} ({savings_percentage:.1f}%) compared to {most_expensive_provider}")
            
            # Cost distribution insight
            cheapest_costs = all_costs[cheapest_provider]
            platform_percentage = (cheapest_costs['totals']['platform_cost'] / cheapest_costs['totals']['total_cost']) * 100
            gpu_percentage = (cheapest_costs['totals']['gpu_cost'] / cheapest_costs['totals']['total_cost']) * 100
            
            if gpu_percentage > 70:
                st.warning("🖥️ **GPU-heavy costs**: Consider optimizing model size or using more efficient GPUs")
            else:
                st.info(f"⚖️ **Balanced infrastructure**: Platform ({platform_percentage:.0f}%) vs GPU ({gpu_percentage:.0f}%)")
                
        elif len(available_providers) == 1:
            available_provider = available_providers[0]
            st.success(f"💡 **Available Option**: {available_provider}")
            if available_provider == 'On-Premise':
                st.info("🏢 On-premise is your only deployment option for this GPU")
            else:
                st.warning("⚠️ **Limited Options**: Only one provider has the selected GPU available")
            
            # Show cost distribution for the only available provider
            provider_costs = all_costs[available_provider]
            platform_percentage = (provider_costs['totals']['platform_cost'] / provider_costs['totals']['total_cost']) * 100
            gpu_percentage = (provider_costs['totals']['gpu_cost'] / provider_costs['totals']['total_cost']) * 100
            
            st.info(f"📊 **Cost Distribution**: Platform ({platform_percentage:.0f}%) vs GPU ({gpu_percentage:.0f}%)")
            
        else:
            st.error("❌ **No Available Options**: Selected GPU is not available on any deployment option")
            st.warning("**Action Required**: Please select a different GPU model")
            
            # Show alternative GPUs
            st.markdown("**💡 Suggested Alternatives:**")
            alternatives = []
            for gpu_name, gpu_data in GPUS.items():
                available_on = get_available_providers_for_gpu(gpu_data)
                if available_on:
                    alternatives.append(f"• **{gpu_name}** - Available on: {', '.join(available_on)}")
            
            if alternatives:
                for alt in alternatives[:3]:  # Show top 3 alternatives
                    st.markdown(alt)
    
    # Infrastructure Summary Box
    st.header("📋 Infrastructure Summary")
    
    summary_col1, summary_col2, summary_col3 = st.columns(3)
    
    with summary_col1:
        st.markdown("### Platform Infrastructure")
        st.markdown(f"""
        - **Tenants**: {num_tenants}
        - **Apps per Tenant**: {apps_per_tenant}
        - **Total Applications**: {infrastructure['totals']['total_apps']}
        - **Standard Nodes**: {infrastructure['totals']['total_standard_nodes']} (8 vCPU, 32GB)
        - **VectorDB Nodes**: {infrastructure['totals']['total_vectordb_nodes']} (16 vCPU, 64GB)
        - **Total Platform Nodes**: {infrastructure['totals']['total_nodes']}
        """)
    
    with summary_col2:
        st.markdown("### GPU Infrastructure")
        st.markdown(f"""
        - **Model**: {selected_model}
        - **GPU Type**: {selected_gpu}
        - **Precision**: {precision}
        - **GPUs Required**: {gpu_requirements['total_gpus_needed']}
        - **GPUs Allocated**: {gpu_requirements['actual_gpus_allocated']}
        - **GPU Configuration**: {gpu_requirements['best_config']['num_nodes']} nodes × {gpu_requirements['best_config']['gpus_per_node']} GPUs
        """)
    
    with summary_col3:
        st.markdown("### Performance Metrics")
        capacity_headroom = gpu_requirements['max_conversations_per_minute'] - conversations_per_minute
        headroom_percentage = (capacity_headroom / gpu_requirements['max_conversations_per_minute']) * 100
        st.markdown(f"""
        - **Target Load**: {conversations_per_minute} conv/min
        - **Max Capacity**: {gpu_requirements['max_conversations_per_minute']:.0f} conv/min
        - **Capacity Headroom**: {headroom_percentage:.1f}%
        - **Bottleneck**: {gpu_requirements['bottleneck']}
        - **Total TPS**: {gpu_requirements['total_system_tps']:.0f}
        - **Tokens/Conv**: {tokens_per_conversation}
        """)

if __name__ == "__main__":
    create_comprehensive_dashboard()