import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import math
from plotly.subplots import make_subplots
import json
from datetime import datetime

# Configure the Streamlit page (must be the first Streamlit command executed)
st.set_page_config(
    page_title="Katonic Multitenant Infrastructure Calculator",
    page_icon="🚀",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Cloud provider and on-premise pricing data (per hour in USD)
CLOUD_PRICING = {
    'On-Premise': {
        'name': 'On-Premise Datacenter',
        'cost_per_node_hour': 0.192,  # ~50% of cloud (amortized hardware + power + cooling over 3 years)
        'managed_k8s_cost': 0.05,  # Self-managed K8s operational cost (admin time, monitoring tools)
        'description': 'Dell PowerEdge R640 / HPE DL360 equivalent',
        'specs': '8 vCPUs, 32GB RAM',
        'vectordb_node': {
            'instance_type': 'Dell PowerEdge R740 / HPE DL380 equivalent',
            'cost_per_hour': 0.384,  # ~50% of cloud (high-memory server amortized)
            'specs': '16 vCPUs, 64GB RAM'
        },
        'jump_host': {
            'instance_type': 'Dell PowerEdge R440 / HPE DL20 equivalent',
            'cost_per_hour': 0.048,  # ~50% of cloud (small server amortized)
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'Network_Infrastructure': {'cost_per_hour': 0.020, 'description': 'Switches, routers, firewalls (amortized)'},
            'Storage_SAN': {'cost_per_gb_month': 0.05, 'description': 'SAN/NAS storage (1TB base, amortized)'},
            'Hardware_Load_Balancer': {'cost_per_hour': 0.010, 'description': 'F5/Citrix ADC (amortized)'},
            'Power_Cooling': {'cost_per_hour': 0.030, 'description': 'Datacenter power (0.1kW/server) and cooling'},
            'Datacenter_Space': {'cost_per_hour': 0.015, 'description': 'Rack space and facilities costs'},
            'Maintenance_Support': {'cost_per_hour': 0.025, 'description': 'Hardware maintenance and vendor support contracts'}
        },
        'gpu_pricing_multiplier': 0.55,  # On-prem GPU costs are ~55% of cloud (hardware amortization + power)
        'notes': 'Costs include: hardware amortization (3-year lifecycle), power (~$0.10/kWh), cooling (1:1 ratio), rack space, network infrastructure, storage, and maintenance. Assumes an enterprise datacenter with N+1 redundancy. Does NOT include: initial capex, datacenter construction, or staff salaries (covered in the K8s management cost).'
    },
    'AWS': {
        'name': 'Amazon EKS',
        'cost_per_node_hour': 0.384,
        'managed_k8s_cost': 0.10,
        'description': 'm5.2xlarge instances',
        'specs': '8 vCPUs, 32GB RAM',
        'vectordb_node': {
            'instance_type': 'm5.4xlarge',
            'cost_per_hour': 0.768,
            'specs': '16 vCPUs, 64GB RAM'
        },
        'jump_host': {
            'instance_type': 'm5.large',
            'cost_per_hour': 0.096,
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'VPC': {'cost_per_hour': 0.0, 'description': 'Virtual Private Cloud (Free)'},
            'EBS': {'cost_per_gb_month': 0.10, 'description': 'Elastic Block Store (1TB expandable)'},
            'ELB': {'cost_per_hour': 0.025, 'description': 'Elastic Load Balancer'},
            'EIP': {'cost_per_hour': 0.005, 'description': 'Elastic IP Address'}
        }
    },
    'Azure': {
        'name': 'Azure Kubernetes Service',
        'cost_per_node_hour': 0.384,
        'managed_k8s_cost': 0.0,
        'description': 'Standard_D8s_v3 instances',
        'specs': '8 vCPUs, 32GB RAM',
        'vectordb_node': {
            'instance_type': 'Standard_D16s_v3',
            'cost_per_hour': 0.768,
            'specs': '16 vCPUs, 64GB RAM'
        },
        'jump_host': {
            'instance_type': 'Standard_D2s_v3',
            'cost_per_hour': 0.096,
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'VNet': {'cost_per_hour': 0.0, 'description': 'Virtual Network (Free)'},
            'Managed_Disks': {'cost_per_gb_month': 0.10, 'description': 'Managed Disks (1TB expandable)'},
            'Load_Balancer': {'cost_per_hour': 0.025, 'description': 'Azure Load Balancer'},
            'Public_IP': {'cost_per_hour': 0.005, 'description': 'Public IP Address'}
        }
    },
    'GCP': {
        'name': 'Google Kubernetes Engine',
        'cost_per_node_hour': 0.379,
        'managed_k8s_cost': 0.10,
        'description': 'n1-standard-8 instances',
        'specs': '8 vCPUs, 30GB RAM',
        'vectordb_node': {
            'instance_type': 'n1-standard-16',
            'cost_per_hour': 0.758,
            'specs': '16 vCPUs, 60GB RAM'
        },
        'jump_host': {
            'instance_type': 'e2-medium',
            'cost_per_hour': 0.067,
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'VPC': {'cost_per_hour': 0.0, 'description': 'Virtual Private Cloud (Free)'},
            'Persistent_Disk': {'cost_per_gb_month': 0.10, 'description': 'Persistent Disk (1TB expandable)'},
            'Load_Balancer': {'cost_per_hour': 0.025, 'description': 'Cloud Load Balancing'},
            'Static_IP': {'cost_per_hour': 0.004, 'description': 'Static External IP'},
            'Cloud_Storage': {'cost_per_gb_month': 0.020, 'description': 'GCS Bucket (Optional)'},
            'Filestore': {'cost_per_gb_month': 0.20, 'description': 'Filestore (depends on usage)'}
        }
    }
}
# Production-grade model specifications
MODELS = {
    "Llama 4 Maverick": {
        "params": 400,
        "active_params": 17,
        "memory_per_param": 2,
        "max_context": 1000000,
        "base_tps": 4200,
        "org": "Meta",
        "license": "Open-weight",
        "notes": "Multimodal MoE; 1M context; text, image, code, reasoning"
    },
    "Llama 4 Scout": {
        "params": 109,
        "active_params": 17,
        "memory_per_param": 2,
        "max_context": 10000000,
        "base_tps": 4500,
        "org": "Meta",
        "license": "Open-weight",
        "notes": "Multimodal MoE; 10M context; efficient for long-form tasks"
    },
    "Llama 3.3 70B": {
        "params": 70,
        "active_params": 70,
        "memory_per_param": 2,
        "max_context": 128000,
        "base_tps": 1800,
        "org": "Meta",
        "license": "Community (open)",
        "notes": "Multilingual; matches Llama 3.1 405B performance"
    },
    "Qwen2 110B": {
        "params": 110,
        "active_params": 110,
        "memory_per_param": 2,
        "max_context": 128000,
        "base_tps": 1200,
        "org": "Alibaba/Qwen",
        "license": "Apache 2.0",
        "notes": "Multilingual; top-tier reasoning and coding"
    },
    "DeepSeek-VL 110B": {
        "params": 110,
        "active_params": 110,
        "memory_per_param": 2,
        "max_context": 128000,
        "base_tps": 1100,
        "org": "DeepSeek AI",
        "license": "MIT",
        "notes": "Multimodal (vision+language); GPT-4V alternative"
    },
    "Mixtral 8x22B": {
        "params": 141,
        "active_params": 39,
        "memory_per_param": 2,
        "max_context": 65536,
        "base_tps": 2800,
        "org": "Mistral AI",
        "license": "Apache 2.0",
        "notes": "Sparse MoE; efficiency leader among MoE models"
    }
}
GPUS = {
    "H200 141GB": {
        "memory": 141,
        "compute": 9.0,
        "tps_min": 5486,
        "tps_max": 18690,
        "efficiency_tier": "Flagship+",
        "pricing": {
            "aws": 15.70,
            "azure": 12.29,
            "gcp": "NA",
            "on-premise": 8.64  # 55% of the AWS price (hardware amortization + power)
        }
    },
    "H100 80GB": {
        "memory": 80,
        "compute": 9.0,
        "tps_min": 2400,
        "tps_max": 14000,
        "efficiency_tier": "Flagship",
        "pricing": {
            "aws": 6.01,
            "azure": 6.98,
            "gcp": 11.06,
            "on-premise": 3.31  # 55% of the AWS price
        }
    },
    "A100 80GB": {
        "memory": 80,
        "compute": 8.0,
        "tps_min": 1100,
        "tps_max": 2000,
        "efficiency_tier": "Excellent",
        "pricing": {
            "aws": 3.43,
            "azure": 3.67,
            "gcp": 2.48,
            "on-premise": 1.89  # 55% of the AWS price
        }
    },
    "A100 40GB": {
        "memory": 40,
        "compute": 8.0,
        "tps_min": 1000,
        "tps_max": 1800,
        "efficiency_tier": "Good",
        "pricing": {
            "aws": 2.75,
            "azure": 3.67,
            "gcp": 1.46,
            "on-premise": 1.51  # 55% of the AWS price
        }
    },
    "L40S": {
        "memory": 48,
        "compute": 8.9,
        "tps_min": 4000,
        "tps_max": 4768,
        "efficiency_tier": "Very Good",
        "pricing": {
            "aws": 1.67,
            "azure": "NA",
            "gcp": "NA",
            "on-premise": 0.92  # 55% of the AWS price
        }
    }
}
def calculate_detailed_infrastructure(num_tenants, apps_per_tenant):
    """Calculate detailed infrastructure requirements with a per-node-type breakdown."""
    # Standard node specs (8 vCPUs, 32GB RAM)
    cores_per_node = 8
    ram_per_node = 32
    # VectorDB node specs (16 vCPUs, 64GB RAM)
    vectordb_cores_per_node = 16
    vectordb_ram_per_node = 64
    # Base infrastructure
    base_platform_nodes = 2
    # Per-tenant requirements
    platform_nodes_per_tenant = 1
    compute_nodes_per_tenant = 1
    vectordb_nodes_per_tenant = 1  # High-memory (64GB RAM) nodes for VectorDB
    # Deployment nodes scale with apps per tenant: every 4 apps need 1 deployment node
    deploy_nodes_per_tenant = math.ceil(apps_per_tenant / 4)
    # Calculate totals
    total_platform_nodes = base_platform_nodes + (platform_nodes_per_tenant * num_tenants)
    total_compute_nodes = compute_nodes_per_tenant * num_tenants
    total_deploy_nodes = deploy_nodes_per_tenant * num_tenants
    total_vectordb_nodes = vectordb_nodes_per_tenant * num_tenants
    # Total standard nodes (excluding VectorDB, which uses different specs)
    total_standard_nodes = total_platform_nodes + total_compute_nodes + total_deploy_nodes
    total_nodes = total_standard_nodes + total_vectordb_nodes
    # Resource calculations
    total_cpu = (total_standard_nodes * cores_per_node) + (total_vectordb_nodes * vectordb_cores_per_node)
    total_ram = (total_standard_nodes * ram_per_node) + (total_vectordb_nodes * vectordb_ram_per_node)
    # Application capacity
    total_apps = num_tenants * apps_per_tenant
    return {
        'node_breakdown': {
            'Platform Nodes': {
                'base': base_platform_nodes,
                'tenant': platform_nodes_per_tenant * num_tenants,
                'total': total_platform_nodes,
                'cores': total_platform_nodes * cores_per_node,
                'ram': total_platform_nodes * ram_per_node,
                'purpose': 'Tenancy Manager + Tenant platform services',
                'node_type': 'Standard (8 vCPU, 32GB RAM)'
            },
            'Compute Nodes': {
                'base': 0,
                'tenant': total_compute_nodes,
                'total': total_compute_nodes,
                'cores': total_compute_nodes * cores_per_node,
                'ram': total_compute_nodes * ram_per_node,
                'purpose': 'Computational workloads',
                'node_type': 'Standard (8 vCPU, 32GB RAM)'
            },
            'Deploy Nodes': {
                'base': 0,
                'tenant': total_deploy_nodes,
                'total': total_deploy_nodes,
                'cores': total_deploy_nodes * cores_per_node,
                'ram': total_deploy_nodes * ram_per_node,
                'purpose': f'Application deployment ({deploy_nodes_per_tenant} node(s) per {apps_per_tenant} apps)',
                'node_type': 'Standard (8 vCPU, 32GB RAM)'
            },
            'VectorDB Nodes': {
                'base': 0,
                'tenant': total_vectordb_nodes,
                'total': total_vectordb_nodes,
                'cores': total_vectordb_nodes * vectordb_cores_per_node,
                'ram': total_vectordb_nodes * vectordb_ram_per_node,
                'purpose': 'Vector database operations (high memory)',
                'node_type': 'High-Memory (16 vCPU, 64GB RAM)'
            }
        },
        'totals': {
            'total_nodes': total_nodes,
            'total_standard_nodes': total_standard_nodes,
            'total_vectordb_nodes': total_vectordb_nodes,
            'total_cpu': total_cpu,
            'total_ram': total_ram,
            'total_apps': total_apps,
            'deploy_nodes_per_tenant': deploy_nodes_per_tenant
        },
        'specs': {
            'cores_per_node': cores_per_node,
            'ram_per_node': ram_per_node,
            'vectordb_cores_per_node': vectordb_cores_per_node,
            'vectordb_ram_per_node': vectordb_ram_per_node
        }
    }
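# Illustrative sizing check (comment only, not executed): with 3 tenants and
# 4 apps per tenant, the rules above give 2 + 3 = 5 platform nodes, 3 compute
# nodes, ceil(4/4) * 3 = 3 deploy nodes, and 3 VectorDB nodes, for 14 nodes
# in total: (11 * 8) + (3 * 16) = 136 vCPUs and (11 * 32) + (3 * 64) = 544 GB RAM.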
def calculate_model_memory_requirements(model_params, active_params, precision_bytes):
    """Estimate inference memory (GB) for a model.

    Sizing uses the full parameter count rather than active_params because all
    weights (including inactive MoE experts) are assumed resident in GPU memory;
    a 25% runtime overhead and a 10% KV-cache allowance are added on top.
    """
    model_memory = model_params * precision_bytes  # params in billions * bytes/param = GB
    overhead = model_memory * 0.25
    kv_cache = model_memory * 0.1
    total_memory = model_memory + overhead + kv_cache
    return total_memory
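# Worked example (comment only): Llama 3.3 70B at FP16 (2 bytes/param) gives
# 70 * 2 = 140 GB of weights, plus 35 GB overhead (25%) and 14 GB KV cache
# (10%), so 189 GB total, which is why it cannot fit on a single 80GB GPU.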
def calculate_model_tps_on_gpu(model_base_tps, model_params, active_params, gpu_spec):
    """Estimate actual TPS for a specific model on a specific GPU.

    Throughput is scaled from the GPU's benchmark TPS range by the model's
    active parameter count against a 70B reference; model_base_tps and
    model_params are kept in the signature for API stability but are not
    used by the scaling itself.
    """
    effective_params = active_params
    reference_params = 70
    param_scaling = (reference_params / effective_params) ** 0.7
    gpu_tps_min = gpu_spec["tps_min"]
    gpu_tps_max = gpu_spec["tps_max"]
    actual_tps_min = gpu_tps_min * param_scaling
    actual_tps_max = gpu_tps_max * param_scaling
    # Conservative estimate: 30% of the way between the scaled min and max
    estimated_tps = actual_tps_min + (actual_tps_max - actual_tps_min) * 0.3
    return estimated_tps, actual_tps_min, actual_tps_max
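# Worked example (comment only): Llama 3.3 70B (70B active) on an H100 80GB
# (tps_min=2400, tps_max=14000): scaling = (70/70)**0.7 = 1.0, so the
# estimate is 2400 + (14000 - 2400) * 0.3 = 5880 TPS per GPU.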
def calculate_gpu_node_configurations(total_gpus_needed, gpu_memory_gb, gpu_spec):
    """Calculate GPU node configurations based on standard cloud GPU node sizes"""
    configurations = []
    # Standard GPU node configurations: 1, 2, 4, or 8 GPUs per node
    standard_configs = [1, 2, 4, 8]
    # Minimum GPUs per node needed to hold the model in aggregate GPU memory
    min_gpus_per_node = math.ceil(gpu_memory_gb / gpu_spec["memory"])
    for gpus_per_node in standard_configs:
        # Skip configurations that cannot fit the model's memory requirement
        if gpus_per_node < min_gpus_per_node:
            continue
        num_nodes = math.ceil(total_gpus_needed / gpus_per_node)
        total_gpus_allocated = num_nodes * gpus_per_node
        gpu_utilization = (total_gpus_needed / total_gpus_allocated) * 100
        gpu_waste = total_gpus_allocated - total_gpus_needed
        configurations.append({
            'gpus_per_node': gpus_per_node,
            'num_nodes': num_nodes,
            'total_gpus_allocated': total_gpus_allocated,
            'total_gpus_needed': total_gpus_needed,
            'utilization': gpu_utilization,
            'gpu_waste': gpu_waste,
            'meets_memory_req': gpus_per_node >= min_gpus_per_node,
            'memory_utilization': (gpu_memory_gb / (gpus_per_node * gpu_spec["memory"])) * 100
        })
    # Fallback: if no configuration qualifies (should not happen with proper
    # validation), list every standard shape so the caller still gets options
    if not configurations:
        for gpus_per_node in standard_configs:
            num_nodes = math.ceil(total_gpus_needed / gpus_per_node)
            total_gpus_allocated = num_nodes * gpus_per_node
            gpu_utilization = (total_gpus_needed / total_gpus_allocated) * 100
            gpu_waste = total_gpus_allocated - total_gpus_needed
            configurations.append({
                'gpus_per_node': gpus_per_node,
                'num_nodes': num_nodes,
                'total_gpus_allocated': total_gpus_allocated,
                'total_gpus_needed': total_gpus_needed,
                'utilization': gpu_utilization,
                'gpu_waste': gpu_waste,
                'meets_memory_req': gpus_per_node >= min_gpus_per_node,
                'memory_utilization': (gpu_memory_gb / (gpus_per_node * gpu_spec["memory"])) * 100
            })
    # Sort by utilization (descending), then by total allocated GPUs (ascending)
    configurations.sort(key=lambda x: (-x['utilization'], x['total_gpus_allocated']))
    return configurations, min_gpus_per_node
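# Worked example (comment only): a 189 GB model on 80 GB GPUs forces
# min_gpus_per_node = ceil(189 / 80) = 3, so only the 4- and 8-GPU node
# shapes qualify. For 3 required GPUs, 1 node x 4 GPUs wins the sort with
# 75% utilization (1 wasted GPU) versus 37.5% for 1 node x 8 GPUs.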
def calculate_gpu_requirements(conversations_per_minute, tokens_per_conversation, model_spec, gpu_spec, precision_bytes):
    """Calculate GPU requirements for LLM inference with proper node configurations"""
    # Throughput requirement in tokens per second
    required_tps = (conversations_per_minute * tokens_per_conversation) / 60
    # Memory requirement
    model_memory_gb = calculate_model_memory_requirements(
        model_spec["params"], model_spec["active_params"], precision_bytes
    )
    # Model performance on this GPU
    estimated_tps, tps_min, tps_max = calculate_model_tps_on_gpu(
        model_spec["base_tps"], model_spec["params"], model_spec["active_params"], gpu_spec
    )
    # Basic GPU requirements: whichever of memory or throughput dominates
    gpus_needed_memory = math.ceil(model_memory_gb / gpu_spec["memory"])
    gpus_needed_throughput = math.ceil(required_tps / estimated_tps)
    total_gpus_needed = max(gpus_needed_memory, gpus_needed_throughput, 1)
    # Pack the required GPUs into standard node configurations
    gpu_configs, min_gpus_per_node = calculate_gpu_node_configurations(
        total_gpus_needed, model_memory_gb, gpu_spec
    )
    # Use the best (most efficient) configuration
    best_config = gpu_configs[0] if gpu_configs else None
    actual_gpus_allocated = best_config['total_gpus_allocated'] if best_config else total_gpus_needed
    return {
        'gpus_needed_memory': gpus_needed_memory,
        'gpus_needed_throughput': gpus_needed_throughput,
        'total_gpus_needed': total_gpus_needed,
        'actual_gpus_allocated': actual_gpus_allocated,
        'gpu_configurations': gpu_configs,
        'best_config': best_config,
        'min_gpus_per_node': min_gpus_per_node,
        'model_memory_gb': model_memory_gb,
        'required_tps': required_tps,
        'estimated_tps': estimated_tps,
        'tps_range': (tps_min, tps_max),
        'total_system_tps': estimated_tps * actual_gpus_allocated,
        'max_conversations_per_minute': (estimated_tps * actual_gpus_allocated * 60) / tokens_per_conversation,
        'bottleneck': 'Memory' if gpus_needed_memory >= gpus_needed_throughput else 'Throughput'
    }
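# Worked example (comment only): 200 conversations/min at 2000 tokens each
# requires (200 * 2000) / 60 = 6667 TPS. At ~5880 TPS per H100 that is
# ceil(6667 / 5880) = 2 GPUs for throughput, but ceil(189 / 80) = 3 GPUs for
# memory, so memory is the bottleneck and total_gpus_needed = 3.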
def is_gpu_available_for_provider(provider, gpu_spec):
    """Check whether the GPU is actually offered by a provider (valid, positive price)"""
    gpu_pricing = gpu_spec.get("pricing", {})
    provider_key = provider.lower()
    if provider_key not in gpu_pricing:
        return False
    price = gpu_pricing[provider_key]
    # "NA" entries fail the isinstance check, so a single numeric test suffices
    return isinstance(price, (int, float)) and price > 0
def get_available_providers_for_gpu(gpu_spec):
    """Get the list of providers that actually offer the selected GPU"""
    available_providers = []
    for provider in CLOUD_PRICING.keys():
        if is_gpu_available_for_provider(provider, gpu_spec):
            available_providers.append(provider)
    return available_providers
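# Illustrative lookup (comment only): with the default tables, the H200's
# GCP price is "NA", so get_available_providers_for_gpu(GPUS["H200 141GB"])
# returns ['On-Premise', 'AWS', 'Azure'] and GCP is flagged N/A downstream.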
def create_downloadable_cost_report(all_costs, infrastructure, gpu_requirements, model_spec, gpu_spec,
                                    selected_model, selected_gpu, num_tenants, apps_per_tenant,
                                    conversations_per_minute, tokens_per_conversation, precision, time_period):
    """Create a comprehensive cost report for download"""
    report_data = {
        'report_metadata': {
            'generated_at': datetime.now().isoformat(),
            'configuration': {
                'tenants': num_tenants,
                'apps_per_tenant': apps_per_tenant,
                'total_apps': num_tenants * apps_per_tenant,
                'model': selected_model,
                'gpu': selected_gpu,
                'precision': precision,
                'conversations_per_minute': conversations_per_minute,
                'tokens_per_conversation': tokens_per_conversation,
                'time_period': time_period
            }
        },
        'infrastructure_summary': {
            'platform_nodes': infrastructure['totals']['total_standard_nodes'],
            'vectordb_nodes': infrastructure['totals']['total_vectordb_nodes'],
            'total_nodes': infrastructure['totals']['total_nodes'],
            # Logical GPU count (before node packing), not a node count
            'gpu_nodes': gpu_requirements['total_gpus_needed'],
            'total_cpu_cores': infrastructure['totals']['total_cpu'],
            'total_ram_gb': infrastructure['totals']['total_ram'],
            'total_gpu_memory_gb': gpu_requirements['total_gpus_needed'] * gpu_spec['memory'],
            'max_conversations_per_minute': gpu_requirements['max_conversations_per_minute']
        },
        'cost_breakdown_by_provider': {}
    }
    # Add the cost breakdown for each provider
    for provider, costs in all_costs.items():
        provider_available = is_gpu_available_for_provider(provider, gpu_spec)
        report_data['cost_breakdown_by_provider'][provider] = {
            'gpu_available': provider_available,
            'platform_costs': {
                'kubernetes_nodes': costs['platform_costs']['total_node_cost'],
                'vectordb_nodes': costs['platform_costs']['vectordb_node_cost'],
                'jump_host': costs['platform_costs']['jump_host_cost'],
                'additional_services': costs['platform_costs']['additional_services_cost'],
                'k8s_management': costs['platform_costs']['k8s_management_cost'],
                'platform_total': costs['platform_costs']['platform_total']
            },
            'gpu_costs': {
                'gpu_count': costs['gpu_costs']['gpu_count'],
                'gpu_cost_per_hour': costs['gpu_costs']['gpu_cost_per_hour'],
                'total_gpu_cost': costs['gpu_costs']['total_gpu_cost'] if provider_available else 'N/A'
            },
            'totals': {
                'platform_cost': costs['totals']['platform_cost'],
                'gpu_cost': costs['totals']['gpu_cost'] if provider_available else 'N/A',
                'total_cost': costs['totals']['total_cost'] if provider_available else 'N/A',
                'cost_per_hour': costs['totals']['cost_per_hour'] if provider_available else 'N/A',
                'cost_per_day': costs['totals']['cost_per_day'] if provider_available else 'N/A'
            },
            'service_details': costs['platform_costs']['service_costs']
        }
    return report_data
def format_cost_for_display(cost, available=True):
    """Format a cost for display, handling N/A cases"""
    if not available or cost == 'N/A':
        return 'N/A'
    return f"${cost:.2f}"
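# Illustrative formatting (comment only): format_cost_for_display(1234.5)
# returns "$1234.50"; format_cost_for_display(0.5, available=False) and
# format_cost_for_display('N/A') both return "N/A".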
def calculate_detailed_costs(provider, infrastructure, gpu_requirements, gpu_spec, days=30):
    """Calculate detailed costs for both platform and GPU infrastructure"""
    pricing = CLOUD_PRICING[provider]
    hours = days * 24

    # Platform infrastructure costs
    node_costs = {}
    total_standard_node_cost = 0
    total_vectordb_node_cost = 0
    for node_type, details in infrastructure['node_breakdown'].items():
        if node_type == 'VectorDB Nodes':
            # Dedicated pricing for high-memory VectorDB nodes
            node_cost = details['total'] * pricing['vectordb_node']['cost_per_hour'] * hours
            total_vectordb_node_cost = node_cost
        else:
            # Standard pricing for all other node types
            node_cost = details['total'] * pricing['cost_per_node_hour'] * hours
            total_standard_node_cost += node_cost
        node_costs[node_type] = {
            'count': details['total'],
            'cost': node_cost,
            'cores': details['cores'],
            'ram': details['ram'],
            'node_type': details.get('node_type', 'Standard')
        }
    total_node_cost = total_standard_node_cost + total_vectordb_node_cost

    # Jump host cost
    jump_host_cost = pricing['jump_host']['cost_per_hour'] * hours

    # Additional services costs
    additional_services_cost = 0
    service_costs = {}
    for service, details in pricing['additional_services'].items():
        if 'cost_per_hour' in details:
            service_cost = details['cost_per_hour'] * hours
        elif 'cost_per_gb_month' in details:
            # Per-GB storage services are billed on a 1TB (1024 GB) base volume
            if 'storage' in service.lower() or 'disk' in service.lower() or 'ebs' in service.lower() or 'san' in service.lower():
                service_cost = details['cost_per_gb_month'] * 1024 * (days / 30)
            else:
                service_cost = 0
        else:
            service_cost = 0
        service_costs[service] = service_cost
        additional_services_cost += service_cost

    # Kubernetes management cost
    k8s_management_cost = pricing['managed_k8s_cost'] * hours

    # GPU costs: handle N/A cases explicitly
    gpu_pricing = gpu_spec.get("pricing", {})
    gpu_available = is_gpu_available_for_provider(provider, gpu_spec)
    gpu_cost_per_hour = 0
    gpu_cost = 0
    if gpu_available:
        gpu_cost_per_hour = gpu_pricing[provider.lower()]
        gpu_cost = gpu_requirements['actual_gpus_allocated'] * gpu_cost_per_hour * hours

    # Total costs (None signals N/A when the GPU is unavailable on this provider)
    platform_cost = total_node_cost + jump_host_cost + additional_services_cost + k8s_management_cost
    total_cost = (platform_cost + gpu_cost) if gpu_available else None

    return {
        'platform_costs': {
            'node_costs': node_costs,
            'total_node_cost': total_node_cost,
            'vectordb_node_cost': total_vectordb_node_cost,
            'jump_host_cost': jump_host_cost,
            'service_costs': service_costs,
            'additional_services_cost': additional_services_cost,
            'k8s_management_cost': k8s_management_cost,
            'platform_total': platform_cost
        },
        'gpu_costs': {
            'gpu_count': gpu_requirements['actual_gpus_allocated'],
            'gpu_cost_per_hour': gpu_cost_per_hour,
            'total_gpu_cost': gpu_cost,
            'gpu_available': gpu_available
        },
        'totals': {
            'platform_cost': platform_cost,
            'gpu_cost': gpu_cost,
            'total_cost': total_cost,
            'cost_per_hour': total_cost / hours if total_cost is not None else None,
            'cost_per_day': total_cost / days if total_cost is not None else None,
            'gpu_available': gpu_available
        }
    }
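# Sketch of the cost roll-up (comment only): for a 30-day period,
# hours = 30 * 24 = 720, and each provider's total is
# (standard + VectorDB node cost) + jump host + additional services
# + K8s management + (allocated GPUs * GPU $/hr * hours, when available).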
def create_comprehensive_dashboard():
    # Page config is set once at module level; calling st.set_page_config()
    # again here would raise a StreamlitAPIException.
    st.title("🚀 Katonic Multitenant Infrastructure Calculator")
    st.markdown("**Comprehensive infrastructure planning for multi-tenant LLMOps platforms with GPU-accelerated LLM inference**")
    # Sidebar configuration
    with st.sidebar:
        st.header("🔧 Configuration")

        # Platform configuration
        st.subheader("Platform Settings")
        num_tenants = st.slider(
            "Number of Tenants",
            min_value=1,
            max_value=20,
            value=3,
            help="Each tenant requires dedicated platform, compute, deploy, and VectorDB nodes"
        )
        apps_per_tenant = st.number_input(
            "Apps per Tenant",
            min_value=1,
            max_value=50,
            value=4,
            step=1,
            help="Number of applications per tenant. Every 4 apps require 1 deployment node"
        )
        # Cloud provider pricing configuration
        st.subheader("Cloud Provider Pricing (Optional)")

        # AWS pricing
        with st.expander("☁️ Customize AWS Costs", expanded=False):
            st.markdown("**Adjust AWS pricing (per hour in USD)**")
            st.markdown("##### Compute Nodes")
            aws_standard_node = st.number_input(
                "m5.2xlarge (8 vCPU, 32GB)",
                min_value=0.01,
                max_value=2.00,
                value=0.384,
                step=0.01,
                format="%.3f",
                key="aws_standard",
                help="Default: $0.384/hr"
            )
            aws_vectordb_node = st.number_input(
                "m5.4xlarge (16 vCPU, 64GB)",
                min_value=0.01,
                max_value=4.00,
                value=0.768,
                step=0.01,
                format="%.3f",
                key="aws_vectordb",
                help="Default: $0.768/hr"
            )
            aws_jump_host = st.number_input(
                "m5.large (2 vCPU, 8GB)",
                min_value=0.01,
                max_value=0.50,
                value=0.096,
                step=0.01,
                format="%.3f",
                key="aws_jump",
                help="Default: $0.096/hr"
            )
            aws_k8s_management = st.number_input(
                "EKS Management Cost",
                min_value=0.0,
                max_value=0.50,
                value=0.10,
                step=0.01,
                format="%.3f",
                key="aws_k8s",
                help="Default: $0.10/hr"
            )
            st.markdown("##### GPU Pricing")
            col1, col2 = st.columns(2)
            with col1:
                aws_h200 = st.number_input("H200 141GB", value=15.70, step=0.10, format="%.2f", key="aws_h200")
                aws_h100 = st.number_input("H100 80GB", value=6.01, step=0.10, format="%.2f", key="aws_h100")
                aws_a100_80 = st.number_input("A100 80GB", value=3.43, step=0.10, format="%.2f", key="aws_a100_80")
            with col2:
                aws_a100_40 = st.number_input("A100 40GB", value=2.75, step=0.10, format="%.2f", key="aws_a100_40")
                aws_l40s = st.number_input("L40S", value=1.67, step=0.10, format="%.2f", key="aws_l40s")
        # Azure pricing
        with st.expander("☁️ Customize Azure Costs", expanded=False):
            st.markdown("**Adjust Azure pricing (per hour in USD)**")
            st.markdown("##### Compute Nodes")
            azure_standard_node = st.number_input(
                "Standard_D8s_v3 (8 vCPU, 32GB)",
                min_value=0.01,
                max_value=2.00,
                value=0.384,
                step=0.01,
                format="%.3f",
                key="azure_standard",
                help="Default: $0.384/hr"
            )
            azure_vectordb_node = st.number_input(
                "Standard_D16s_v3 (16 vCPU, 64GB)",
                min_value=0.01,
                max_value=4.00,
                value=0.768,
                step=0.01,
                format="%.3f",
                key="azure_vectordb",
                help="Default: $0.768/hr"
            )
            azure_jump_host = st.number_input(
                "Standard_D2s_v3 (2 vCPU, 8GB)",
                min_value=0.01,
                max_value=0.50,
                value=0.096,
                step=0.01,
                format="%.3f",
                key="azure_jump",
                help="Default: $0.096/hr"
            )
            azure_k8s_management = st.number_input(
                "AKS Management Cost",
                min_value=0.0,
                max_value=0.50,
                value=0.0,
                step=0.01,
                format="%.3f",
                key="azure_k8s",
                help="Default: $0.00/hr (Free tier)"
            )
            st.markdown("##### GPU Pricing")
            col1, col2 = st.columns(2)
            with col1:
                azure_h200 = st.number_input("H200 141GB", value=12.29, step=0.10, format="%.2f", key="azure_h200")
                azure_h100 = st.number_input("H100 80GB", value=6.98, step=0.10, format="%.2f", key="azure_h100")
                azure_a100_80 = st.number_input("A100 80GB", value=3.67, step=0.10, format="%.2f", key="azure_a100_80")
            with col2:
                # L40S is listed as "NA" on Azure in the pricing table, so no input is shown
                azure_a100_40 = st.number_input("A100 40GB", value=3.67, step=0.10, format="%.2f", key="azure_a100_40")
        # GCP pricing
        with st.expander("☁️ Customize GCP Costs", expanded=False):
            st.markdown("**Adjust GCP pricing (per hour in USD)**")
            st.markdown("##### Compute Nodes")
            gcp_standard_node = st.number_input(
                "n1-standard-8 (8 vCPU, 30GB)",
                min_value=0.01,
                max_value=2.00,
                value=0.379,
                step=0.01,
                format="%.3f",
                key="gcp_standard",
                help="Default: $0.379/hr"
            )
            gcp_vectordb_node = st.number_input(
                "n1-standard-16 (16 vCPU, 60GB)",
                min_value=0.01,
                max_value=4.00,
                value=0.758,
                step=0.01,
                format="%.3f",
                key="gcp_vectordb",
                help="Default: $0.758/hr"
            )
            gcp_jump_host = st.number_input(
                "e2-medium (2 vCPU, 8GB)",
                min_value=0.01,
                max_value=0.50,
                value=0.067,
                step=0.01,
                format="%.3f",
                key="gcp_jump",
                help="Default: $0.067/hr"
            )
            gcp_k8s_management = st.number_input(
                "GKE Management Cost",
                min_value=0.0,
                max_value=0.50,
                value=0.10,
                step=0.01,
                format="%.3f",
                key="gcp_k8s",
                help="Default: $0.10/hr"
            )
            st.markdown("##### GPU Pricing")
            # H200 and L40S are listed as "NA" on GCP in the pricing table, so no inputs are shown
            col1, col2 = st.columns(2)
            with col1:
                gcp_h100 = st.number_input("H100 80GB", value=11.06, step=0.10, format="%.2f", key="gcp_h100")
                gcp_a100_80 = st.number_input("A100 80GB", value=2.48, step=0.10, format="%.2f", key="gcp_a100_80")
            with col2:
                gcp_a100_40 = st.number_input("A100 40GB", value=1.46, step=0.10, format="%.2f", key="gcp_a100_40")
        # On-premise pricing
        with st.expander("🏢 Customize On-Premise Costs", expanded=False):
            st.markdown("**Adjust on-premise costs based on your infrastructure**")
            st.markdown("##### Compute Nodes (per hour)")
            onprem_standard_node = st.number_input(
                "Standard Node (8 vCPU, 32GB)",
                min_value=0.01,
                max_value=1.00,
                value=0.192,
                step=0.01,
                format="%.3f",
                key="onprem_standard",
                help="Cost per hour for standard compute nodes (default: $0.192)"
            )
            onprem_vectordb_node = st.number_input(
                "VectorDB Node (16 vCPU, 64GB)",
                min_value=0.01,
                max_value=2.00,
                value=0.384,
                step=0.01,
                format="%.3f",
                key="onprem_vectordb",
                help="Cost per hour for high-memory VectorDB nodes (default: $0.384)"
            )
            onprem_jump_host = st.number_input(
                "Jump Host (2 vCPU, 8GB)",
                min_value=0.01,
                max_value=0.50,
                value=0.048,
                step=0.01,
                format="%.3f",
                key="onprem_jump",
                help="Cost per hour for jump host (default: $0.048)"
            )
            st.markdown("##### GPU Pricing Multiplier")
            onprem_gpu_multiplier = st.slider(
                "GPU Cost Multiplier (% of AWS)",
                min_value=30,
                max_value=100,
                value=55,
                step=5,
                key="onprem_gpu_mult",
                help="Percentage of AWS GPU pricing for on-premise (default: 55%)"
            ) / 100
            st.markdown("##### Additional Services (per hour)")
            onprem_network = st.number_input(
                "Network Infrastructure",
                min_value=0.0,
                max_value=0.10,
                value=0.020,
                step=0.005,
                format="%.3f",
                key="onprem_network",
                help="Switches, routers, firewalls (default: $0.020)"
            )
            onprem_storage_per_gb = st.number_input(
                "Storage (per GB per month)",
                min_value=0.01,
                max_value=0.20,
                value=0.05,
                step=0.01,
                format="%.3f",
                key="onprem_storage",
                help="SAN/NAS storage cost (default: $0.05/GB/month)"
            )
            onprem_load_balancer = st.number_input(
                "Hardware Load Balancer",
                min_value=0.0,
                max_value=0.05,
                value=0.010,
                step=0.005,
                format="%.3f",
                key="onprem_lb",
                help="Load balancer amortized cost (default: $0.010)"
            )
            onprem_power_cooling = st.number_input(
                "Power & Cooling",
                min_value=0.01,
                max_value=0.10,
                value=0.030,
                step=0.005,
                format="%.3f",
                key="onprem_power",
                help="Datacenter power and cooling (default: $0.030)"
            )
            onprem_datacenter_space = st.number_input(
                "Datacenter Space",
                min_value=0.0,
                max_value=0.05,
                value=0.015,
                step=0.005,
                format="%.3f",
                key="onprem_space",
                help="Rack space and facilities (default: $0.015)"
            )
            onprem_maintenance = st.number_input(
                "Maintenance & Support",
                min_value=0.0,
                max_value=0.10,
                value=0.025,
                step=0.005,
                format="%.3f",
                key="onprem_maint",
                help="Hardware maintenance contracts (default: $0.025)"
            )
            onprem_k8s_management = st.number_input(
                "K8s Management Cost",
                min_value=0.0,
                max_value=0.20,
                value=0.05,
                step=0.01,
                format="%.3f",
                key="onprem_k8s",
                help="Self-managed K8s operational cost (default: $0.05)"
            )

        # Reset button: clear the pricing widgets' session state so every
        # input above falls back to its default value on the rerun
        if st.button("🔄 Reset All Pricing to Defaults", type="secondary"):
            for state_key in list(st.session_state.keys()):
                if state_key.startswith(("aws_", "azure_", "gcp_", "onprem_")):
                    del st.session_state[state_key]
            st.rerun()
        # LLM configuration
        st.subheader("LLM Settings")
        selected_model = st.selectbox(
            "Select LLM Model",
            list(MODELS.keys()),
            index=2,  # Default to Llama 3.3 70B
            help="Choose the LLM model for inference workloads"
        )
        selected_gpu = st.selectbox(
            "Select GPU Type",
            list(GPUS.keys()),
            index=1,  # Default to H100 80GB
            help="GPU type for LLM inference nodes"
        )
        precision = st.selectbox(
            "Model Precision",
            ["FP16", "INT8", "INT4"],
            index=0,  # Default to FP16
            help="Model precision affects memory usage and quality"
        )

        # Workload configuration
        st.subheader("Workload Settings")
        conversations_per_minute = st.number_input(
            "Conversations per Minute",
            min_value=1,
            max_value=5000,
            value=200,
            step=10,
            help="Expected conversation throughput across all tenants"
        )
        tokens_per_conversation = st.number_input(
            "Tokens per Conversation",
            min_value=500,
            max_value=20000,
            value=2000,
            step=100,
            help="Average tokens per conversation (input + output)"
        )

        # Time period
        time_period = st.selectbox(
            "Cost Calculation Period",
            ["Monthly (30 days)", "Weekly (7 days)", "Daily (1 day)", "Hourly"],
            index=0
        )
        days_map = {
            "Monthly (30 days)": 30,
            "Weekly (7 days)": 7,
            "Daily (1 day)": 1,
            "Hourly": 1 / 24
        }
        days = days_map[time_period]
    # Calculate all requirements
    infrastructure = calculate_detailed_infrastructure(num_tenants, apps_per_tenant)

    # Apply custom pricing by updating the module-level pricing dictionaries
    # in place (note: this mutates the shared CLOUD_PRICING/GPUS globals)
    def apply_custom_pricing():
        """Apply user-configured pricing to the global pricing dictionaries"""
        # Update AWS pricing with user-configured values
        CLOUD_PRICING['AWS']['cost_per_node_hour'] = aws_standard_node
        CLOUD_PRICING['AWS']['vectordb_node']['cost_per_hour'] = aws_vectordb_node
        CLOUD_PRICING['AWS']['jump_host']['cost_per_hour'] = aws_jump_host
        CLOUD_PRICING['AWS']['managed_k8s_cost'] = aws_k8s_management
        # Update AWS GPU pricing
        GPUS["H200 141GB"]["pricing"]["aws"] = aws_h200
        GPUS["H100 80GB"]["pricing"]["aws"] = aws_h100
        GPUS["A100 80GB"]["pricing"]["aws"] = aws_a100_80
        GPUS["A100 40GB"]["pricing"]["aws"] = aws_a100_40
        GPUS["L40S"]["pricing"]["aws"] = aws_l40s
        # Update Azure pricing with user-configured values
        CLOUD_PRICING['Azure']['cost_per_node_hour'] = azure_standard_node
        CLOUD_PRICING['Azure']['vectordb_node']['cost_per_hour'] = azure_vectordb_node
        CLOUD_PRICING['Azure']['jump_host']['cost_per_hour'] = azure_jump_host
        CLOUD_PRICING['Azure']['managed_k8s_cost'] = azure_k8s_management
        # Update Azure GPU pricing
        GPUS["H200 141GB"]["pricing"]["azure"] = azure_h200
        GPUS["H100 80GB"]["pricing"]["azure"] = azure_h100
        GPUS["A100 80GB"]["pricing"]["azure"] = azure_a100_80
        GPUS["A100 40GB"]["pricing"]["azure"] = azure_a100_40
        # Update GCP pricing with user-configured values
        CLOUD_PRICING['GCP']['cost_per_node_hour'] = gcp_standard_node
        CLOUD_PRICING['GCP']['vectordb_node']['cost_per_hour'] = gcp_vectordb_node
        CLOUD_PRICING['GCP']['jump_host']['cost_per_hour'] = gcp_jump_host
        CLOUD_PRICING['GCP']['managed_k8s_cost'] = gcp_k8s_management
        # Update GCP GPU pricing
        GPUS["H100 80GB"]["pricing"]["gcp"] = gcp_h100
        GPUS["A100 80GB"]["pricing"]["gcp"] = gcp_a100_80
        GPUS["A100 40GB"]["pricing"]["gcp"] = gcp_a100_40
        # Update on-premise pricing with user-configured values
        CLOUD_PRICING['On-Premise']['cost_per_node_hour'] = onprem_standard_node
        CLOUD_PRICING['On-Premise']['vectordb_node']['cost_per_hour'] = onprem_vectordb_node
        CLOUD_PRICING['On-Premise']['jump_host']['cost_per_hour'] = onprem_jump_host
        CLOUD_PRICING['On-Premise']['managed_k8s_cost'] = onprem_k8s_management
        # Update on-premise additional services
        CLOUD_PRICING['On-Premise']['additional_services'] = {
            'Network_Infrastructure': {'cost_per_hour': onprem_network, 'description': 'Switches, routers, firewalls (amortized)'},
            'Storage_SAN': {'cost_per_gb_month': onprem_storage_per_gb, 'description': 'SAN/NAS storage (1TB base, amortized)'},
            'Hardware_Load_Balancer': {'cost_per_hour': onprem_load_balancer, 'description': 'F5/Citrix ADC (amortized)'},
            'Power_Cooling': {'cost_per_hour': onprem_power_cooling, 'description': 'Datacenter power and cooling'},
            'Datacenter_Space': {'cost_per_hour': onprem_datacenter_space, 'description': 'Rack space and facilities costs'},
            'Maintenance_Support': {'cost_per_hour': onprem_maintenance, 'description': 'Hardware maintenance and vendor support contracts'}
        }
        # Derive on-premise GPU pricing from the AWS prices and the multiplier
        for gpu_name in GPUS.keys():
            if 'aws' in GPUS[gpu_name]['pricing'] and GPUS[gpu_name]['pricing']['aws'] != 'NA':
                aws_price = GPUS[gpu_name]['pricing']['aws']
                GPUS[gpu_name]['pricing']['on-premise'] = round(aws_price * onprem_gpu_multiplier, 2)

    # Apply all custom pricing
    apply_custom_pricing()

    precision_bytes = {
        "FP16": 2,
        "INT8": 1,
        "INT4": 0.5
    }[precision]
    model_spec = MODELS[selected_model]
    gpu_spec = GPUS[selected_gpu]
    gpu_requirements = calculate_gpu_requirements(
        conversations_per_minute, tokens_per_conversation,
        model_spec, gpu_spec, precision_bytes
    )
    # Main dashboard
    st.header("📊 Infrastructure Overview")
    st.markdown("---")  # Visual separator

    # Row 1: core metrics (4 columns for better spacing)
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric(
            label="🏢 Total Tenants",
            value=f"{num_tenants}",
            help="Number of tenant environments"
        )
    with col2:
        st.metric(
            label="📦 Apps per Tenant",
            value=f"{apps_per_tenant}",
            help=f"Total applications: {infrastructure['totals']['total_apps']}"
        )
    with col3:
        st.metric(
            label="🖥️ Worker Nodes",
            value=f"{infrastructure['totals']['total_nodes']}",
            help=f"Standard: {infrastructure['totals']['total_standard_nodes']}, VectorDB: {infrastructure['totals']['total_vectordb_nodes']}"
        )
    with col4:
        gpu_display = f"{gpu_requirements['actual_gpus_allocated']} GPUs"
        if gpu_requirements['best_config']:
            gpu_detail = f"({gpu_requirements['best_config']['num_nodes']} nodes)"
        else:
            gpu_detail = ""
        st.metric(
            label="🎮 GPU Resources",
            value=gpu_display,
            delta=gpu_detail,
            help=f"Configuration: {gpu_requirements['best_config']['num_nodes']}×{gpu_requirements['best_config']['gpus_per_node']} GPUs" if gpu_requirements['best_config'] else "GPU allocation"
        )

    # Row 2: performance metrics
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric(
            label="💬 Target Load",
            value=f"{conversations_per_minute}",
            delta="conv/min",
            help="Target conversation throughput"
        )
    with col2:
        st.metric(
            label="📈 Max Capacity",
            value=f"{gpu_requirements['max_conversations_per_minute']:.0f}",
            delta="conv/min",
            help="Maximum system capacity"
        )
    with col3:
        capacity_headroom = gpu_requirements['max_conversations_per_minute'] - conversations_per_minute
        headroom_percentage = (capacity_headroom / gpu_requirements['max_conversations_per_minute']) * 100
        st.metric(
            label="📊 Capacity Headroom",
            value=f"{headroom_percentage:.1f}%",
            delta=f"{capacity_headroom:.0f} conv/min available",
            help="Available capacity beyond the current target load"
        )
    with col4:
        bottleneck_icon = "💾" if gpu_requirements['bottleneck'] == 'Memory' else "⚡"
        st.metric(
            label=f"{bottleneck_icon} Bottleneck",
            value=gpu_requirements['bottleneck'],
            help="Primary system constraint"
        )
    st.markdown("---")  # Visual separator

    # Tabs for detailed views
    tab1, tab2, tab3, tab4, tab5 = st.tabs([
        "🏗️ Platform Infrastructure",
        "🖥️ GPU Requirements",
        "💰 Cost Analysis",
        "📈 Performance Analysis",
        "🔧 Technical Specifications"
    ])
    with tab1:
        st.subheader("Platform Infrastructure Breakdown")
        # Deployment node scaling info
        st.info(f"📦 **Deployment Node Scaling**: {infrastructure['totals']['deploy_nodes_per_tenant']} deployment node(s) per tenant for {apps_per_tenant} apps (1 node per 4 apps)")

        # Platform nodes breakdown
        breakdown_data = []
        for node_type, details in infrastructure['node_breakdown'].items():
            per_tenant_value = details['tenant'] // num_tenants if num_tenants > 0 and details['tenant'] > 0 else 0
            breakdown_data.append({
                'Node Type': node_type,
                'Base': details['base'] if details['base'] > 0 else '-',
                'Per Tenant': per_tenant_value if per_tenant_value > 0 else '-',
                'Total': details['total'],
                'CPU': details['cores'],
                'RAM (GB)': details['ram'],
                'VM Type': details.get('node_type', 'Standard'),
                'Purpose': details['purpose']
            })
        breakdown_df = pd.DataFrame(breakdown_data)
        # Column configuration for better display
        st.dataframe(
            breakdown_df,
            use_container_width=True,
            hide_index=True,
            column_config={
                "Node Type": st.column_config.TextColumn("Node Type", width="medium"),
                "Base": st.column_config.TextColumn("Base", width="small"),
                "Per Tenant": st.column_config.TextColumn("Per Tenant", width="small"),
                "Total": st.column_config.NumberColumn("Total", width="small"),
                "CPU": st.column_config.NumberColumn("CPU", width="small"),
                "RAM (GB)": st.column_config.NumberColumn("RAM (GB)", width="small"),
                "VM Type": st.column_config.TextColumn("VM Type", width="medium"),
                "Purpose": st.column_config.TextColumn("Purpose", width="large")
            }
        )

        # Visual breakdown
        col1, col2 = st.columns(2)
        with col1:
            # Node distribution pie chart
            node_counts = {node_type: details['total']
                           for node_type, details in infrastructure['node_breakdown'].items()
                           if details['total'] > 0}
            fig_nodes = px.pie(
                values=list(node_counts.values()),
                names=list(node_counts.keys()),
                title="Platform Node Distribution"
            )
            st.plotly_chart(fig_nodes, use_container_width=True)
        with col2:
            # Resource distribution
            resource_data = []
            for node_type, details in infrastructure['node_breakdown'].items():
                if details['total'] > 0:
                    resource_data.extend([
                        {'Node Type': node_type, 'Resource': 'CPU Cores', 'Amount': details['cores']},
                        {'Node Type': node_type, 'Resource': 'RAM (GB)', 'Amount': details['ram']}
                    ])
            resource_df = pd.DataFrame(resource_data)
            fig_resources = px.bar(
                resource_df,
                x='Node Type',
                y='Amount',
                color='Resource',
                title='Resource Distribution by Node Type',
                barmode='group'
            )
            st.plotly_chart(fig_resources, use_container_width=True)

        # Node type distribution
        st.subheader("Node Type Distribution")
        col1, col2 = st.columns(2)
        with col1:
            st.metric(
                "Standard Nodes (8 vCPU, 32GB RAM)",
                infrastructure['totals']['total_standard_nodes'],
                help="Platform, Compute, and Deploy nodes"
            )
        with col2:
            st.metric(
                "High-Memory Nodes (16 vCPU, 64GB RAM)",
                infrastructure['totals']['total_vectordb_nodes'],
                help="VectorDB nodes with higher memory capacity"
            )
    with tab2:
        st.subheader("GPU Requirements Analysis")

        # GPU requirement metrics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric(
                "Memory-based GPUs",
                gpu_requirements['gpus_needed_memory'],
                help="GPUs needed to fit the model in memory"
            )
        with col2:
            st.metric(
                "Throughput-based GPUs",
                gpu_requirements['gpus_needed_throughput'],
                help="GPUs needed for the required throughput"
            )
        with col3:
            st.metric(
                "Logical GPUs Needed",
                gpu_requirements['total_gpus_needed'],
                help="Minimum GPUs needed (before node configuration)"
            )
        with col4:
            st.metric(
                "Actual GPUs Allocated",
                gpu_requirements['actual_gpus_allocated'],
                help="GPUs allocated based on standard node configurations",
                delta=gpu_requirements['actual_gpus_allocated'] - gpu_requirements['total_gpus_needed']
            )

        # GPU node configuration analysis
        st.subheader("🖥️ GPU Node Configuration Options")
        if gpu_requirements['gpu_configurations']:
            # Display the configuration options in a table
            config_data = []
            for config in gpu_requirements['gpu_configurations']:
                efficiency_score = f"{config['utilization']:.1f}%"
                memory_compatible = "✅" if config['meets_memory_req'] else "❌"
                config_data.append({
                    "GPUs/Node": config['gpus_per_node'],
                    "Mem": memory_compatible,
                    "Nodes": config['num_nodes'],
                    "Total GPUs": config['total_gpus_allocated'],
                    "GPU Util": efficiency_score,
                    "Waste": config['gpu_waste'],
                    "Mem Util": f"{config['memory_utilization']:.1f}%"
                })
            config_df = pd.DataFrame(config_data)
            st.dataframe(
                config_df,
                use_container_width=True,
                hide_index=True,
                column_config={
                    "GPUs/Node": st.column_config.NumberColumn("GPUs/Node", width="small"),
                    "Mem": st.column_config.TextColumn("Mem ✓", width="small"),
                    "Nodes": st.column_config.NumberColumn("Nodes", width="small"),
                    "Total GPUs": st.column_config.NumberColumn("Total GPUs", width="small"),
                    "GPU Util": st.column_config.TextColumn("GPU Util", width="small"),
                    "Waste": st.column_config.NumberColumn("Waste", width="small"),
                    "Mem Util": st.column_config.TextColumn("Mem Util", width="small")
                }
            )
            # Highlight the recommended configuration
            if gpu_requirements['best_config']:
                best = gpu_requirements['best_config']
                st.success(f"💡 **Recommended Configuration**: {best['num_nodes']} nodes × {best['gpus_per_node']} GPUs = {best['total_gpus_allocated']} total GPUs ({best['utilization']:.1f}% utilization)")
            # Show the minimum per-node requirement
            st.info(f"**Memory Constraint**: Minimum {gpu_requirements['min_gpus_per_node']} GPUs per node required to fit the {gpu_requirements['model_memory_gb']:.1f}GB model in {gpu_spec['memory']}GB GPU memory")

        # GPU configuration visualization
        col1, col2 = st.columns(2)
        with col1:
            # Node configuration comparison
            if gpu_requirements['gpu_configurations']:
                config_chart_data = pd.DataFrame(gpu_requirements['gpu_configurations'])
                fig_configs = px.bar(
                    config_chart_data,
                    x='gpus_per_node',
                    y='utilization',
                    title='GPU Utilization by Node Configuration',
                    labels={'gpus_per_node': 'GPUs per Node', 'utilization': 'Utilization (%)'}
                )
                st.plotly_chart(fig_configs, use_container_width=True)
        with col2:
            # GPU allocation vs. requirement
            allocation_data = pd.DataFrame({
                'Metric': ['Required GPUs', 'Allocated GPUs'],
                'Count': [gpu_requirements['total_gpus_needed'], gpu_requirements['actual_gpus_allocated']]
            })
            fig_allocation = px.bar(
                allocation_data,
                x='Metric',
                y='Count',
                title='GPU Allocation vs Requirement',
                color='Metric'
            )
            st.plotly_chart(fig_allocation, use_container_width=True)

        # Model and GPU specifications
        st.subheader("🔧 Model & GPU Specifications")
        gpu_config_data = [{
            'Model': selected_model,
            'Parameters': f"{model_spec['params']}B ({model_spec['active_params']}B active)" if model_spec['params'] != model_spec['active_params'] else f"{model_spec['params']}B",
            'Model Memory Required': f"{gpu_requirements['model_memory_gb']:.1f} GB",
            'GPU Type': selected_gpu,
            'GPU Memory per Unit': f"{gpu_spec['memory']} GB",
            'GPUs Required (Logical)': gpu_requirements['total_gpus_needed'],
            'GPUs Allocated (Actual)': gpu_requirements['actual_gpus_allocated'],
            'GPU Nodes': f"{gpu_requirements['best_config']['num_nodes']} nodes × {gpu_requirements['best_config']['gpus_per_node']} GPUs" if gpu_requirements['best_config'] else 'N/A',
            'Total GPU Memory': f"{gpu_requirements['actual_gpus_allocated'] * gpu_spec['memory']} GB",
            'Memory Utilization': f"{(gpu_requirements['model_memory_gb'] / (gpu_requirements['actual_gpus_allocated'] * gpu_spec['memory']) * 100):.1f}%",
            'Precision': precision
        }]
        gpu_config_df = pd.DataFrame(gpu_config_data)
        st.dataframe(gpu_config_df, use_container_width=True)

        # Performance metrics
        col1, col2 = st.columns(2)
        with col1:
            # TPS comparison
            tps_data = pd.DataFrame({
                'Metric': ['Required TPS', 'Single GPU TPS', 'Total System TPS'],
                'Value': [
                    gpu_requirements['required_tps'],
                    gpu_requirements['estimated_tps'],
                    gpu_requirements['total_system_tps']
                ]
            })
            fig_tps = px.bar(
                tps_data,
                x='Metric',
                y='Value',
                title='Tokens Per Second Analysis',
                color='Metric'
            )
            st.plotly_chart(fig_tps, use_container_width=True)
        with col2:
            # Capacity utilization
            utilization_data = pd.DataFrame({
                'Metric': ['Required Capacity', 'Available Capacity'],
                'Conversations/Min': [
                    conversations_per_minute,
                    gpu_requirements['max_conversations_per_minute']
                ]
            })
            fig_capacity = px.bar(
                utilization_data,
                x='Metric',
                y='Conversations/Min',
                title='Conversation Capacity Analysis',
                color='Metric'
            )
            st.plotly_chart(fig_capacity, use_container_width=True)
| with tab3: | |
| st.subheader("Comprehensive Cost Analysis") | |
| # Show customization status for all providers | |
| default_values = { | |
| 'aws': {'standard': 0.384, 'vectordb': 0.768, 'jump': 0.096, 'k8s': 0.10}, | |
| 'azure': {'standard': 0.384, 'vectordb': 0.768, 'jump': 0.096, 'k8s': 0.0}, | |
| 'gcp': {'standard': 0.379, 'vectordb': 0.758, 'jump': 0.067, 'k8s': 0.10}, | |
| 'onprem': {'standard': 0.192, 'vectordb': 0.384, 'jump': 0.048, 'gpu_mult': 0.55, 'k8s': 0.05} | |
| } | |
| customizations = [] | |
| # Check AWS customizations | |
| if (aws_standard_node != default_values['aws']['standard'] or | |
| aws_vectordb_node != default_values['aws']['vectordb'] or | |
| aws_k8s_management != default_values['aws']['k8s']): | |
| customizations.append("AWS") | |
| # Check Azure customizations | |
| if (azure_standard_node != default_values['azure']['standard'] or | |
| azure_vectordb_node != default_values['azure']['vectordb'] or | |
| azure_k8s_management != default_values['azure']['k8s']): | |
| customizations.append("Azure") | |
| # Check GCP customizations | |
| if (gcp_standard_node != default_values['gcp']['standard'] or | |
| gcp_vectordb_node != default_values['gcp']['vectordb'] or | |
| gcp_k8s_management != default_values['gcp']['k8s']): | |
| customizations.append("GCP") | |
| # Check On-Premise customizations | |
| if (onprem_standard_node != default_values['onprem']['standard'] or | |
| onprem_vectordb_node != default_values['onprem']['vectordb'] or | |
| onprem_gpu_multiplier != default_values['onprem']['gpu_mult'] or | |
| onprem_k8s_management != default_values['onprem']['k8s']): | |
| customizations.append("On-Premise") | |
| if customizations: | |
| st.warning(f""" | |
| **✏️ Custom Pricing Active for: {', '.join(customizations)}** | |
| Using user-configured pricing instead of defaults. View details in Technical Specifications tab or adjust in sidebar. | |
| """) | |
| # Add info box about cost models | |
| st.info(""" | |
| **💡 Cost Model Information**: | |
| - **Cloud Providers (AWS/Azure/GCP)**: Pay-as-you-go pricing with per-hour compute and GPU costs | |
| - **On-Premise**: Hardware amortized over 3-year lifecycle + operating costs (power, cooling, maintenance) | |
| - **Customization**: All pricing values can be adjusted in the sidebar to match your actual costs | |
| **🔧 Customize:** Use the sidebar "Cloud Provider Pricing" sections to adjust costs | |
| """) | |
| # Calculate costs for all providers | |
| all_costs = {} | |
| for provider in CLOUD_PRICING.keys(): | |
| all_costs[provider] = calculate_detailed_costs( | |
| provider, infrastructure, gpu_requirements, gpu_spec, days | |
| ) | |
| # Cost comparison table | |
| cost_comparison_data = [] | |
| for provider, costs in all_costs.items(): | |
| gpu_available = costs['totals']['gpu_available'] | |
| cost_comparison_data.append({ | |
| 'Provider': provider, | |
| 'GPU': '✅' if gpu_available else '❌', | |
| 'Platform': f"${costs['totals']['platform_cost']:.2f}", | |
| 'GPU Cost': format_cost_for_display(costs['totals']['gpu_cost'], gpu_available), | |
| 'Total': format_cost_for_display(costs['totals']['total_cost'], gpu_available), | |
| 'Per Hour': format_cost_for_display(costs['totals']['cost_per_hour'], gpu_available), | |
| 'Per Day': format_cost_for_display(costs['totals']['cost_per_day'], gpu_available), | |
| 'Total_Numeric': costs['totals']['total_cost'] if gpu_available else None, | |
| 'GPU_Available': gpu_available | |
| }) | |
| cost_df = pd.DataFrame(cost_comparison_data) | |
| display_cost_df = cost_df.drop(['Total_Numeric', 'GPU_Available'], axis=1) | |
| st.dataframe( | |
| display_cost_df, | |
| use_container_width=True, | |
| hide_index=True, | |
| column_config={ | |
| "Provider": st.column_config.TextColumn("Provider", width="medium"), | |
| "GPU": st.column_config.TextColumn("GPU ✓", width="small"), | |
| "Platform": st.column_config.TextColumn("Platform Cost", width="medium"), | |
| "GPU Cost": st.column_config.TextColumn("GPU Cost", width="medium"), | |
| "Total": st.column_config.TextColumn("Total Cost", width="medium"), | |
| "Per Hour": st.column_config.TextColumn("$/Hour", width="medium"), | |
| "Per Day": st.column_config.TextColumn("$/Day", width="medium") | |
| } | |
| ) | |
| # Add download button for cost report | |
| report_data = create_downloadable_cost_report( | |
| all_costs, infrastructure, gpu_requirements, model_spec, gpu_spec, | |
| selected_model, selected_gpu, num_tenants, apps_per_tenant, conversations_per_minute, | |
| tokens_per_conversation, precision, time_period | |
| ) | |
| st.download_button( | |
| label="📥 Download Complete Cost Report (JSON)", | |
| data=json.dumps(report_data, indent=2), | |
| file_name=f"llmops_cost_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json", | |
| mime="application/json", | |
| help="Download comprehensive cost analysis with all services and configurations" | |
| ) | |
| # Create CSV version for easier viewing | |
| csv_data = [] | |
| for provider, provider_data in report_data['cost_breakdown_by_provider'].items(): | |
| gpu_ok = provider_data['gpu_available'] | |
| # GPU-specific fields are only read when the provider actually offers the GPU, | |
| # so the conditional expressions below never touch missing keys. | |
| csv_data.append({ | |
| 'Provider': provider, | |
| 'GPU_Available': 'Yes' if gpu_ok else 'No', | |
| 'Platform_Nodes': infrastructure['totals']['total_standard_nodes'], | |
| 'VectorDB_Nodes': infrastructure['totals']['total_vectordb_nodes'], | |
| 'GPU_Nodes': gpu_requirements['total_gpus_needed'] if gpu_ok else 'N/A', | |
| 'Kubernetes_Nodes_Cost': provider_data['platform_costs']['kubernetes_nodes'], | |
| 'VectorDB_Nodes_Cost': provider_data['platform_costs']['vectordb_nodes'], | |
| 'Jump_Host_Cost': provider_data['platform_costs']['jump_host'], | |
| 'Additional_Services_Cost': provider_data['platform_costs']['additional_services'], | |
| 'K8s_Management_Cost': provider_data['platform_costs']['k8s_management'], | |
| 'Total_Platform_Cost': provider_data['platform_costs']['platform_total'], | |
| 'GPU_Cost_Per_Hour': provider_data['gpu_costs']['gpu_cost_per_hour'] if gpu_ok else 'N/A', | |
| 'Total_GPU_Cost': provider_data['gpu_costs']['total_gpu_cost'] if gpu_ok else 'N/A', | |
| 'Total_Infrastructure_Cost': provider_data['totals']['total_cost'] if gpu_ok else 'N/A', | |
| 'Cost_Per_Hour': provider_data['totals']['cost_per_hour'] if gpu_ok else 'N/A', | |
| 'Cost_Per_Day': provider_data['totals']['cost_per_day'] if gpu_ok else 'N/A' | |
| }) | |
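| # The CSV flattens the JSON report into one row per provider for spreadsheets. | |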
| csv_df = pd.DataFrame(csv_data) | |
| csv_string = csv_df.to_csv(index=False) | |
| st.download_button( | |
| label="📊 Download Cost Summary (CSV)", | |
| data=csv_string, | |
| file_name=f"llmops_cost_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", | |
| mime="text/csv", | |
| help="Download cost summary in CSV format for spreadsheet analysis" | |
| ) | |
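| # Optional sketch (not wired in): an Excel export could follow the same pattern. | |
| # Assumes the `openpyxl` engine is installed; the label and sheet name below are | |
| # placeholders. Uncomment to enable. | |
| # import io | |
| # excel_buffer = io.BytesIO() | |
| # csv_df.to_excel(excel_buffer, index=False, sheet_name="Cost Summary") | |
| # st.download_button( | |
| #     label="📈 Download Cost Summary (Excel)", | |
| #     data=excel_buffer.getvalue(), | |
| #     file_name=f"llmops_cost_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx", | |
| #     mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | |
| # ) | |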
| # Cost breakdown visualization - only for providers with GPU available | |
| available_providers_data = cost_df[cost_df['GPU_Available'] == True] | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Provider comparison - only available providers | |
| if not available_providers_data.empty: | |
| fig_provider_comparison = px.bar( | |
| available_providers_data, | |
| x='Provider', | |
| y='Total_Numeric', | |
| title=f'Total Cost Comparison ({time_period}) - Available Deployment Options', | |
| labels={'Total_Numeric': 'Total Cost (USD)'}, | |
| color='Provider' | |
| ) | |
| st.plotly_chart(fig_provider_comparison, use_container_width=True) | |
| else: | |
| st.warning("⚠️ No providers have the selected GPU available for cost comparison") | |
| with col2: | |
| # Cost breakdown for selected provider (cheapest available) | |
| available_providers = get_available_providers_for_gpu(gpu_spec) | |
| if available_providers: | |
| cheapest_provider = min(available_providers, | |
| key=lambda x: all_costs[x]['totals']['total_cost']) | |
| cheapest_costs = all_costs[cheapest_provider] | |
| breakdown_values = [ | |
| cheapest_costs['totals']['platform_cost'], | |
| cheapest_costs['totals']['gpu_cost'] | |
| ] | |
| breakdown_labels = ['Platform Infrastructure', 'GPU Infrastructure'] | |
| fig_breakdown = px.pie( | |
| values=breakdown_values, | |
| names=breakdown_labels, | |
| title=f'{cheapest_provider} - Cost Breakdown' | |
| ) | |
| st.plotly_chart(fig_breakdown, use_container_width=True) | |
| else: | |
| st.warning("⚠️ No providers have the selected GPU available") | |
| # Detailed cost breakdown for cheapest available provider | |
| available_providers = get_available_providers_for_gpu(gpu_spec) | |
| if available_providers: | |
| cheapest_provider = min(available_providers, | |
| key=lambda x: all_costs[x]['totals']['total_cost']) | |
| st.subheader(f"💡 Most Cost-Effective Option: {cheapest_provider}") | |
| if cheapest_provider == 'On-Premise': | |
| st.success(f"✅ **On-Premise deployment offers the lowest cost** with {selected_gpu}") | |
| st.info("💰 **Note**: On-premise costs assume 3-year hardware amortization. Initial capex and datacenter setup costs are not included in hourly rates.") | |
| else: | |
| st.info(f"✅ **{selected_gpu} is available on {cheapest_provider}**") | |
| cheapest_costs = all_costs[cheapest_provider] | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| st.metric( | |
| "Platform Infrastructure", | |
| f"${cheapest_costs['totals']['platform_cost']:.2f}", | |
| help="Kubernetes nodes (including VectorDB), networking, storage, management" | |
| ) | |
| with col2: | |
| st.metric( | |
| "GPU Infrastructure", | |
| f"${cheapest_costs['totals']['gpu_cost']:.2f}", | |
| help=f"{gpu_requirements['total_gpus_needed']} x {selected_gpu}" | |
| ) | |
| with col3: | |
| # Calculate savings compared to most expensive available provider | |
| if len(available_providers) > 1: | |
| most_expensive_available = max(available_providers, | |
| key=lambda x: all_costs[x]['totals']['total_cost']) | |
| savings = all_costs[most_expensive_available]['totals']['total_cost'] - cheapest_costs['totals']['total_cost'] | |
| savings_pct = (savings / all_costs[most_expensive_available]['totals']['total_cost']) * 100 | |
| st.metric( | |
| "Potential Savings", | |
| f"${savings:.2f}", | |
| help=f"Savings compared to {most_expensive_available} ({savings_pct:.1f}%)" | |
| ) | |
| else: | |
| st.metric( | |
| "Provider Status", | |
| "Only Option", | |
| help="This is the only provider with the selected GPU available" | |
| ) | |
| # Cloud vs On-Premise comparison if both are available | |
| if 'On-Premise' in available_providers and len(available_providers) > 1: | |
| st.subheader("☁️ Cloud vs 🏢 On-Premise Comparison") | |
| onprem_cost = all_costs['On-Premise']['totals']['total_cost'] | |
| cloud_providers = [p for p in available_providers if p != 'On-Premise'] | |
| comparison_data = [] | |
| for provider in ['On-Premise'] + cloud_providers: | |
| comparison_data.append({ | |
| 'Deployment Type': 'On-Premise' if provider == 'On-Premise' else 'Cloud', | |
| 'Provider': provider, | |
| 'Total Cost': all_costs[provider]['totals']['total_cost'], | |
| 'Platform Cost': all_costs[provider]['totals']['platform_cost'], | |
| 'GPU Cost': all_costs[provider]['totals']['gpu_cost'] | |
| }) | |
| comp_df = pd.DataFrame(comparison_data) | |
| # Create grouped bar chart | |
| fig_comparison = go.Figure() | |
| fig_comparison.add_trace(go.Bar( | |
| name='Platform Cost', | |
| x=comp_df['Provider'], | |
| y=comp_df['Platform Cost'], | |
| marker_color='lightblue' | |
| )) | |
| fig_comparison.add_trace(go.Bar( | |
| name='GPU Cost', | |
| x=comp_df['Provider'], | |
| y=comp_df['GPU Cost'], | |
| marker_color='orange' | |
| )) | |
| fig_comparison.update_layout( | |
| title='Cost Breakdown: On-Premise vs Cloud', | |
| xaxis_title='Provider', | |
| yaxis_title='Cost (USD)', | |
| barmode='stack' | |
| ) | |
| st.plotly_chart(fig_comparison, use_container_width=True) | |
| # Calculate average cloud cost | |
| avg_cloud_cost = sum([all_costs[p]['totals']['total_cost'] for p in cloud_providers]) / len(cloud_providers) | |
| cloud_savings = avg_cloud_cost - onprem_cost | |
| cloud_savings_pct = (cloud_savings / avg_cloud_cost) * 100 | |
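| # Positive savings means on-premise undercuts the simple (unweighted) average | |
| # of the available cloud providers over the selected time period. | |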
| if cloud_savings > 0: | |
| st.success(f"💰 **On-Premise Savings**: ${cloud_savings:.2f} ({cloud_savings_pct:.1f}%) compared to average cloud cost over {time_period}") | |
| else: | |
| st.info(f"☁️ **Cloud is more cost-effective** for this configuration over {time_period}") | |
| else: | |
| st.error(f"❌ **No Providers Available**: The selected GPU ({selected_gpu}) is not available on any deployment option") | |
| st.warning("**Recommendation**: Please select a different GPU model that is available") | |
| # Show which GPUs are available on which providers | |
| st.subheader("🔍 GPU Availability by Provider") | |
| availability_data = [] | |
| for gpu_name, gpu_data in GPUS.items(): | |
| available_on = get_available_providers_for_gpu(gpu_data) | |
| availability_data.append({ | |
| 'GPU Model': gpu_name, | |
| 'Memory': f"{gpu_data['memory']} GB", | |
| 'Available On': ', '.join(available_on) if available_on else 'None', | |
| 'Deployment Options': len(available_on) | |
| }) | |
| availability_df = pd.DataFrame(availability_data) | |
| availability_df = availability_df.sort_values('Deployment Options', ascending=False) | |
| st.dataframe(availability_df, use_container_width=True) | |
| with tab4: | |
| st.subheader("Performance Analysis & Scaling") | |
| # Performance metrics | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| st.metric( | |
| "Total System TPS", | |
| f"{gpu_requirements['total_system_tps']:.0f}", | |
| help="Combined throughput of all GPUs" | |
| ) | |
| with col2: | |
| st.metric( | |
| "Conversation Capacity", | |
| f"{gpu_requirements['max_conversations_per_minute']:.0f}/min", | |
| help="Maximum conversations the system can handle" | |
| ) | |
| with col3: | |
| capacity_headroom = gpu_requirements['max_conversations_per_minute'] - conversations_per_minute | |
| headroom_percentage = (capacity_headroom / gpu_requirements['max_conversations_per_minute']) * 100 | |
| st.metric( | |
| "Capacity Headroom", | |
| f"{headroom_percentage:.1f}%", | |
| delta=f"{capacity_headroom:.0f} conv/min available", | |
| help="Available capacity beyond current target load" | |
| ) | |
| # Scaling analysis | |
| st.subheader("Scaling Analysis") | |
| # Create scaling scenarios | |
| scaling_scenarios = [0.5, 1.0, 1.5, 2.0, 3.0, 5.0] | |
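| # Each multiplier re-runs the full GPU sizing calculation at the scaled load. | |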
| scaling_data = [] | |
| for multiplier in scaling_scenarios: | |
| scaled_conversations = int(conversations_per_minute * multiplier) | |
| scaled_gpu_reqs = calculate_gpu_requirements( | |
| scaled_conversations, tokens_per_conversation, | |
| model_spec, gpu_spec, precision_bytes | |
| ) | |
| scaling_data.append({ | |
| 'Load Multiplier': f"{multiplier}x", | |
| 'Conversations/Min': scaled_conversations, | |
| 'Logical GPUs': scaled_gpu_reqs['total_gpus_needed'], | |
| 'Allocated GPUs': scaled_gpu_reqs['actual_gpus_allocated'], | |
| 'GPU Nodes': f"{scaled_gpu_reqs['best_config']['num_nodes']}×{scaled_gpu_reqs['best_config']['gpus_per_node']}" if scaled_gpu_reqs['best_config'] else 'N/A', | |
| 'System Capacity': f"{scaled_gpu_reqs['max_conversations_per_minute']:.0f}", | |
| 'Headroom %': f"{((scaled_gpu_reqs['max_conversations_per_minute'] - scaled_conversations) / scaled_gpu_reqs['max_conversations_per_minute'] * 100):.1f}%" | |
| }) | |
| scaling_df = pd.DataFrame(scaling_data) | |
| st.dataframe( | |
| scaling_df, | |
| use_container_width=True, | |
| hide_index=True, | |
| column_config={ | |
| "Load Multiplier": st.column_config.TextColumn("Load", width="small"), | |
| "Conversations/Min": st.column_config.NumberColumn("Conv/Min", width="small"), | |
| "Logical GPUs": st.column_config.NumberColumn("Logical", width="small"), | |
| "Allocated GPUs": st.column_config.NumberColumn("Allocated", width="small"), | |
| "GPU Nodes": st.column_config.TextColumn("GPU Nodes", width="medium"), | |
| "System Capacity": st.column_config.TextColumn("Capacity", width="medium"), | |
| "Headroom %": st.column_config.TextColumn("Headroom %", width="small") | |
| } | |
| ) | |
| # Scaling visualization | |
| fig_scaling = go.Figure() | |
| # Add lines for both logical and allocated GPUs | |
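| # 'Logical' is the raw GPU count the sizing math requires; 'Allocated' rounds | |
| # up to whole GPU nodes, so it can sit above the logical line. | |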
| fig_scaling.add_trace(go.Scatter( | |
| x=[float(x.replace('x', '')) for x in scaling_df['Load Multiplier']], | |
| y=scaling_df['Logical GPUs'].astype(int), | |
| mode='lines+markers', | |
| name='Logical GPUs Required', | |
| line=dict(color='blue', dash='dash') | |
| )) | |
| fig_scaling.add_trace(go.Scatter( | |
| x=[float(x.replace('x', '')) for x in scaling_df['Load Multiplier']], | |
| y=scaling_df['Allocated GPUs'].astype(int), | |
| mode='lines+markers', | |
| name='Allocated GPUs (Actual)', | |
| line=dict(color='red') | |
| )) | |
| fig_scaling.update_layout( | |
| title='GPU Scaling Requirements (Logical vs Allocated)', | |
| xaxis_title='Load Multiplier', | |
| yaxis_title='Number of GPUs' | |
| ) | |
| st.plotly_chart(fig_scaling, use_container_width=True) | |
| # Application scaling analysis | |
| st.subheader("Application Scaling Analysis") | |
| app_scaling_scenarios = [4, 8, 12, 16, 20, 24, 32, 40] | |
| app_scaling_data = [] | |
| for apps in app_scaling_scenarios: | |
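| # One deployment node serves up to four apps, hence the ceiling division. | |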
| deploy_nodes = math.ceil(apps / 4) | |
| app_scaling_data.append({ | |
| 'Apps per Tenant': apps, | |
| 'Total Apps': apps * num_tenants, | |
| 'Deploy Nodes per Tenant': deploy_nodes, | |
| 'Total Deploy Nodes': deploy_nodes * num_tenants, | |
| 'Deploy Node Ratio': f"1:{4 if apps >= 4 else apps}" | |
| }) | |
| app_scaling_df = pd.DataFrame(app_scaling_data) | |
| st.dataframe( | |
| app_scaling_df, | |
| use_container_width=True, | |
| hide_index=True, | |
| column_config={ | |
| "Apps per Tenant": st.column_config.NumberColumn("Apps/Tenant", width="small"), | |
| "Total Apps": st.column_config.NumberColumn("Total Apps", width="small"), | |
| "Deploy Nodes per Tenant": st.column_config.NumberColumn("Deploy/Tenant", width="small"), | |
| "Total Deploy Nodes": st.column_config.NumberColumn("Total Deploy", width="medium"), | |
| "Deploy Node Ratio": st.column_config.TextColumn("Ratio", width="small") | |
| } | |
| ) | |
| # App scaling visualization | |
| fig_app_scaling = px.line( | |
| app_scaling_df, | |
| x='Apps per Tenant', | |
| y='Total Deploy Nodes', | |
| title='Deployment Nodes Scaling with Application Count', | |
| markers=True | |
| ) | |
| st.plotly_chart(fig_app_scaling, use_container_width=True) | |
| with tab5: | |
| st.subheader("Technical Specifications") | |
| # Model specifications | |
| st.markdown("### 🤖 LLM Model Specifications") | |
| model_specs_data = [{ | |
| 'Property': 'Model Name', | |
| 'Value': selected_model | |
| }, { | |
| 'Property': 'Organization', | |
| 'Value': model_spec['org'] | |
| }, { | |
| 'Property': 'Total Parameters', | |
| 'Value': f"{model_spec['params']}B" | |
| }, { | |
| 'Property': 'Active Parameters', | |
| 'Value': f"{model_spec['active_params']}B" | |
| }, { | |
| 'Property': 'Max Context Length', | |
| 'Value': f"{model_spec['max_context']:,} tokens" | |
| }, { | |
| 'Property': 'Base TPS', | |
| 'Value': f"{model_spec['base_tps']:,}" | |
| }, { | |
| 'Property': 'License', | |
| 'Value': model_spec['license'] | |
| }, { | |
| 'Property': 'Architecture Type', | |
| 'Value': 'Mixture of Experts (MoE)' if model_spec['params'] != model_spec['active_params'] else 'Dense Model' | |
| }] | |
| model_specs_df = pd.DataFrame(model_specs_data) | |
| st.dataframe(model_specs_df, use_container_width=True) | |
| # GPU specifications | |
| st.markdown("### 🖥️ GPU Specifications") | |
| gpu_specs_data = [{ | |
| 'Property': 'GPU Model', | |
| 'Value': selected_gpu | |
| }, { | |
| 'Property': 'Memory Capacity', | |
| 'Value': f"{gpu_spec['memory']} GB" | |
| }, { | |
| 'Property': 'Compute Capability', | |
| 'Value': gpu_spec['compute'] | |
| }, { | |
| 'Property': 'TPS Range', | |
| 'Value': f"{gpu_spec['tps_min']:,} - {gpu_spec['tps_max']:,}" | |
| }, { | |
| 'Property': 'Efficiency Tier', | |
| 'Value': gpu_spec['efficiency_tier'] | |
| }, { | |
| 'Property': 'Model Precision', | |
| 'Value': precision | |
| }] | |
| gpu_specs_df = pd.DataFrame(gpu_specs_data) | |
| st.dataframe(gpu_specs_df, use_container_width=True) | |
| # Platform specifications | |
| st.markdown("### 🗏 Platform Infrastructure Specifications") | |
| platform_specs_data = [{ | |
| 'Component': 'Standard K8s Nodes', | |
| 'Specification': f"{infrastructure['totals']['total_standard_nodes']} nodes × 8 vCPUs × 32GB RAM" | |
| }, { | |
| 'Component': 'VectorDB Nodes', | |
| 'Specification': f"{infrastructure['totals']['total_vectordb_nodes']} nodes × 16 vCPUs × 64GB RAM" | |
| }, { | |
| 'Component': 'GPU Nodes', | |
| 'Specification': f"{gpu_requirements['actual_gpus_allocated']} × {selected_gpu} ({gpu_requirements['best_config']['num_nodes']} nodes × {gpu_requirements['best_config']['gpus_per_node']} GPUs)" if gpu_requirements['best_config'] else f"{gpu_requirements['total_gpus_needed']} × {selected_gpu}" | |
| }, { | |
| 'Component': 'Total CPU Cores', | |
| 'Specification': f"{infrastructure['totals']['total_cpu']} cores" | |
| }, { | |
| 'Component': 'Total RAM', | |
| 'Specification': f"{infrastructure['totals']['total_ram']} GB" | |
| }, { | |
| 'Component': 'Total GPU Memory', | |
| 'Specification': f"{gpu_requirements['actual_gpus_allocated'] * gpu_spec['memory']} GB" | |
| }, { | |
| 'Component': 'Applications per Tenant', | |
| 'Specification': f"{apps_per_tenant} apps × {num_tenants} tenants = {infrastructure['totals']['total_apps']} total apps" | |
| }, { | |
| 'Component': 'Deployment Nodes per Tenant', | |
| 'Specification': f"{infrastructure['totals']['deploy_nodes_per_tenant']} node(s) (1 node per 4 apps)" | |
| }] | |
| platform_specs_df = pd.DataFrame(platform_specs_data) | |
| st.dataframe(platform_specs_df, use_container_width=True) | |
| # Provider Pricing Configuration Summary | |
| st.markdown("### 💰 Provider Pricing Configuration") | |
| # Create tabs for each provider | |
| price_tab1, price_tab2, price_tab3, price_tab4 = st.tabs(["AWS", "Azure", "GCP", "On-Premise"]) | |
| with price_tab1: | |
| aws_config_data = [{ | |
| 'Cost Component': 'Standard Compute Node', | |
| 'Specification': 'm5.2xlarge (8 vCPU, 32GB)', | |
| 'Cost per Hour': f"${aws_standard_node:.3f}", | |
| 'Status': '✏️ Custom' if aws_standard_node != 0.384 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'VectorDB Node', | |
| 'Specification': 'm5.4xlarge (16 vCPU, 64GB)', | |
| 'Cost per Hour': f"${aws_vectordb_node:.3f}", | |
| 'Status': '✏️ Custom' if aws_vectordb_node != 0.768 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'Jump Host', | |
| 'Specification': 'm5.large (2 vCPU, 8GB)', | |
| 'Cost per Hour': f"${aws_jump_host:.3f}", | |
| 'Status': '✏️ Custom' if aws_jump_host != 0.096 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'EKS Management', | |
| 'Specification': 'Managed Kubernetes', | |
| 'Cost per Hour': f"${aws_k8s_management:.3f}", | |
| 'Status': '✏️ Custom' if aws_k8s_management != 0.10 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'H200 141GB GPU', | |
| 'Specification': 'Flagship+ GPU', | |
| 'Cost per Hour': f"${aws_h200:.2f}", | |
| 'Status': '✏️ Custom' if aws_h200 != 15.70 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'H100 80GB GPU', | |
| 'Specification': 'Flagship GPU', | |
| 'Cost per Hour': f"${aws_h100:.2f}", | |
| 'Status': '✏️ Custom' if aws_h100 != 6.01 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'A100 80GB GPU', | |
| 'Specification': 'Excellent GPU', | |
| 'Cost per Hour': f"${aws_a100_80:.2f}", | |
| 'Status': '✏️ Custom' if aws_a100_80 != 3.43 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'A100 40GB GPU', | |
| 'Specification': 'Good GPU', | |
| 'Cost per Hour': f"${aws_a100_40:.2f}", | |
| 'Status': '✏️ Custom' if aws_a100_40 != 2.75 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'L40S GPU', | |
| 'Specification': 'Very Good GPU', | |
| 'Cost per Hour': f"${aws_l40s:.2f}", | |
| 'Status': '✏️ Custom' if aws_l40s != 1.67 else '✅ Default' | |
| }] | |
| aws_config_df = pd.DataFrame(aws_config_data) | |
| st.dataframe(aws_config_df, use_container_width=True) | |
| with price_tab2: | |
| azure_config_data = [{ | |
| 'Cost Component': 'Standard Compute Node', | |
| 'Specification': 'Standard_D8s_v3 (8 vCPU, 32GB)', | |
| 'Cost per Hour': f"${azure_standard_node:.3f}", | |
| 'Status': '✏️ Custom' if azure_standard_node != 0.384 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'VectorDB Node', | |
| 'Specification': 'Standard_D16s_v3 (16 vCPU, 64GB)', | |
| 'Cost per Hour': f"${azure_vectordb_node:.3f}", | |
| 'Status': '✏️ Custom' if azure_vectordb_node != 0.768 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'Jump Host', | |
| 'Specification': 'Standard_D2s_v3 (2 vCPU, 8GB)', | |
| 'Cost per Hour': f"${azure_jump_host:.3f}", | |
| 'Status': '✏️ Custom' if azure_jump_host != 0.096 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'AKS Management', | |
| 'Specification': 'Managed Kubernetes (Free)', | |
| 'Cost per Hour': f"${azure_k8s_management:.3f}", | |
| 'Status': '✏️ Custom' if azure_k8s_management != 0.0 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'H200 141GB GPU', | |
| 'Specification': 'Flagship+ GPU', | |
| 'Cost per Hour': f"${azure_h200:.2f}", | |
| 'Status': '✏️ Custom' if azure_h200 != 12.29 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'H100 80GB GPU', | |
| 'Specification': 'Flagship GPU', | |
| 'Cost per Hour': f"${azure_h100:.2f}", | |
| 'Status': '✏️ Custom' if azure_h100 != 6.98 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'A100 80GB GPU', | |
| 'Specification': 'Excellent GPU', | |
| 'Cost per Hour': f"${azure_a100_80:.2f}", | |
| 'Status': '✏️ Custom' if azure_a100_80 != 3.67 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'A100 40GB GPU', | |
| 'Specification': 'Good GPU', | |
| 'Cost per Hour': f"${azure_a100_40:.2f}", | |
| 'Status': '✏️ Custom' if azure_a100_40 != 3.67 else '✅ Default' | |
| }] | |
| azure_config_df = pd.DataFrame(azure_config_data) | |
| st.dataframe(azure_config_df, use_container_width=True) | |
| with price_tab3: | |
| gcp_config_data = [{ | |
| 'Cost Component': 'Standard Compute Node', | |
| 'Specification': 'n1-standard-8 (8 vCPU, 30GB)', | |
| 'Cost per Hour': f"${gcp_standard_node:.3f}", | |
| 'Status': '✏️ Custom' if gcp_standard_node != 0.379 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'VectorDB Node', | |
| 'Specification': 'n1-standard-16 (16 vCPU, 60GB)', | |
| 'Cost per Hour': f"${gcp_vectordb_node:.3f}", | |
| 'Status': '✏️ Custom' if gcp_vectordb_node != 0.758 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'Jump Host', | |
| 'Specification': 'e2-medium (2 vCPU, 8GB)', | |
| 'Cost per Hour': f"${gcp_jump_host:.3f}", | |
| 'Status': '✏️ Custom' if gcp_jump_host != 0.067 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'GKE Management', | |
| 'Specification': 'Managed Kubernetes', | |
| 'Cost per Hour': f"${gcp_k8s_management:.3f}", | |
| 'Status': '✏️ Custom' if gcp_k8s_management != 0.10 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'H100 80GB GPU', | |
| 'Specification': 'Flagship GPU', | |
| 'Cost per Hour': f"${gcp_h100:.2f}", | |
| 'Status': '✏️ Custom' if gcp_h100 != 11.06 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'A100 80GB GPU', | |
| 'Specification': 'Excellent GPU', | |
| 'Cost per Hour': f"${gcp_a100_80:.2f}", | |
| 'Status': '✏️ Custom' if gcp_a100_80 != 2.48 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'A100 40GB GPU', | |
| 'Specification': 'Good GPU', | |
| 'Cost per Hour': f"${gcp_a100_40:.2f}", | |
| 'Status': '✏️ Custom' if gcp_a100_40 != 1.46 else '✅ Default' | |
| }] | |
| gcp_config_df = pd.DataFrame(gcp_config_data) | |
| st.dataframe(gcp_config_df, use_container_width=True) | |
| with price_tab4: | |
| onprem_config_data = [{ | |
| 'Cost Component': 'Standard Compute Node', | |
| 'Specification': '8 vCPU, 32GB RAM', | |
| 'Cost per Hour': f"${onprem_standard_node:.3f}", | |
| 'Status': '✏️ Custom' if onprem_standard_node != 0.192 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'VectorDB Node', | |
| 'Specification': '16 vCPU, 64GB RAM', | |
| 'Cost per Hour': f"${onprem_vectordb_node:.3f}", | |
| 'Status': '✏️ Custom' if onprem_vectordb_node != 0.384 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'Jump Host', | |
| 'Specification': '2 vCPU, 8GB RAM', | |
| 'Cost per Hour': f"${onprem_jump_host:.3f}", | |
| 'Status': '✏️ Custom' if onprem_jump_host != 0.048 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'GPU Pricing', | |
| 'Specification': f'{onprem_gpu_multiplier*100:.0f}% of AWS pricing', | |
| 'Cost per Hour': f"${GPUS[selected_gpu]['pricing']['on-premise']:.2f} (for {selected_gpu})", | |
| 'Status': '✏️ Custom' if onprem_gpu_multiplier != 0.55 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'K8s Management', | |
| 'Specification': 'Self-managed operational cost', | |
| 'Cost per Hour': f"${onprem_k8s_management:.3f}", | |
| 'Status': '✏️ Custom' if onprem_k8s_management != 0.05 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'Network Infrastructure', | |
| 'Specification': 'Switches, routers, firewalls', | |
| 'Cost per Hour': f"${onprem_network:.3f}", | |
| 'Status': '✏️ Custom' if onprem_network != 0.020 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'Storage SAN/NAS', | |
| 'Specification': 'Per GB per month', | |
| 'Cost per Hour': f"${onprem_storage_per_gb:.3f}/GB/month", | |
| 'Status': '✏️ Custom' if onprem_storage_per_gb != 0.05 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'Hardware Load Balancer', | |
| 'Specification': 'F5/Citrix ADC amortized', | |
| 'Cost per Hour': f"${onprem_load_balancer:.3f}", | |
| 'Status': '✏️ Custom' if onprem_load_balancer != 0.010 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'Power & Cooling', | |
| 'Specification': 'Datacenter utilities', | |
| 'Cost per Hour': f"${onprem_power_cooling:.3f}", | |
| 'Status': '✏️ Custom' if onprem_power_cooling != 0.030 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'Datacenter Space', | |
| 'Specification': 'Rack space and facilities', | |
| 'Cost per Hour': f"${onprem_datacenter_space:.3f}", | |
| 'Status': '✏️ Custom' if onprem_datacenter_space != 0.015 else '✅ Default' | |
| }, { | |
| 'Cost Component': 'Maintenance & Support', | |
| 'Specification': 'Vendor support contracts', | |
| 'Cost per Hour': f"${onprem_maintenance:.3f}", | |
| 'Status': '✏️ Custom' if onprem_maintenance != 0.025 else '✅ Default' | |
| }] | |
| onprem_config_df = pd.DataFrame(onprem_config_data) | |
| st.dataframe(onprem_config_df, use_container_width=True) | |
| st.markdown(""" | |
| **💡 Configuration Tips:** | |
| - Adjust pricing in the sidebar under "Cloud Provider Pricing (Optional)" | |
| - Default values based on public pricing as of 2024/2025 | |
| - Customize based on your actual contract rates, discounts, or negotiated pricing | |
| - All calculations update automatically when values are changed | |
| - Click "🔄 Reset All Pricing to Defaults" in sidebar to restore original values | |
| """) | |
| # VM Types Summary | |
| st.markdown("### 🖥️ Deployment Options Summary") | |
| deployment_options_data = [] | |
| for provider in CLOUD_PRICING.keys(): | |
| pricing = CLOUD_PRICING[provider] | |
| deployment_options_data.append({ | |
| 'Provider': provider, | |
| 'Standard Node': pricing['description'], | |
| 'VectorDB Node': pricing['vectordb_node']['instance_type'], | |
| 'Jump Host': pricing['jump_host']['instance_type'], | |
| 'Managed K8s': pricing['name'] | |
| }) | |
| deployment_df = pd.DataFrame(deployment_options_data) | |
| st.dataframe(deployment_df, use_container_width=True) | |
| # Recommendations section | |
| st.header("💡 Recommendations & Insights") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.subheader("🎯 Performance Recommendations") | |
| if gpu_requirements['bottleneck'] == 'Memory': | |
| st.info("💾 **Memory-bound workload**: Consider using INT8 or INT4 quantization to reduce memory requirements") | |
| else: | |
| st.info("⚡ **Throughput-bound workload**: Current memory is sufficient, focus on GPU count for throughput") | |
| capacity_headroom = gpu_requirements['max_conversations_per_minute'] - conversations_per_minute | |
| headroom_percentage = (capacity_headroom / gpu_requirements['max_conversations_per_minute']) * 100 | |
| if headroom_percentage < 20: | |
| st.warning(f"🚨 **Low headroom** ({headroom_percentage:.1f}%): System near capacity. Consider adding more GPUs or optimizing workload distribution") | |
| elif headroom_percentage > 70: | |
| st.success(f"✅ **High headroom** ({headroom_percentage:.1f}%): System has significant capacity for growth") | |
| else: | |
| st.info(f"📊 **Balanced headroom** ({headroom_percentage:.1f}%): Good balance between capacity and resource efficiency") | |
| # Application deployment recommendations | |
| if apps_per_tenant > 12: | |
| st.warning(f"📦 **High app density**: {apps_per_tenant} apps per tenant requires {infrastructure['totals']['deploy_nodes_per_tenant']} deployment nodes. Consider application consolidation") | |
| elif apps_per_tenant <= 4: | |
| st.success(f"✅ **Efficient deployment**: Only 1 deployment node needed for {apps_per_tenant} apps per tenant") | |
| else: | |
| st.info(f"📊 **Moderate app density**: {infrastructure['totals']['deploy_nodes_per_tenant']} deployment nodes for {apps_per_tenant} apps per tenant") | |
| with col2: | |
| st.subheader("💰 Cost Optimization") | |
| available_providers = get_available_providers_for_gpu(gpu_spec) | |
| if len(available_providers) >= 2: | |
| cheapest_provider = min(available_providers, | |
| key=lambda x: all_costs[x]['totals']['total_cost']) | |
| most_expensive_provider = max(available_providers, | |
| key=lambda x: all_costs[x]['totals']['total_cost']) | |
| savings = all_costs[most_expensive_provider]['totals']['total_cost'] - all_costs[cheapest_provider]['totals']['total_cost'] | |
| savings_percentage = (savings / all_costs[most_expensive_provider]['totals']['total_cost']) * 100 | |
| if cheapest_provider == 'On-Premise': | |
| st.success(f"💡 **Recommended Option**: On-Premise Deployment") | |
| st.info(f"💰 **Cost Advantage**: ${savings:.2f} ({savings_percentage:.1f}%) savings compared to {most_expensive_provider}") | |
| st.warning("⚠️ **Consider**: Initial capex, datacenter readiness, and operational expertise for on-premise") | |
| else: | |
| st.success(f"💡 **Recommended Provider**: {cheapest_provider}") | |
| st.info(f"💰 **Potential Savings**: ${savings:.2f} ({savings_percentage:.1f}%) compared to {most_expensive_provider}") | |
| # Cost distribution insight | |
| cheapest_costs = all_costs[cheapest_provider] | |
| platform_percentage = (cheapest_costs['totals']['platform_cost'] / cheapest_costs['totals']['total_cost']) * 100 | |
| gpu_percentage = (cheapest_costs['totals']['gpu_cost'] / cheapest_costs['totals']['total_cost']) * 100 | |
| if gpu_percentage > 70: | |
| st.warning("🖥️ **GPU-heavy costs**: Consider optimizing model size or using more efficient GPUs") | |
| else: | |
| st.info(f"⚖️ **Balanced infrastructure**: Platform ({platform_percentage:.0f}%) vs GPU ({gpu_percentage:.0f}%)") | |
| elif len(available_providers) == 1: | |
| available_provider = available_providers[0] | |
| st.success(f"💡 **Available Option**: {available_provider}") | |
| if available_provider == 'On-Premise': | |
| st.info("🏢 On-premise is your only deployment option for this GPU") | |
| else: | |
| st.warning("⚠️ **Limited Options**: Only one provider has the selected GPU available") | |
| # Show cost distribution for the only available provider | |
| provider_costs = all_costs[available_provider] | |
| platform_percentage = (provider_costs['totals']['platform_cost'] / provider_costs['totals']['total_cost']) * 100 | |
| gpu_percentage = (provider_costs['totals']['gpu_cost'] / provider_costs['totals']['total_cost']) * 100 | |
| st.info(f"📊 **Cost Distribution**: Platform ({platform_percentage:.0f}%) vs GPU ({gpu_percentage:.0f}%)") | |
| else: | |
| st.error("❌ **No Available Options**: Selected GPU is not available on any deployment option") | |
| st.warning("**Action Required**: Please select a different GPU model") | |
| # Show alternative GPUs | |
| st.markdown("**💡 Suggested Alternatives:**") | |
| alternatives = [] | |
| for gpu_name, gpu_data in GPUS.items(): | |
| available_on = get_available_providers_for_gpu(gpu_data) | |
| if available_on: | |
| alternatives.append(f"• **{gpu_name}** - Available on: {', '.join(available_on)}") | |
| if alternatives: | |
| for alt in alternatives[:3]: # Show top 3 alternatives | |
| st.markdown(alt) | |
| # Infrastructure Summary Box | |
| st.header("📋 Infrastructure Summary") | |
| summary_col1, summary_col2, summary_col3 = st.columns(3) | |
| with summary_col1: | |
| st.markdown("### Platform Infrastructure") | |
| st.markdown(f""" | |
| - **Tenants**: {num_tenants} | |
| - **Apps per Tenant**: {apps_per_tenant} | |
| - **Total Applications**: {infrastructure['totals']['total_apps']} | |
| - **Standard Nodes**: {infrastructure['totals']['total_standard_nodes']} (8 vCPU, 32GB) | |
| - **VectorDB Nodes**: {infrastructure['totals']['total_vectordb_nodes']} (16 vCPU, 64GB) | |
| - **Total Platform Nodes**: {infrastructure['totals']['total_nodes']} | |
| """) | |
| with summary_col2: | |
| st.markdown("### GPU Infrastructure") | |
| st.markdown(f""" | |
| - **Model**: {selected_model} | |
| - **GPU Type**: {selected_gpu} | |
| - **Precision**: {precision} | |
| - **GPUs Required**: {gpu_requirements['total_gpus_needed']} | |
| - **GPUs Allocated**: {gpu_requirements['actual_gpus_allocated']} | |
| - **GPU Configuration**: {f"{gpu_requirements['best_config']['num_nodes']} nodes × {gpu_requirements['best_config']['gpus_per_node']} GPUs" if gpu_requirements['best_config'] else 'N/A'} | |
| """) | |
| with summary_col3: | |
| st.markdown("### Performance Metrics") | |
| capacity_headroom = gpu_requirements['max_conversations_per_minute'] - conversations_per_minute | |
| headroom_percentage = (capacity_headroom / gpu_requirements['max_conversations_per_minute']) * 100 | |
| st.markdown(f""" | |
| - **Target Load**: {conversations_per_minute} conv/min | |
| - **Max Capacity**: {gpu_requirements['max_conversations_per_minute']:.0f} conv/min | |
| - **Capacity Headroom**: {headroom_percentage:.1f}% | |
| - **Bottleneck**: {gpu_requirements['bottleneck']} | |
| - **Total TPS**: {gpu_requirements['total_system_tps']:.0f} | |
| - **Tokens/Conv**: {tokens_per_conversation} | |
| """) | |
| if __name__ == "__main__": | |
| create_comprehensive_dashboard() |