# src/streamlit_app.py — Katonic Multitenant Infrastructure Calculator
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import math
from plotly.subplots import make_subplots
import json
from datetime import datetime
# Configure Streamlit for better performance.
# NOTE(review): st.set_page_config must be the first Streamlit call in a script
# run, and this module also calls it inside create_comprehensive_dashboard();
# duplicate calls typically raise StreamlitAPIException — confirm and drop one.
st.set_page_config(
    page_title="Katonic Multitenant Infrastructure Calculator",
    page_icon="🚀",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Cloud provider and On-Premise pricing data (per hour in USD)
# Schema per provider: hourly rates for standard nodes ('cost_per_node_hour'),
# high-memory VectorDB nodes ('vectordb_node'), a jump host, a managed-K8s fee,
# plus an 'additional_services' map priced either per hour ('cost_per_hour')
# or per GB-month ('cost_per_gb_month' — consumed by calculate_detailed_costs).
CLOUD_PRICING = {
    'On-Premise': {
        'name': 'On-Premise Datacenter',
        'cost_per_node_hour': 0.192,  # ~50% of cloud (amortized hardware + power + cooling over 3 years)
        'managed_k8s_cost': 0.05,  # Self-managed K8s operational cost (admin time, monitoring tools)
        'description': 'Dell PowerEdge R640 / HPE DL360 equivalent',
        'specs': '8 vCPUs, 32GB RAM',
        'vectordb_node': {
            'instance_type': 'Dell PowerEdge R740 / HPE DL380 equivalent',
            'cost_per_hour': 0.384,  # ~50% of cloud (high-memory server amortized)
            'specs': '16 vCPUs, 64GB RAM'
        },
        'jump_host': {
            'instance_type': 'Dell PowerEdge R440 / HPE DL20 equivalent',
            'cost_per_hour': 0.048,  # ~50% of cloud (small server amortized)
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'Network_Infrastructure': {'cost_per_hour': 0.020, 'description': 'Switches, routers, firewalls (amortized)'},
            'Storage_SAN': {'cost_per_gb_month': 0.05, 'description': 'SAN/NAS storage (1TB base, amortized)'},
            'Hardware_Load_Balancer': {'cost_per_hour': 0.010, 'description': 'F5/Citrix ADC (amortized)'},
            'Power_Cooling': {'cost_per_hour': 0.030, 'description': 'Datacenter power (0.1kW/server) and cooling'},
            'Datacenter_Space': {'cost_per_hour': 0.015, 'description': 'Rack space and facilities costs'},
            'Maintenance_Support': {'cost_per_hour': 0.025, 'description': 'Hardware maintenance and vendor support contracts'}
        },
        'gpu_pricing_multiplier': 0.55,  # On-prem GPU costs are ~55% of cloud (hardware amortization + power)
        'notes': 'Costs include: hardware amortization (3-year lifecycle), power (~$0.10/kWh), cooling (1:1 ratio), rack space, network infrastructure, storage, and maintenance. Assumes enterprise datacenter with N+1 redundancy. Does NOT include: initial capex, datacenter construction, staff salaries (covered in K8s management cost).'
    },
    'AWS': {
        'name': 'Amazon EKS',
        'cost_per_node_hour': 0.384,
        'managed_k8s_cost': 0.10,
        'description': 'm5.2xlarge instances',
        'specs': '8 vCPUs, 32GB RAM',
        'vectordb_node': {
            'instance_type': 'm5.4xlarge',
            'cost_per_hour': 0.768,
            'specs': '16 vCPUs, 64GB RAM'
        },
        'jump_host': {
            'instance_type': 'm5.large',
            'cost_per_hour': 0.096,
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'VPC': {'cost_per_hour': 0.0, 'description': 'Virtual Private Cloud (Free)'},
            'EBS': {'cost_per_gb_month': 0.10, 'description': 'Elastic Block Store (1TB expandable)'},
            'ELB': {'cost_per_hour': 0.025, 'description': 'Elastic Load Balancer'},
            'EIP': {'cost_per_hour': 0.005, 'description': 'Elastic IP Address'}
        }
    },
    'Azure': {
        'name': 'Azure Kubernetes Service',
        'cost_per_node_hour': 0.384,
        'managed_k8s_cost': 0.0,
        'description': 'Standard_D8s_v3 instances',
        'specs': '8 vCPUs, 32GB RAM',
        'vectordb_node': {
            'instance_type': 'Standard_D16s_v3',
            'cost_per_hour': 0.768,
            'specs': '16 vCPUs, 64GB RAM'
        },
        'jump_host': {
            'instance_type': 'Standard_D2s_v3',
            'cost_per_hour': 0.096,
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'VNet': {'cost_per_hour': 0.0, 'description': 'Virtual Network (Free)'},
            'Managed_Disks': {'cost_per_gb_month': 0.10, 'description': 'Managed Disks (1TB expandable)'},
            'Load_Balancer': {'cost_per_hour': 0.025, 'description': 'Azure Load Balancer'},
            'Public_IP': {'cost_per_hour': 0.005, 'description': 'Public IP Address'}
        }
    },
    'GCP': {
        'name': 'Google Kubernetes Engine',
        'cost_per_node_hour': 0.379,
        'managed_k8s_cost': 0.10,
        'description': 'n1-standard-8 instances',
        'specs': '8 vCPUs, 30GB RAM',
        'vectordb_node': {
            'instance_type': 'n1-standard-16',
            'cost_per_hour': 0.758,
            'specs': '16 vCPUs, 60GB RAM'
        },
        'jump_host': {
            'instance_type': 'e2-medium',
            'cost_per_hour': 0.067,
            'specs': '2 vCPUs, 8GB RAM'
        },
        'additional_services': {
            'VPC': {'cost_per_hour': 0.0, 'description': 'Virtual Private Cloud (Free)'},
            'Persistent_Disk': {'cost_per_gb_month': 0.10, 'description': 'Persistent Disk (1TB expandable)'},
            'Load_Balancer': {'cost_per_hour': 0.025, 'description': 'Cloud Load Balancing'},
            'Static_IP': {'cost_per_hour': 0.004, 'description': 'Static External IP'},
            'Cloud_Storage': {'cost_per_gb_month': 0.020, 'description': 'GCS Bucket (Optional)'},
            'Filestore': {'cost_per_gb_month': 0.20, 'description': 'Filestore (depends on usage)'}
        }
    }
}
# Production-grade model specifications.
# 'params' / 'active_params' are parameter counts in billions (MoE models have
# fewer active than total); 'memory_per_param' is bytes per parameter at the
# default precision (2 = FP16); 'max_context' is in tokens.
# NOTE(review): 'base_tps' is passed to calculate_model_tps_on_gpu but not
# actually used there — throughput comes from the GPU's tps range instead.
MODELS = {
    "Llama 4 Maverick": {
        "params": 400,
        "active_params": 17,
        "memory_per_param": 2,
        "max_context": 1000000,
        "base_tps": 4200,
        "org": "Meta",
        "license": "Open-weight",
        "notes": "Multimodal MoE; 1M context; text, image, code, reasoning"
    },
    "Llama 4 Scout": {
        "params": 109,
        "active_params": 17,
        "memory_per_param": 2,
        "max_context": 10000000,
        "base_tps": 4500,
        "org": "Meta",
        "license": "Open-weight",
        "notes": "Multimodal MoE; 10M context; efficient for long-form tasks"
    },
    "Llama 3.3 70B": {
        "params": 70,
        "active_params": 70,
        "memory_per_param": 2,
        "max_context": 128000,
        "base_tps": 1800,
        "org": "Meta",
        "license": "Community (open)",
        "notes": "Multilingual; matches Llama 3.1 405B performance"
    },
    "Qwen2 110B": {
        "params": 110,
        "active_params": 110,
        "memory_per_param": 2,
        "max_context": 128000,
        "base_tps": 1200,
        "org": "Alibaba/Qwen",
        "license": "Apache 2.0",
        "notes": "Multilingual; top-tier reasoning and coding"
    },
    "DeepSeek-VL 110B": {
        "params": 110,
        "active_params": 110,
        "memory_per_param": 2,
        "max_context": 128000,
        "base_tps": 1100,
        "org": "DeepSeek AI",
        "license": "MIT",
        "notes": "Multimodal (vision+language); GPT-4V alternative"
    },
    "Mixtral 8x22B": {
        "params": 141,
        "active_params": 39,
        "memory_per_param": 2,
        "max_context": 65536,
        "base_tps": 2800,
        "org": "Mistral AI",
        "license": "Apache 2.0",
        "notes": "Sparse MoE; efficiency leader among MoE models"
    }
}
# GPU catalog: 'memory' is on-board VRAM in GB, 'compute' a relative score,
# 'tps_min'/'tps_max' the tokens-per-second range used by the TPS estimator,
# and 'pricing' the hourly USD rate per provider ("NA" = not offered there;
# is_gpu_available_for_provider treats non-numeric entries as unavailable).
GPUS = {
    "H200 141GB": {
        "memory": 141,
        "compute": 9.0,
        "tps_min": 5486,
        "tps_max": 18690,
        "efficiency_tier": "Flagship+",
        "pricing": {
            "aws": 15.70,
            "azure": 12.29,
            "gcp": "NA",
            "on-premise": 8.64  # 55% of AWS price (hardware amortization + power)
        }
    },
    "H100 80GB": {
        "memory": 80,
        "compute": 9.0,
        "tps_min": 2400,
        "tps_max": 14000,
        "efficiency_tier": "Flagship",
        "pricing": {
            "aws": 6.01,
            "azure": 6.98,
            "gcp": 11.06,
            "on-premise": 3.31  # 55% of AWS price
        }
    },
    "A100 80GB": {
        "memory": 80,
        "compute": 8.0,
        "tps_min": 1100,
        "tps_max": 2000,
        "efficiency_tier": "Excellent",
        "pricing": {
            "aws": 3.43,
            "azure": 3.67,
            "gcp": 2.48,
            "on-premise": 1.89  # 55% of AWS price
        }
    },
    "A100 40GB": {
        "memory": 40,
        "compute": 8.0,
        "tps_min": 1000,
        "tps_max": 1800,
        "efficiency_tier": "Good",
        "pricing": {
            "aws": 2.75,
            "azure": 3.67,
            "gcp": 1.46,
            "on-premise": 1.51  # 55% of AWS price
        }
    },
    "L40S": {
        "memory": 48,
        "compute": 8.9,
        "tps_min": 4000,
        "tps_max": 4768,
        "efficiency_tier": "Very Good",
        "pricing": {
            "aws": 1.67,
            "azure": "NA",
            "gcp": "NA",
            "on-premise": 0.92  # 55% of AWS price
        }
    }
}
@st.cache_data(show_spinner=False, ttl=300)
def calculate_detailed_infrastructure(num_tenants, apps_per_tenant):
    """Calculate detailed infrastructure requirements with node type breakdown - CACHED.

    Sizes platform/compute/deploy nodes (8 vCPU, 32GB each) and high-memory
    VectorDB nodes (16 vCPU, 64GB each) for the given tenant and app counts,
    returning a breakdown per node type plus aggregate totals.
    """
    # Hardware profiles
    std_cores, std_ram = 8, 32      # standard node: 8 vCPU / 32GB RAM
    vdb_cores, vdb_ram = 16, 64     # VectorDB node: 16 vCPU / 64GB RAM

    # Shared base platform footprint plus per-tenant allocations
    base_platform = 2
    per_tenant_platform = 1
    per_tenant_compute = 1
    per_tenant_vectordb = 1
    # Every 4 apps need 1 deployment node (rounded up)
    per_tenant_deploy = math.ceil(apps_per_tenant / 4)

    # Fleet totals per node type
    platform_total = base_platform + per_tenant_platform * num_tenants
    compute_total = per_tenant_compute * num_tenants
    deploy_total = per_tenant_deploy * num_tenants
    vectordb_total = per_tenant_vectordb * num_tenants

    # Standard nodes exclude VectorDB (different hardware profile)
    standard_total = platform_total + compute_total + deploy_total
    node_total = standard_total + vectordb_total

    # Aggregate resources and app capacity
    cpu_total = (standard_total * std_cores) + (vectordb_total * vdb_cores)
    ram_total = (standard_total * std_ram) + (vectordb_total * vdb_ram)
    app_total = num_tenants * apps_per_tenant

    return {
        'node_breakdown': {
            'Platform Nodes': {
                'base': base_platform,
                'tenant': per_tenant_platform * num_tenants,
                'total': platform_total,
                'cores': platform_total * std_cores,
                'ram': platform_total * std_ram,
                'purpose': 'Tenancy Manager + Tenant platform services',
                'node_type': 'Standard (8 vCPU, 32GB RAM)'
            },
            'Compute Nodes': {
                'base': 0,
                'tenant': compute_total,
                'total': compute_total,
                'cores': compute_total * std_cores,
                'ram': compute_total * std_ram,
                'purpose': 'Computational workloads',
                'node_type': 'Standard (8 vCPU, 32GB RAM)'
            },
            'Deploy Nodes': {
                'base': 0,
                'tenant': deploy_total,
                'total': deploy_total,
                'cores': deploy_total * std_cores,
                'ram': deploy_total * std_ram,
                'purpose': f'Application deployment ({per_tenant_deploy} node(s) per {apps_per_tenant} apps)',
                'node_type': 'Standard (8 vCPU, 32GB RAM)'
            },
            'VectorDB Nodes': {
                'base': 0,
                'tenant': vectordb_total,
                'total': vectordb_total,
                'cores': vectordb_total * vdb_cores,
                'ram': vectordb_total * vdb_ram,
                'purpose': 'Vector database operations (high memory)',
                'node_type': 'High-Memory (16 vCPU, 64GB RAM)'
            }
        },
        'totals': {
            'total_nodes': node_total,
            'total_standard_nodes': standard_total,
            'total_vectordb_nodes': vectordb_total,
            'total_cpu': cpu_total,
            'total_ram': ram_total,
            'total_apps': app_total,
            'deploy_nodes_per_tenant': per_tenant_deploy
        },
        'specs': {
            'cores_per_node': std_cores,
            'ram_per_node': std_ram,
            'vectordb_cores_per_node': vdb_cores,
            'vectordb_ram_per_node': vdb_ram
        }
    }
def calculate_model_memory_requirements(model_params, active_params, precision_bytes):
    """Estimate total GPU memory (GB) needed to host a model for inference.

    Sized as raw weights plus a 25% runtime overhead and a 10% KV-cache
    allowance. NOTE(review): active_params is currently unused — sizing is
    based on total parameters; confirm whether MoE models should size on
    active params instead.
    """
    weights = model_params * precision_bytes
    # 25% framework/activation overhead, 10% KV-cache allowance
    return weights + weights * 0.25 + weights * 0.1
def calculate_model_tps_on_gpu(model_base_tps, model_params, active_params, gpu_spec):
    """Estimate tokens/sec for one model on one GPU.

    Scales the GPU's published TPS range by the model's active parameter
    count relative to a 70B reference, then assumes realized throughput
    lands 30% of the way between the scaled min and max.
    Returns (estimated_tps, scaled_min, scaled_max).
    NOTE(review): model_base_tps and model_params are unused here — confirm
    whether they should influence the estimate.
    """
    # Smaller active models run faster; sub-linear (^0.7) scaling vs 70B
    scale = (70 / active_params) ** 0.7
    low = gpu_spec["tps_min"] * scale
    high = gpu_spec["tps_max"] * scale
    # Conservative point estimate: 30% into the [low, high] range
    return low + (high - low) * 0.3, low, high
def calculate_gpu_node_configurations(total_gpus_needed, gpu_memory_gb, gpu_spec):
    """Enumerate GPU node layouts using standard 1/2/4/8-GPU node sizes.

    Args:
        total_gpus_needed: minimum GPU count the workload requires.
        gpu_memory_gb: model memory footprint that must fit in one node.
        gpu_spec: GPU entry providing per-card "memory" in GB.

    Returns:
        (configurations, min_gpus_per_node) where configurations is sorted
        best-first (highest utilization, then fewest allocated GPUs).

    Fix: the original duplicated the entire config-building code in its
    fallback branch; both paths now share one helper.
    """
    standard_configs = [1, 2, 4, 8]
    # Smallest node size whose combined VRAM fits the model
    min_gpus_per_node = math.ceil(gpu_memory_gb / gpu_spec["memory"])

    def _config_for(gpus_per_node):
        """Build the layout entry for nodes holding `gpus_per_node` GPUs."""
        num_nodes = math.ceil(total_gpus_needed / gpus_per_node)
        allocated = num_nodes * gpus_per_node
        return {
            'gpus_per_node': gpus_per_node,
            'num_nodes': num_nodes,
            'total_gpus_allocated': allocated,
            'total_gpus_needed': total_gpus_needed,
            'utilization': (total_gpus_needed / allocated) * 100,
            'gpu_waste': allocated - total_gpus_needed,
            'meets_memory_req': gpus_per_node >= min_gpus_per_node,
            'memory_utilization': (gpu_memory_gb / (gpus_per_node * gpu_spec["memory"])) * 100
        }

    # Only node sizes that can hold the whole model in memory
    configurations = [_config_for(g) for g in standard_configs if g >= min_gpus_per_node]
    # Fallback: model needs more than 8 GPUs per node — list all sizes anyway
    if not configurations:
        configurations = [_config_for(g) for g in standard_configs]

    # Best utilization first; break ties with fewer total GPUs (stable sort)
    configurations.sort(key=lambda c: (-c['utilization'], c['total_gpus_allocated']))
    return configurations, min_gpus_per_node
@st.cache_data(show_spinner=False, ttl=300)
def calculate_gpu_requirements(conversations_per_minute, tokens_per_conversation, model_spec, gpu_spec, precision_bytes):
    """Calculate GPU requirements for LLM inference with proper node configurations - CACHED.

    Combines the model's memory footprint with the workload's token
    throughput demand, then selects the most efficient standard node layout.
    """
    # Aggregate token throughput the system must sustain (tokens/second)
    required_tps = (conversations_per_minute * tokens_per_conversation) / 60

    # Model memory footprint (weights + overhead + KV cache), in GB
    model_memory_gb = calculate_model_memory_requirements(
        model_spec["params"], model_spec["active_params"], precision_bytes
    )

    # Expected per-GPU throughput for this model on this GPU
    estimated_tps, tps_min, tps_max = calculate_model_tps_on_gpu(
        model_spec["base_tps"], model_spec["params"], model_spec["active_params"], gpu_spec
    )

    # GPUs needed to satisfy each constraint independently; at least one
    by_memory = math.ceil(model_memory_gb / gpu_spec["memory"])
    by_throughput = math.ceil(required_tps / estimated_tps)
    gpus_required = max(by_memory, by_throughput, 1)

    # Enumerate standard node layouts and take the most efficient one
    gpu_configs, min_gpus_per_node = calculate_gpu_node_configurations(
        gpus_required, model_memory_gb, gpu_spec
    )
    best = gpu_configs[0] if gpu_configs else None
    allocated = best['total_gpus_allocated'] if best else gpus_required

    return {
        'gpus_needed_memory': by_memory,
        'gpus_needed_throughput': by_throughput,
        'total_gpus_needed': gpus_required,
        'actual_gpus_allocated': allocated,
        'gpu_configurations': gpu_configs,
        'best_config': best,
        'min_gpus_per_node': min_gpus_per_node,
        'model_memory_gb': model_memory_gb,
        'required_tps': required_tps,
        'estimated_tps': estimated_tps,
        'tps_range': (tps_min, tps_max),
        'total_system_tps': estimated_tps * allocated,
        'max_conversations_per_minute': (estimated_tps * allocated * 60) / tokens_per_conversation,
        'bottleneck': 'Memory' if by_memory >= by_throughput else 'Throughput'
    }
def is_gpu_available_for_provider(provider, gpu_spec):
    """Return True when `provider` lists a real (positive numeric) price for this GPU.

    Missing providers, "NA" markers, and zero/negative prices all count as
    unavailable.
    """
    price = gpu_spec.get("pricing", {}).get(provider.lower())
    # A usable price is numeric and positive; "NA"/None fail isinstance
    return isinstance(price, (int, float)) and price > 0
def get_available_providers_for_gpu(gpu_spec):
    """List the CLOUD_PRICING providers that actually offer this GPU.

    Preserves CLOUD_PRICING's insertion order.
    """
    return [p for p in CLOUD_PRICING if is_gpu_available_for_provider(p, gpu_spec)]
def create_downloadable_cost_report(all_costs, infrastructure, gpu_requirements, model_spec, gpu_spec, selected_model, selected_gpu, num_tenants, apps_per_tenant, conversations_per_minute, tokens_per_conversation, precision, time_period):
    """Assemble a JSON-serializable cost report covering every provider.

    GPU-dependent figures are replaced with the string 'N/A' for providers
    where the selected GPU is not available.
    """
    totals = infrastructure['totals']

    def _or_na(value, available):
        # GPU-dependent figures are meaningless when the GPU is not offered
        return value if available else 'N/A'

    report = {
        'report_metadata': {
            'generated_at': datetime.now().isoformat(),
            'configuration': {
                'tenants': num_tenants,
                'apps_per_tenant': apps_per_tenant,
                'total_apps': num_tenants * apps_per_tenant,
                'model': selected_model,
                'gpu': selected_gpu,
                'precision': precision,
                'conversations_per_minute': conversations_per_minute,
                'tokens_per_conversation': tokens_per_conversation,
                'time_period': time_period
            }
        },
        'infrastructure_summary': {
            'platform_nodes': totals['total_standard_nodes'],
            'vectordb_nodes': totals['total_vectordb_nodes'],
            'total_nodes': totals['total_nodes'],
            'gpu_nodes': gpu_requirements['total_gpus_needed'],
            'total_cpu_cores': totals['total_cpu'],
            'total_ram_gb': totals['total_ram'],
            'total_gpu_memory_gb': gpu_requirements['total_gpus_needed'] * gpu_spec['memory'],
            'max_conversations_per_minute': gpu_requirements['max_conversations_per_minute']
        },
        'cost_breakdown_by_provider': {}
    }

    # One breakdown entry per provider, flattening the nested cost dicts
    for name, cost in all_costs.items():
        available = is_gpu_available_for_provider(name, gpu_spec)
        platform = cost['platform_costs']
        gpu = cost['gpu_costs']
        overall = cost['totals']
        report['cost_breakdown_by_provider'][name] = {
            'gpu_available': available,
            'platform_costs': {
                'kubernetes_nodes': platform['total_node_cost'],
                'vectordb_nodes': platform['vectordb_node_cost'],
                'jump_host': platform['jump_host_cost'],
                'additional_services': platform['additional_services_cost'],
                'k8s_management': platform['k8s_management_cost'],
                'platform_total': platform['platform_total']
            },
            'gpu_costs': {
                'gpu_count': gpu['gpu_count'],
                'gpu_cost_per_hour': gpu['gpu_cost_per_hour'],
                'total_gpu_cost': _or_na(gpu['total_gpu_cost'], available)
            },
            'totals': {
                'platform_cost': overall['platform_cost'],
                'gpu_cost': _or_na(overall['gpu_cost'], available),
                'total_cost': _or_na(overall['total_cost'], available),
                'cost_per_hour': _or_na(overall['cost_per_hour'], available),
                'cost_per_day': _or_na(overall['cost_per_day'], available)
            },
            'service_details': platform['service_costs']
        }
    return report
def format_cost_for_display(cost, available=True):
    """Render a cost as a two-decimal dollar string, or 'N/A' when unavailable."""
    if available and cost != 'N/A':
        return f"${cost:.2f}"
    return 'N/A'
def calculate_detailed_costs(provider, infrastructure, gpu_requirements, gpu_spec, days=30):
    """Price the full stack (platform + GPUs) for one provider over `days` days.

    Returns nested platform/GPU/total breakdowns; GPU figures are zero and
    total_cost is None when the GPU is not offered by this provider.
    """
    pricing = CLOUD_PRICING[provider]
    hours = days * 24

    # --- Kubernetes node costs ------------------------------------------
    node_costs = {}
    standard_cost = 0
    vectordb_cost = 0
    for kind, info in infrastructure['node_breakdown'].items():
        if kind == 'VectorDB Nodes':
            # High-memory nodes carry their own hourly rate
            cost = info['total'] * pricing['vectordb_node']['cost_per_hour'] * hours
            vectordb_cost = cost
        else:
            cost = info['total'] * pricing['cost_per_node_hour'] * hours
            standard_cost += cost
        node_costs[kind] = {
            'count': info['total'],
            'cost': cost,
            'cores': info['cores'],
            'ram': info['ram'],
            'node_type': info.get('node_type', 'Standard')
        }
    node_cost_total = standard_cost + vectordb_cost

    jump_host_cost = pricing['jump_host']['cost_per_hour'] * hours

    # --- Additional services --------------------------------------------
    service_costs = {}
    services_total = 0
    for service, spec in pricing['additional_services'].items():
        if 'cost_per_hour' in spec:
            fee = spec['cost_per_hour'] * hours
        elif 'cost_per_gb_month' in spec and any(
            tag in service.lower() for tag in ('storage', 'disk', 'ebs', 'san')
        ):
            # Storage-like services: billed on a 1TB base, prorated by days
            fee = spec['cost_per_gb_month'] * 1024 * (days / 30)
        else:
            fee = 0
        service_costs[service] = fee
        services_total += fee

    k8s_cost = pricing['managed_k8s_cost'] * hours

    # --- GPU costs (only when this provider offers the GPU) -------------
    gpu_available = is_gpu_available_for_provider(provider, gpu_spec)
    gpu_rate = 0
    gpu_total = 0
    if gpu_available:
        gpu_rate = gpu_spec.get("pricing", {})[provider.lower()]
        gpu_total = gpu_requirements['actual_gpus_allocated'] * gpu_rate * hours

    platform_cost = node_cost_total + jump_host_cost + services_total + k8s_cost
    # None marks "not priceable here" (GPU unavailable for this provider)
    total_cost = (platform_cost + gpu_total) if gpu_available else None

    return {
        'platform_costs': {
            'node_costs': node_costs,
            'total_node_cost': node_cost_total,
            'vectordb_node_cost': vectordb_cost,
            'jump_host_cost': jump_host_cost,
            'service_costs': service_costs,
            'additional_services_cost': services_total,
            'k8s_management_cost': k8s_cost,
            'platform_total': platform_cost
        },
        'gpu_costs': {
            'gpu_count': gpu_requirements['actual_gpus_allocated'],
            'gpu_cost_per_hour': gpu_rate,
            'total_gpu_cost': gpu_total,
            'gpu_available': gpu_available
        },
        'totals': {
            'platform_cost': platform_cost,
            'gpu_cost': gpu_total,
            'total_cost': total_cost,
            'cost_per_hour': total_cost / hours if total_cost is not None else None,
            'cost_per_day': total_cost / days if total_cost is not None else None,
            'gpu_available': gpu_available
        }
    }
def create_comprehensive_dashboard():
st.set_page_config(
page_title="Katonic Multitenant Infrastructure Calculator",
page_icon="🚀",
layout="wide"
)
st.title("🚀 Katonic Multitenant Infrastructure Calculator")
st.markdown("**Comprehensive infrastructure planning for multi-tenant LLMOPS platforms with GPU-accelerated LLM inference**")
# Sidebar Configuration
with st.sidebar:
st.header("🔧 Configuration")
# Platform Configuration
st.subheader("Platform Settings")
num_tenants = st.slider(
"Number of Tenants",
min_value=1,
max_value=20,
value=3,
help="Each tenant requires dedicated platform, compute, deploy, and VectorDB nodes"
)
apps_per_tenant = st.number_input(
"Apps per Tenant",
min_value=1,
max_value=50,
value=4,
step=1,
help="Number of applications per tenant. Every 4 apps require 1 deployment node"
)
# Cloud Provider Pricing Configuration
st.subheader("Cloud Provider Pricing (Optional)")
# AWS Pricing
with st.expander("☁️ Customize AWS Costs", expanded=False):
st.markdown("**Adjust AWS pricing (per hour in USD)**")
st.markdown("##### Compute Nodes")
aws_standard_node = st.number_input(
"m5.2xlarge (8 vCPU, 32GB)",
min_value=0.01,
max_value=2.00,
value=0.384,
step=0.01,
format="%.3f",
key="aws_standard",
help="Default: $0.384/hr"
)
aws_vectordb_node = st.number_input(
"m5.4xlarge (16 vCPU, 64GB)",
min_value=0.01,
max_value=4.00,
value=0.768,
step=0.01,
format="%.3f",
key="aws_vectordb",
help="Default: $0.768/hr"
)
aws_jump_host = st.number_input(
"m5.large (2 vCPU, 8GB)",
min_value=0.01,
max_value=0.50,
value=0.096,
step=0.01,
format="%.3f",
key="aws_jump",
help="Default: $0.096/hr"
)
aws_k8s_management = st.number_input(
"EKS Management Cost",
min_value=0.0,
max_value=0.50,
value=0.10,
step=0.01,
format="%.3f",
key="aws_k8s",
help="Default: $0.10/hr"
)
st.markdown("##### GPU Pricing")
col1, col2 = st.columns(2)
with col1:
aws_h200 = st.number_input("H200 141GB", value=15.70, step=0.10, format="%.2f", key="aws_h200")
aws_h100 = st.number_input("H100 80GB", value=6.01, step=0.10, format="%.2f", key="aws_h100")
aws_a100_80 = st.number_input("A100 80GB", value=3.43, step=0.10, format="%.2f", key="aws_a100_80")
with col2:
aws_a100_40 = st.number_input("A100 40GB", value=2.75, step=0.10, format="%.2f", key="aws_a100_40")
aws_l40s = st.number_input("L40S", value=1.67, step=0.10, format="%.2f", key="aws_l40s")
# Azure Pricing
with st.expander("☁️ Customize Azure Costs", expanded=False):
st.markdown("**Adjust Azure pricing (per hour in USD)**")
st.markdown("##### Compute Nodes")
azure_standard_node = st.number_input(
"Standard_D8s_v3 (8 vCPU, 32GB)",
min_value=0.01,
max_value=2.00,
value=0.384,
step=0.01,
format="%.3f",
key="azure_standard",
help="Default: $0.384/hr"
)
azure_vectordb_node = st.number_input(
"Standard_D16s_v3 (16 vCPU, 64GB)",
min_value=0.01,
max_value=4.00,
value=0.768,
step=0.01,
format="%.3f",
key="azure_vectordb",
help="Default: $0.768/hr"
)
azure_jump_host = st.number_input(
"Standard_D2s_v3 (2 vCPU, 8GB)",
min_value=0.01,
max_value=0.50,
value=0.096,
step=0.01,
format="%.3f",
key="azure_jump",
help="Default: $0.096/hr"
)
azure_k8s_management = st.number_input(
"AKS Management Cost",
min_value=0.0,
max_value=0.50,
value=0.0,
step=0.01,
format="%.3f",
key="azure_k8s",
help="Default: $0.00/hr (Free tier)"
)
st.markdown("##### GPU Pricing")
col1, col2 = st.columns(2)
with col1:
azure_h200 = st.number_input("H200 141GB", value=12.29, step=0.10, format="%.2f", key="azure_h200")
azure_h100 = st.number_input("H100 80GB", value=6.98, step=0.10, format="%.2f", key="azure_h100")
azure_a100_80 = st.number_input("A100 80GB", value=3.67, step=0.10, format="%.2f", key="azure_a100_80")
with col2:
azure_a100_40 = st.number_input("A100 40GB", value=3.67, step=0.10, format="%.2f", key="azure_a100_40")
# GCP Pricing
with st.expander("☁️ Customize GCP Costs", expanded=False):
st.markdown("**Adjust GCP pricing (per hour in USD)**")
st.markdown("##### Compute Nodes")
gcp_standard_node = st.number_input(
"n1-standard-8 (8 vCPU, 30GB)",
min_value=0.01,
max_value=2.00,
value=0.379,
step=0.01,
format="%.3f",
key="gcp_standard",
help="Default: $0.379/hr"
)
gcp_vectordb_node = st.number_input(
"n1-standard-16 (16 vCPU, 60GB)",
min_value=0.01,
max_value=4.00,
value=0.758,
step=0.01,
format="%.3f",
key="gcp_vectordb",
help="Default: $0.758/hr"
)
gcp_jump_host = st.number_input(
"e2-medium (2 vCPU, 8GB)",
min_value=0.01,
max_value=0.50,
value=0.067,
step=0.01,
format="%.3f",
key="gcp_jump",
help="Default: $0.067/hr"
)
gcp_k8s_management = st.number_input(
"GKE Management Cost",
min_value=0.0,
max_value=0.50,
value=0.10,
step=0.01,
format="%.3f",
key="gcp_k8s",
help="Default: $0.10/hr"
)
st.markdown("##### GPU Pricing")
col1, col2 = st.columns(2)
with col1:
gcp_h100 = st.number_input("H100 80GB", value=11.06, step=0.10, format="%.2f", key="gcp_h100")
gcp_a100_80 = st.number_input("A100 80GB", value=2.48, step=0.10, format="%.2f", key="gcp_a100_80")
with col2:
gcp_a100_40 = st.number_input("A100 40GB", value=1.46, step=0.10, format="%.2f", key="gcp_a100_40")
# On-Premise Pricing
with st.expander("🏢 Customize On-Premise Costs", expanded=False):
st.markdown("**Adjust on-premise costs based on your infrastructure**")
st.markdown("##### Compute Nodes (per hour)")
onprem_standard_node = st.number_input(
"Standard Node (8 vCPU, 32GB)",
min_value=0.01,
max_value=1.00,
value=0.192,
step=0.01,
format="%.3f",
key="onprem_standard",
help="Cost per hour for standard compute nodes (default: $0.192)"
)
onprem_vectordb_node = st.number_input(
"VectorDB Node (16 vCPU, 64GB)",
min_value=0.01,
max_value=2.00,
value=0.384,
step=0.01,
format="%.3f",
key="onprem_vectordb",
help="Cost per hour for high-memory VectorDB nodes (default: $0.384)"
)
onprem_jump_host = st.number_input(
"Jump Host (2 vCPU, 8GB)",
min_value=0.01,
max_value=0.50,
value=0.048,
step=0.01,
format="%.3f",
key="onprem_jump",
help="Cost per hour for jump host (default: $0.048)"
)
st.markdown("##### GPU Pricing Multiplier")
onprem_gpu_multiplier = st.slider(
"GPU Cost Multiplier (% of AWS)",
min_value=30,
max_value=100,
value=55,
step=5,
key="onprem_gpu_mult",
help="Percentage of AWS GPU pricing for on-premise (default: 55%)"
) / 100
st.markdown("##### Additional Services (per hour)")
onprem_network = st.number_input(
"Network Infrastructure",
min_value=0.0,
max_value=0.10,
value=0.020,
step=0.005,
format="%.3f",
key="onprem_network",
help="Switches, routers, firewalls (default: $0.020)"
)
onprem_storage_per_gb = st.number_input(
"Storage (per GB per month)",
min_value=0.01,
max_value=0.20,
value=0.05,
step=0.01,
format="%.3f",
key="onprem_storage",
help="SAN/NAS storage cost (default: $0.05/GB/month)"
)
onprem_load_balancer = st.number_input(
"Hardware Load Balancer",
min_value=0.0,
max_value=0.05,
value=0.010,
step=0.005,
format="%.3f",
key="onprem_lb",
help="Load balancer amortized cost (default: $0.010)"
)
onprem_power_cooling = st.number_input(
"Power & Cooling",
min_value=0.01,
max_value=0.10,
value=0.030,
step=0.005,
format="%.3f",
key="onprem_power",
help="Datacenter power and cooling (default: $0.030)"
)
onprem_datacenter_space = st.number_input(
"Datacenter Space",
min_value=0.0,
max_value=0.05,
value=0.015,
step=0.005,
format="%.3f",
key="onprem_space",
help="Rack space and facilities (default: $0.015)"
)
onprem_maintenance = st.number_input(
"Maintenance & Support",
min_value=0.0,
max_value=0.10,
value=0.025,
step=0.005,
format="%.3f",
key="onprem_maint",
help="Hardware maintenance contracts (default: $0.025)"
)
onprem_k8s_management = st.number_input(
"K8s Management Cost",
min_value=0.0,
max_value=0.20,
value=0.05,
step=0.01,
format="%.3f",
key="onprem_k8s",
help="Self-managed K8s operational cost (default: $0.05)"
)
# Reset button
if st.button("🔄 Reset All Pricing to Defaults", type="secondary"):
st.rerun()
# LLM Configuration
st.subheader("LLM Settings")
selected_model = st.selectbox(
"Select LLM Model",
list(MODELS.keys()),
index=2, # Default to Llama 3.3 70B
help="Choose the LLM model for inference workloads"
)
selected_gpu = st.selectbox(
"Select GPU Type",
list(GPUS.keys()),
index=1, # Default to H100 80GB
help="GPU type for LLM inference nodes"
)
precision = st.selectbox(
"Model Precision",
["FP16", "INT8", "INT4"],
index=0, # Default to FP16
help="Model precision affects memory usage and quality"
)
# Workload Configuration
st.subheader("Workload Settings")
conversations_per_minute = st.number_input(
"Conversations per Minute",
min_value=1,
max_value=5000,
value=200,
step=10,
help="Expected conversation throughput across all tenants"
)
tokens_per_conversation = st.number_input(
"Tokens per Conversation",
min_value=500,
max_value=20000,
value=2000,
step=100,
help="Average tokens per conversation (input + output)"
)
# Time period
time_period = st.selectbox(
"Cost Calculation Period",
["Monthly (30 days)", "Weekly (7 days)", "Daily (1 day)", "Hourly"],
index=0
)
days_map = {
"Monthly (30 days)": 30,
"Weekly (7 days)": 7,
"Daily (1 day)": 1,
"Hourly": 1/24
}
days = days_map[time_period]
# Calculate all requirements
infrastructure = calculate_detailed_infrastructure(num_tenants, apps_per_tenant)
# Apply custom pricing - create modified copies to avoid global state issues
def apply_custom_pricing():
    """Apply user-configured pricing to global dictionaries"""
    # NOTE: mutates the module-level CLOUD_PRICING and GPUS dicts in place;
    # every value on the right-hand side is a sidebar widget variable defined
    # earlier in this script (closure over module scope).
    # Update AWS pricing with user-configured values
    CLOUD_PRICING['AWS']['cost_per_node_hour'] = aws_standard_node
    CLOUD_PRICING['AWS']['vectordb_node']['cost_per_hour'] = aws_vectordb_node
    CLOUD_PRICING['AWS']['jump_host']['cost_per_hour'] = aws_jump_host
    CLOUD_PRICING['AWS']['managed_k8s_cost'] = aws_k8s_management
    # Update AWS GPU pricing
    GPUS["H200 141GB"]["pricing"]["aws"] = aws_h200
    GPUS["H100 80GB"]["pricing"]["aws"] = aws_h100
    GPUS["A100 80GB"]["pricing"]["aws"] = aws_a100_80
    GPUS["A100 40GB"]["pricing"]["aws"] = aws_a100_40
    GPUS["L40S"]["pricing"]["aws"] = aws_l40s
    # Update Azure pricing with user-configured values
    CLOUD_PRICING['Azure']['cost_per_node_hour'] = azure_standard_node
    CLOUD_PRICING['Azure']['vectordb_node']['cost_per_hour'] = azure_vectordb_node
    CLOUD_PRICING['Azure']['jump_host']['cost_per_hour'] = azure_jump_host
    CLOUD_PRICING['Azure']['managed_k8s_cost'] = azure_k8s_management
    # Update Azure GPU pricing
    GPUS["H200 141GB"]["pricing"]["azure"] = azure_h200
    GPUS["H100 80GB"]["pricing"]["azure"] = azure_h100
    GPUS["A100 80GB"]["pricing"]["azure"] = azure_a100_80
    GPUS["A100 40GB"]["pricing"]["azure"] = azure_a100_40
    # Update GCP pricing with user-configured values
    CLOUD_PRICING['GCP']['cost_per_node_hour'] = gcp_standard_node
    CLOUD_PRICING['GCP']['vectordb_node']['cost_per_hour'] = gcp_vectordb_node
    CLOUD_PRICING['GCP']['jump_host']['cost_per_hour'] = gcp_jump_host
    CLOUD_PRICING['GCP']['managed_k8s_cost'] = gcp_k8s_management
    # Update GCP GPU pricing (only these three GPU SKUs are configurable for GCP)
    GPUS["H100 80GB"]["pricing"]["gcp"] = gcp_h100
    GPUS["A100 80GB"]["pricing"]["gcp"] = gcp_a100_80
    GPUS["A100 40GB"]["pricing"]["gcp"] = gcp_a100_40
    # Update On-Premise pricing with user-configured values
    CLOUD_PRICING['On-Premise']['cost_per_node_hour'] = onprem_standard_node
    CLOUD_PRICING['On-Premise']['vectordb_node']['cost_per_hour'] = onprem_vectordb_node
    CLOUD_PRICING['On-Premise']['jump_host']['cost_per_hour'] = onprem_jump_host
    CLOUD_PRICING['On-Premise']['managed_k8s_cost'] = onprem_k8s_management
    # Update on-premise additional services
    CLOUD_PRICING['On-Premise']['additional_services'] = {
        'Network_Infrastructure': {'cost_per_hour': onprem_network, 'description': 'Switches, routers, firewalls (amortized)'},
        'Storage_SAN': {'cost_per_gb_month': onprem_storage_per_gb, 'description': 'SAN/NAS storage (1TB base, amortized)'},
        'Hardware_Load_Balancer': {'cost_per_hour': onprem_load_balancer, 'description': 'F5/Citrix ADC (amortized)'},
        'Power_Cooling': {'cost_per_hour': onprem_power_cooling, 'description': 'Datacenter power and cooling'},
        'Datacenter_Space': {'cost_per_hour': onprem_datacenter_space, 'description': 'Rack space and facilities costs'},
        'Maintenance_Support': {'cost_per_hour': onprem_maintenance, 'description': 'Hardware maintenance and vendor support contracts'}
    }
    # Update on-premise GPU pricing based on AWS prices and multiplier
    # ('NA' marks a GPU that AWS does not offer; those keep no on-premise price)
    for gpu_name in GPUS.keys():
        if 'aws' in GPUS[gpu_name]['pricing'] and GPUS[gpu_name]['pricing']['aws'] != 'NA':
            aws_price = GPUS[gpu_name]['pricing']['aws']
            GPUS[gpu_name]['pricing']['on-premise'] = round(aws_price * onprem_gpu_multiplier, 2)
# Apply all custom pricing
apply_custom_pricing()
# Bytes per model parameter at the selected precision (INT4 == half a byte).
precision_bytes = {
    "FP16": 2,
    "INT8": 1,
    "INT4": 0.5
}[precision]
model_spec = MODELS[selected_model]
gpu_spec = GPUS[selected_gpu]
# Size the GPU fleet for the requested throughput, model, and precision.
gpu_requirements = calculate_gpu_requirements(
    conversations_per_minute, tokens_per_conversation,
    model_spec, gpu_spec, precision_bytes
)
# Main Dashboard
st.header("📊 Infrastructure Overview")
st.markdown("---")  # Visual separator
# Row 1: Core Metrics - Use 4 columns for better spacing
col1, col2, col3, col4 = st.columns(4)
with col1:
    st.metric(
        label="🏢 Total Tenants",
        value=f"{num_tenants}",
        help="Number of tenant environments"
    )
with col2:
    st.metric(
        label="📦 Apps per Tenant",
        value=f"{apps_per_tenant}",
        help=f"Total applications: {infrastructure['totals']['total_apps']}"
    )
with col3:
    st.metric(
        label="🖥️ Worker Nodes",
        value=f"{infrastructure['totals']['total_nodes']}",
        help=f"Standard: {infrastructure['totals']['total_standard_nodes']}, VectorDB: {infrastructure['totals']['total_vectordb_nodes']}"
    )
with col4:
    # best_config may be falsy when no node layout satisfies the memory
    # constraint, so the node-count detail is optional.
    gpu_display = f"{gpu_requirements['actual_gpus_allocated']} GPUs"
    if gpu_requirements['best_config']:
        gpu_detail = f"({gpu_requirements['best_config']['num_nodes']} nodes)"
    else:
        gpu_detail = ""
    st.metric(
        label="🎮 GPU Resources",
        value=gpu_display,
        delta=gpu_detail,
        help=f"Configuration: {gpu_requirements['best_config']['num_nodes']}×{gpu_requirements['best_config']['gpus_per_node']} GPUs" if gpu_requirements['best_config'] else "GPU allocation"
    )
# Row 2: Performance Metrics
col1, col2, col3, col4 = st.columns(4)
with col1:
    st.metric(
        label="💬 Target Load",
        value=f"{conversations_per_minute}",
        delta="conv/min",
        help="Target conversation throughput"
    )
with col2:
    st.metric(
        label="📈 Max Capacity",
        value=f"{gpu_requirements['max_conversations_per_minute']:.0f}",
        delta="conv/min",
        help="Maximum system capacity"
    )
with col3:
    # Headroom = spare capacity beyond the configured target load.
    # FIX: guard the percentage against ZeroDivisionError when the computed
    # system capacity is 0 (e.g. no usable GPU configuration was found).
    max_capacity = gpu_requirements['max_conversations_per_minute']
    capacity_headroom = max_capacity - conversations_per_minute
    headroom_percentage = (capacity_headroom / max_capacity) * 100 if max_capacity else 0.0
    st.metric(
        label="📊 Capacity Headroom",
        value=f"{headroom_percentage:.1f}%",
        delta=f"{capacity_headroom:.0f} conv/min available",
        help="Available capacity beyond current target load"
    )
with col4:
    # Memory-bound vs compute-bound indicator for the GPU sizing.
    bottleneck_icon = "💾" if gpu_requirements['bottleneck'] == 'Memory' else "⚡"
    st.metric(
        label=f"{bottleneck_icon} Bottleneck",
        value=gpu_requirements['bottleneck'],
        help="Primary system constraint"
    )
st.markdown("---")  # Visual separator
# Create tabs for detailed views
# The returned tab handles are consumed by the `with tabN:` sections below.
tab1, tab2, tab3, tab4, tab5 = st.tabs([
    "🗏 Platform Infrastructure",
    "🖥️ GPU Requirements",
    "💰 Cost Analysis",
    "📈 Performance Analysis",
    "🔧 Technical Specifications"
])
with tab1:
    st.subheader("Platform Infrastructure Breakdown")
    # Show deployment node scaling info
    st.info(f"📦 **Deployment Node Scaling**: {infrastructure['totals']['deploy_nodes_per_tenant']} deployment node(s) per tenant for {apps_per_tenant} apps (1 node per 4 apps)")
    # Platform nodes breakdown
    # One table row per node type; '-' marks zero / not-applicable cells.
    breakdown_data = []
    for node_type, details in infrastructure['node_breakdown'].items():
        # Tenant-scoped count expressed per tenant (integer division).
        per_tenant_value = details['tenant'] // num_tenants if num_tenants > 0 and details['tenant'] > 0 else 0
        breakdown_data.append({
            'Node Type': node_type,
            'Base': details['base'] if details['base'] > 0 else '-',
            'Per Tenant': per_tenant_value if per_tenant_value > 0 else '-',
            'Total': details['total'],
            'CPU': details['cores'],
            'RAM (GB)': details['ram'],
            'VM Type': details.get('node_type', 'Standard'),
            'Purpose': details['purpose']
        })
    breakdown_df = pd.DataFrame(breakdown_data)
    # Use column configuration for better display
    st.dataframe(
        breakdown_df,
        use_container_width=True,
        hide_index=True,
        column_config={
            "Node Type": st.column_config.TextColumn("Node Type", width="medium"),
            "Base": st.column_config.TextColumn("Base", width="small"),
            "Per Tenant": st.column_config.TextColumn("Per Tenant", width="small"),
            "Total": st.column_config.NumberColumn("Total", width="small"),
            "CPU": st.column_config.NumberColumn("CPU", width="small"),
            "RAM (GB)": st.column_config.NumberColumn("RAM (GB)", width="small"),
            "VM Type": st.column_config.TextColumn("VM Type", width="medium"),
            "Purpose": st.column_config.TextColumn("Purpose", width="large")
        }
    )
    # Visual breakdown
    col1, col2 = st.columns(2)
    with col1:
        # Node distribution pie chart (only node types with a non-zero total)
        node_counts = {node_type: details['total']
                       for node_type, details in infrastructure['node_breakdown'].items()
                       if details['total'] > 0}
        fig_nodes = px.pie(
            values=list(node_counts.values()),
            names=list(node_counts.keys()),
            title="Platform Node Distribution"
        )
        st.plotly_chart(fig_nodes, use_container_width=True)
    with col2:
        # Resource distribution: long-format rows so CPU and RAM appear as
        # grouped bars per node type.
        resource_data = []
        for node_type, details in infrastructure['node_breakdown'].items():
            if details['total'] > 0:
                resource_data.extend([
                    {'Node Type': node_type, 'Resource': 'CPU Cores', 'Amount': details['cores']},
                    {'Node Type': node_type, 'Resource': 'RAM (GB)', 'Amount': details['ram']}
                ])
        resource_df = pd.DataFrame(resource_data)
        fig_resources = px.bar(
            resource_df,
            x='Node Type',
            y='Amount',
            color='Resource',
            title='Resource Distribution by Node Type',
            barmode='group'
        )
        st.plotly_chart(fig_resources, use_container_width=True)
    # Node type distribution
    st.subheader("Node Type Distribution")
    col1, col2 = st.columns(2)
    with col1:
        st.metric(
            "Standard Nodes (8 vCPU, 32GB RAM)",
            infrastructure['totals']['total_standard_nodes'],
            help="Platform, Compute, and Deploy nodes"
        )
    with col2:
        st.metric(
            "High-Memory Nodes (16 vCPU, 64GB RAM)",
            infrastructure['totals']['total_vectordb_nodes'],
            help="VectorDB nodes with higher memory capacity"
        )
with tab2:
    st.subheader("GPU Requirements Analysis")
    # GPU requirements metrics: memory- and throughput-driven counts, the
    # logical minimum, and what is actually allocated after node packing.
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric(
            "Memory-based GPUs",
            gpu_requirements['gpus_needed_memory'],
            help="GPUs needed to fit model in memory"
        )
    with col2:
        st.metric(
            "Throughput-based GPUs",
            gpu_requirements['gpus_needed_throughput'],
            help="GPUs needed for required throughput"
        )
    with col3:
        st.metric(
            "Logical GPUs Needed",
            gpu_requirements['total_gpus_needed'],
            help="Minimum GPUs needed (before node configuration)"
        )
    with col4:
        st.metric(
            "Actual GPUs Allocated",
            gpu_requirements['actual_gpus_allocated'],
            help="GPUs allocated based on standard node configurations",
            # delta shows over-allocation caused by fixed node sizes
            delta=gpu_requirements['actual_gpus_allocated'] - gpu_requirements['total_gpus_needed']
        )
    # GPU Node Configuration Analysis
    st.subheader("🖥️ GPU Node Configuration Options")
    if gpu_requirements['gpu_configurations']:
        # Display configuration options in a table
        config_data = []
        for config in gpu_requirements['gpu_configurations']:
            efficiency_score = f"{config['utilization']:.1f}%"
            memory_compatible = "✅" if config['meets_memory_req'] else "❌"
            config_data.append({
                "GPUs/Node": config['gpus_per_node'],
                "Mem": memory_compatible,
                "Nodes": config['num_nodes'],
                "Total GPUs": config['total_gpus_allocated'],
                "GPU Util": efficiency_score,
                "Waste": config['gpu_waste'],
                "Mem Util": f"{config['memory_utilization']:.1f}%"
            })
        config_df = pd.DataFrame(config_data)
        st.dataframe(
            config_df,
            use_container_width=True,
            hide_index=True,
            column_config={
                "GPUs/Node": st.column_config.NumberColumn("GPUs/Node", width="small"),
                "Mem": st.column_config.TextColumn("Mem ✓", width="small"),
                "Nodes": st.column_config.NumberColumn("Nodes", width="small"),
                "Total GPUs": st.column_config.NumberColumn("Total GPUs", width="small"),
                "GPU Util": st.column_config.TextColumn("GPU Util", width="small"),
                "Waste": st.column_config.NumberColumn("Waste", width="small"),
                "Mem Util": st.column_config.TextColumn("Mem Util", width="small")
            }
        )
        # Highlight the recommended configuration
        if gpu_requirements['best_config']:
            best = gpu_requirements['best_config']
            st.success(f"💡 **Recommended Configuration**: {best['num_nodes']} nodes × {best['gpus_per_node']} GPUs = {best['total_gpus_allocated']} total GPUs ({best['utilization']:.1f}% utilization)")
    # Show minimum requirement info
    st.info(f"**Memory Constraint**: Minimum {gpu_requirements['min_gpus_per_node']} GPUs per node required to fit {gpu_requirements['model_memory_gb']:.1f}GB model in {gpu_spec['memory']}GB GPU memory")
    # GPU configuration visualization
    col1, col2 = st.columns(2)
    with col1:
        # Node configuration comparison
        if gpu_requirements['gpu_configurations']:
            config_chart_data = pd.DataFrame(gpu_requirements['gpu_configurations'])
            fig_configs = px.bar(
                config_chart_data,
                x='gpus_per_node',
                y='utilization',
                title='GPU Utilization by Node Configuration',
                labels={'gpus_per_node': 'GPUs per Node', 'utilization': 'Utilization (%)'}
            )
            st.plotly_chart(fig_configs, use_container_width=True)
    with col2:
        # GPU allocation vs requirement
        allocation_data = pd.DataFrame({
            'Metric': ['Required GPUs', 'Allocated GPUs'],
            'Count': [gpu_requirements['total_gpus_needed'], gpu_requirements['actual_gpus_allocated']]
        })
        fig_allocation = px.bar(
            allocation_data,
            x='Metric',
            y='Count',
            title='GPU Allocation vs Requirement',
            color='Metric'
        )
        st.plotly_chart(fig_allocation, use_container_width=True)
    # Model and GPU specifications
    st.subheader("🔧 Model & GPU Specifications")
    # GPU configuration table: single summary row; MoE models show
    # total vs active parameter counts.
    gpu_config_data = [{
        'Model': selected_model,
        'Parameters': f"{model_spec['params']}B ({model_spec['active_params']}B active)" if model_spec['params'] != model_spec['active_params'] else f"{model_spec['params']}B",
        'Model Memory Required': f"{gpu_requirements['model_memory_gb']:.1f} GB",
        'GPU Type': selected_gpu,
        'GPU Memory per Unit': f"{gpu_spec['memory']} GB",
        'GPUs Required (Logic)': gpu_requirements['total_gpus_needed'],
        'GPUs Allocated (Actual)': gpu_requirements['actual_gpus_allocated'],
        'GPU Nodes': f"{gpu_requirements['best_config']['num_nodes']} nodes × {gpu_requirements['best_config']['gpus_per_node']} GPUs" if gpu_requirements['best_config'] else 'N/A',
        'Total GPU Memory': f"{gpu_requirements['actual_gpus_allocated'] * gpu_spec['memory']} GB",
        'Memory Utilization': f"{(gpu_requirements['model_memory_gb'] / (gpu_requirements['actual_gpus_allocated'] * gpu_spec['memory']) * 100):.1f}%",
        'Precision': precision
    }]
    gpu_config_df = pd.DataFrame(gpu_config_data)
    st.dataframe(gpu_config_df, use_container_width=True)
    # Performance metrics
    col1, col2 = st.columns(2)
    with col1:
        # TPS comparison
        tps_data = pd.DataFrame({
            'Metric': ['Required TPS', 'Single GPU TPS', 'Total System TPS'],
            'Value': [
                gpu_requirements['required_tps'],
                gpu_requirements['estimated_tps'],
                gpu_requirements['total_system_tps']
            ]
        })
        fig_tps = px.bar(
            tps_data,
            x='Metric',
            y='Value',
            title='Tokens Per Second Analysis',
            color='Metric'
        )
        st.plotly_chart(fig_tps, use_container_width=True)
    with col2:
        # Capacity utilization
        utilization_data = pd.DataFrame({
            'Metric': ['Required Capacity', 'Available Capacity'],
            'Conversations/Min': [
                conversations_per_minute,
                gpu_requirements['max_conversations_per_minute']
            ]
        })
        fig_capacity = px.bar(
            utilization_data,
            x='Metric',
            y='Conversations/Min',
            title='Conversation Capacity Analysis',
            color='Metric'
        )
        st.plotly_chart(fig_capacity, use_container_width=True)
with tab3:
    st.subheader("Comprehensive Cost Analysis")
    # Show customization status for all providers.
    # These literals must stay in sync with the sidebar widget defaults above;
    # a provider is flagged "customized" when any tracked value differs.
    default_values = {
        'aws': {'standard': 0.384, 'vectordb': 0.768, 'jump': 0.096, 'k8s': 0.10},
        'azure': {'standard': 0.384, 'vectordb': 0.768, 'jump': 0.096, 'k8s': 0.0},
        'gcp': {'standard': 0.379, 'vectordb': 0.758, 'jump': 0.067, 'k8s': 0.10},
        'onprem': {'standard': 0.192, 'vectordb': 0.384, 'jump': 0.048, 'gpu_mult': 0.55, 'k8s': 0.05}
    }
    customizations = []
    # Check AWS customizations
    if (aws_standard_node != default_values['aws']['standard'] or
            aws_vectordb_node != default_values['aws']['vectordb'] or
            aws_k8s_management != default_values['aws']['k8s']):
        customizations.append("AWS")
    # Check Azure customizations
    if (azure_standard_node != default_values['azure']['standard'] or
            azure_vectordb_node != default_values['azure']['vectordb'] or
            azure_k8s_management != default_values['azure']['k8s']):
        customizations.append("Azure")
    # Check GCP customizations
    if (gcp_standard_node != default_values['gcp']['standard'] or
            gcp_vectordb_node != default_values['gcp']['vectordb'] or
            gcp_k8s_management != default_values['gcp']['k8s']):
        customizations.append("GCP")
    # Check On-Premise customizations
    if (onprem_standard_node != default_values['onprem']['standard'] or
            onprem_vectordb_node != default_values['onprem']['vectordb'] or
            onprem_gpu_multiplier != default_values['onprem']['gpu_mult'] or
            onprem_k8s_management != default_values['onprem']['k8s']):
        customizations.append("On-Premise")
    if customizations:
        st.warning(f"""
**✏️ Custom Pricing Active for: {', '.join(customizations)}**
Using user-configured pricing instead of defaults. View details in Technical Specifications tab or adjust in sidebar.
""")
    # Add info box about cost models
    st.info("""
**💡 Cost Model Information**:
- **Cloud Providers (AWS/Azure/GCP)**: Pay-as-you-go pricing with per-hour compute and GPU costs
- **On-Premise**: Hardware amortized over 3-year lifecycle + operating costs (power, cooling, maintenance)
- **Customization**: All pricing values can be adjusted in the sidebar to match your actual costs
**🔧 Customize:** Use the sidebar "Cloud Provider Pricing" sections to adjust costs
""")
    # Calculate costs for all providers
    all_costs = {}
    for provider in CLOUD_PRICING.keys():
        all_costs[provider] = calculate_detailed_costs(
            provider, infrastructure, gpu_requirements, gpu_spec, days
        )
    # Cost comparison table
    # Numeric total kept as a hidden column for chart plotting below.
    cost_comparison_data = []
    for provider, costs in all_costs.items():
        gpu_available = costs['totals']['gpu_available']
        cost_comparison_data.append({
            'Provider': provider,
            'GPU': '✅' if gpu_available else '❌',
            'Platform': f"${costs['totals']['platform_cost']:.2f}",
            'GPU Cost': format_cost_for_display(costs['totals']['gpu_cost'], gpu_available),
            'Total': format_cost_for_display(costs['totals']['total_cost'], gpu_available),
            'Per Hour': format_cost_for_display(costs['totals']['cost_per_hour'], gpu_available),
            'Per Day': format_cost_for_display(costs['totals']['cost_per_day'], gpu_available),
            'Total_Numeric': costs['totals']['total_cost'] if gpu_available else None,
            'GPU_Available': gpu_available
        })
    cost_df = pd.DataFrame(cost_comparison_data)
    display_cost_df = cost_df.drop(['Total_Numeric', 'GPU_Available'], axis=1)
    st.dataframe(
        display_cost_df,
        use_container_width=True,
        hide_index=True,
        column_config={
            "Provider": st.column_config.TextColumn("Provider", width="medium"),
            "GPU": st.column_config.TextColumn("GPU ✓", width="small"),
            "Platform": st.column_config.TextColumn("Platform Cost", width="medium"),
            "GPU Cost": st.column_config.TextColumn("GPU Cost", width="medium"),
            "Total": st.column_config.TextColumn("Total Cost", width="medium"),
            "Per Hour": st.column_config.TextColumn("$/Hour", width="medium"),
            "Per Day": st.column_config.TextColumn("$/Day", width="medium")
        }
    )
    # Add download button for cost report (full JSON snapshot of this run)
    report_data = create_downloadable_cost_report(
        all_costs, infrastructure, gpu_requirements, model_spec, gpu_spec,
        selected_model, selected_gpu, num_tenants, apps_per_tenant, conversations_per_minute,
        tokens_per_conversation, precision, time_period
    )
    st.download_button(
        label="📥 Download Complete Cost Report (JSON)",
        data=json.dumps(report_data, indent=2),
        file_name=f"llmops_cost_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
        mime="application/json",
        help="Download comprehensive cost analysis with all services and configurations"
    )
    # Create CSV version for easier viewing
    # One flat row per provider; 'N/A' fills GPU columns when the GPU is
    # not offered by that provider.
    csv_data = []
    for provider, provider_data in report_data['cost_breakdown_by_provider'].items():
        if provider_data['gpu_available']:
            csv_data.append({
                'Provider': provider,
                'GPU_Available': 'Yes',
                'Platform_Nodes': infrastructure['totals']['total_standard_nodes'],
                'VectorDB_Nodes': infrastructure['totals']['total_vectordb_nodes'],
                'GPU_Nodes': gpu_requirements['total_gpus_needed'],
                'Kubernetes_Nodes_Cost': provider_data['platform_costs']['kubernetes_nodes'],
                'VectorDB_Nodes_Cost': provider_data['platform_costs']['vectordb_nodes'],
                'Jump_Host_Cost': provider_data['platform_costs']['jump_host'],
                'Additional_Services_Cost': provider_data['platform_costs']['additional_services'],
                'K8s_Management_Cost': provider_data['platform_costs']['k8s_management'],
                'Total_Platform_Cost': provider_data['platform_costs']['platform_total'],
                'GPU_Cost_Per_Hour': provider_data['gpu_costs']['gpu_cost_per_hour'],
                'Total_GPU_Cost': provider_data['gpu_costs']['total_gpu_cost'],
                'Total_Infrastructure_Cost': provider_data['totals']['total_cost'],
                'Cost_Per_Hour': provider_data['totals']['cost_per_hour'],
                'Cost_Per_Day': provider_data['totals']['cost_per_day']
            })
        else:
            csv_data.append({
                'Provider': provider,
                'GPU_Available': 'No',
                'Platform_Nodes': infrastructure['totals']['total_standard_nodes'],
                'VectorDB_Nodes': infrastructure['totals']['total_vectordb_nodes'],
                'GPU_Nodes': 'N/A',
                'Kubernetes_Nodes_Cost': provider_data['platform_costs']['kubernetes_nodes'],
                'VectorDB_Nodes_Cost': provider_data['platform_costs']['vectordb_nodes'],
                'Jump_Host_Cost': provider_data['platform_costs']['jump_host'],
                'Additional_Services_Cost': provider_data['platform_costs']['additional_services'],
                'K8s_Management_Cost': provider_data['platform_costs']['k8s_management'],
                'Total_Platform_Cost': provider_data['platform_costs']['platform_total'],
                'GPU_Cost_Per_Hour': 'N/A',
                'Total_GPU_Cost': 'N/A',
                'Total_Infrastructure_Cost': 'N/A',
                'Cost_Per_Hour': 'N/A',
                'Cost_Per_Day': 'N/A'
            })
    csv_df = pd.DataFrame(csv_data)
    csv_string = csv_df.to_csv(index=False)
    st.download_button(
        label="📊 Download Cost Summary (CSV)",
        data=csv_string,
        file_name=f"llmops_cost_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
        mime="text/csv",
        help="Download cost summary in CSV format for spreadsheet analysis"
    )
    # Cost breakdown visualization - only for providers with GPU available
    available_providers_data = cost_df[cost_df['GPU_Available'] == True]
    col1, col2 = st.columns(2)
    with col1:
        # Provider comparison - only available providers
        if not available_providers_data.empty:
            fig_provider_comparison = px.bar(
                available_providers_data,
                x='Provider',
                y='Total_Numeric',
                title=f'Total Cost Comparison ({time_period}) - All Deployment Options',
                labels={'Total_Numeric': 'Total Cost (USD)'},
                color='Provider'
            )
            st.plotly_chart(fig_provider_comparison, use_container_width=True)
        else:
            st.warning("⚠️ No providers have the selected GPU available for cost comparison")
    with col2:
        # Cost breakdown for selected provider (cheapest available)
        available_providers = get_available_providers_for_gpu(gpu_spec)
        if available_providers:
            cheapest_provider = min(available_providers,
                                    key=lambda x: all_costs[x]['totals']['total_cost'])
            cheapest_costs = all_costs[cheapest_provider]
            breakdown_values = [
                cheapest_costs['totals']['platform_cost'],
                cheapest_costs['totals']['gpu_cost']
            ]
            breakdown_labels = ['Platform Infrastructure', 'GPU Infrastructure']
            fig_breakdown = px.pie(
                values=breakdown_values,
                names=breakdown_labels,
                title=f'{cheapest_provider} - Cost Breakdown'
            )
            st.plotly_chart(fig_breakdown, use_container_width=True)
        else:
            st.warning("⚠️ No providers have the selected GPU available")
    # Detailed cost breakdown for cheapest available provider
    available_providers = get_available_providers_for_gpu(gpu_spec)
    if available_providers:
        cheapest_provider = min(available_providers,
                                key=lambda x: all_costs[x]['totals']['total_cost'])
        st.subheader(f"💡 Most Cost-Effective Option: {cheapest_provider}")
        if cheapest_provider == 'On-Premise':
            st.success(f"✅ **On-Premise deployment offers the lowest cost** with {selected_gpu}")
            st.info("💰 **Note**: On-premise costs assume 3-year hardware amortization. Initial capex and datacenter setup costs are not included in hourly rates.")
        else:
            st.info(f"✅ **{selected_gpu} is available on {cheapest_provider}**")
        cheapest_costs = all_costs[cheapest_provider]
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric(
                "Platform Infrastructure",
                f"${cheapest_costs['totals']['platform_cost']:.2f}",
                help="Kubernetes nodes (including VectorDB), networking, storage, management"
            )
        with col2:
            st.metric(
                "GPU Infrastructure",
                f"${cheapest_costs['totals']['gpu_cost']:.2f}",
                help=f"{gpu_requirements['total_gpus_needed']} x {selected_gpu}"
            )
        with col3:
            # Calculate savings compared to most expensive available provider
            if len(available_providers) > 1:
                most_expensive_available = max(available_providers,
                                               key=lambda x: all_costs[x]['totals']['total_cost'])
                savings = all_costs[most_expensive_available]['totals']['total_cost'] - cheapest_costs['totals']['total_cost']
                savings_pct = (savings / all_costs[most_expensive_available]['totals']['total_cost']) * 100
                st.metric(
                    "Potential Savings",
                    f"${savings:.2f}",
                    help=f"Savings compared to {most_expensive_available} ({savings_pct:.1f}%)"
                )
            else:
                st.metric(
                    "Provider Status",
                    "Only Option",
                    help="This is the only provider with the selected GPU available"
                )
        # Cloud vs On-Premise comparison if both are available
        if 'On-Premise' in available_providers and len(available_providers) > 1:
            st.subheader("☁️ Cloud vs 🏢 On-Premise Comparison")
            onprem_cost = all_costs['On-Premise']['totals']['total_cost']
            cloud_providers = [p for p in available_providers if p != 'On-Premise']
            comparison_data = []
            for provider in ['On-Premise'] + cloud_providers:
                comparison_data.append({
                    'Deployment Type': 'On-Premise' if provider == 'On-Premise' else 'Cloud',
                    'Provider': provider,
                    'Total Cost': all_costs[provider]['totals']['total_cost'],
                    'Platform Cost': all_costs[provider]['totals']['platform_cost'],
                    'GPU Cost': all_costs[provider]['totals']['gpu_cost']
                })
            comp_df = pd.DataFrame(comparison_data)
            # Create grouped bar chart (platform + GPU stacked per provider)
            fig_comparison = go.Figure()
            fig_comparison.add_trace(go.Bar(
                name='Platform Cost',
                x=comp_df['Provider'],
                y=comp_df['Platform Cost'],
                marker_color='lightblue'
            ))
            fig_comparison.add_trace(go.Bar(
                name='GPU Cost',
                x=comp_df['Provider'],
                y=comp_df['GPU Cost'],
                marker_color='orange'
            ))
            fig_comparison.update_layout(
                title='Cost Breakdown: On-Premise vs Cloud',
                xaxis_title='Provider',
                yaxis_title='Cost (USD)',
                barmode='stack'
            )
            st.plotly_chart(fig_comparison, use_container_width=True)
            # Calculate average cloud cost (cloud_providers is non-empty here)
            avg_cloud_cost = sum([all_costs[p]['totals']['total_cost'] for p in cloud_providers]) / len(cloud_providers)
            cloud_savings = avg_cloud_cost - onprem_cost
            cloud_savings_pct = (cloud_savings / avg_cloud_cost) * 100
            if cloud_savings > 0:
                st.success(f"💰 **On-Premise Savings**: ${cloud_savings:.2f} ({cloud_savings_pct:.1f}%) compared to average cloud cost over {time_period}")
            else:
                st.info(f"☁️ **Cloud is more cost-effective** for this configuration over {time_period}")
    else:
        st.error(f"❌ **No Providers Available**: The selected GPU ({selected_gpu}) is not available on any deployment option")
        st.warning("**Recommendation**: Please select a different GPU model that is available")
        # Show which GPUs are available on which providers
        st.subheader("🔍 GPU Availability by Provider")
        availability_data = []
        for gpu_name, gpu_data in GPUS.items():
            available_on = get_available_providers_for_gpu(gpu_data)
            availability_data.append({
                'GPU Model': gpu_name,
                'Memory': f"{gpu_data['memory']} GB",
                'Available On': ', '.join(available_on) if available_on else 'None',
                'Deployment Options': len(available_on)
            })
        availability_df = pd.DataFrame(availability_data)
        availability_df = availability_df.sort_values('Deployment Options', ascending=False)
        st.dataframe(availability_df, use_container_width=True)
with tab4:
    st.subheader("Performance Analysis & Scaling")
    # Performance metrics
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(
            "Total System TPS",
            f"{gpu_requirements['total_system_tps']:.0f}",
            help="Combined throughput of all GPUs"
        )
    with col2:
        st.metric(
            "Conversation Capacity",
            f"{gpu_requirements['max_conversations_per_minute']:.0f}/min",
            help="Maximum conversations the system can handle"
        )
    with col3:
        # FIX: guard against ZeroDivisionError when the computed system
        # capacity is 0 (mirrors the same fix in the overview metrics).
        max_capacity = gpu_requirements['max_conversations_per_minute']
        capacity_headroom = max_capacity - conversations_per_minute
        headroom_percentage = (capacity_headroom / max_capacity) * 100 if max_capacity else 0.0
        st.metric(
            "Capacity Headroom",
            f"{headroom_percentage:.1f}%",
            delta=f"{capacity_headroom:.0f} conv/min available",
            help="Available capacity beyond current target load"
        )
    # Scaling analysis
    st.subheader("Scaling Analysis")
    # Create scaling scenarios: re-run the GPU sizing at several load multiples
    scaling_scenarios = [0.5, 1.0, 1.5, 2.0, 3.0, 5.0]
    scaling_data = []
    for multiplier in scaling_scenarios:
        scaled_conversations = int(conversations_per_minute * multiplier)
        scaled_gpu_reqs = calculate_gpu_requirements(
            scaled_conversations, tokens_per_conversation,
            model_spec, gpu_spec, precision_bytes
        )
        # FIX: same zero-capacity guard for the headroom column
        scaled_capacity = scaled_gpu_reqs['max_conversations_per_minute']
        scaled_headroom_pct = ((scaled_capacity - scaled_conversations) / scaled_capacity * 100) if scaled_capacity else 0.0
        scaling_data.append({
            'Load Multiplier': f"{multiplier}x",
            'Conversations/Min': scaled_conversations,
            'Logical GPUs': scaled_gpu_reqs['total_gpus_needed'],
            'Allocated GPUs': scaled_gpu_reqs['actual_gpus_allocated'],
            'GPU Nodes': f"{scaled_gpu_reqs['best_config']['num_nodes']}×{scaled_gpu_reqs['best_config']['gpus_per_node']}" if scaled_gpu_reqs['best_config'] else 'N/A',
            'System Capacity': f"{scaled_capacity:.0f}",
            'Headroom %': f"{scaled_headroom_pct:.1f}%"
        })
    scaling_df = pd.DataFrame(scaling_data)
    st.dataframe(
        scaling_df,
        use_container_width=True,
        hide_index=True,
        column_config={
            "Load Multiplier": st.column_config.TextColumn("Load", width="small"),
            "Conversations/Min": st.column_config.NumberColumn("Conv/Min", width="small"),
            "Logical GPUs": st.column_config.NumberColumn("Logical", width="small"),
            "Allocated GPUs": st.column_config.NumberColumn("Allocated", width="small"),
            "GPU Nodes": st.column_config.TextColumn("GPU Nodes", width="medium"),
            "System Capacity": st.column_config.TextColumn("Capacity", width="medium"),
            "Headroom %": st.column_config.TextColumn("Headroom %", width="small")
        }
    )
    # Scaling visualization
    fig_scaling = go.Figure()
    # Add lines for both logical and allocated GPUs ("0.5x" -> 0.5 on x axis)
    fig_scaling.add_trace(go.Scatter(
        x=[float(x.replace('x', '')) for x in scaling_df['Load Multiplier']],
        y=scaling_df['Logical GPUs'].astype(int),
        mode='lines+markers',
        name='Logical GPUs Required',
        line=dict(color='blue', dash='dash')
    ))
    fig_scaling.add_trace(go.Scatter(
        x=[float(x.replace('x', '')) for x in scaling_df['Load Multiplier']],
        y=scaling_df['Allocated GPUs'].astype(int),
        mode='lines+markers',
        name='Allocated GPUs (Actual)',
        line=dict(color='red')
    ))
    fig_scaling.update_layout(
        title='GPU Scaling Requirements (Logical vs Allocated)',
        xaxis_title='Load Multiplier',
        yaxis_title='Number of GPUs'
    )
    st.plotly_chart(fig_scaling, use_container_width=True)
    # Application scaling analysis
    st.subheader("Application Scaling Analysis")
    # Deployment nodes scale at 1 node per 4 apps (rounded up).
    app_scaling_scenarios = [4, 8, 12, 16, 20, 24, 32, 40]
    app_scaling_data = []
    for apps in app_scaling_scenarios:
        deploy_nodes = math.ceil(apps / 4)
        app_scaling_data.append({
            'Apps per Tenant': apps,
            'Total Apps': apps * num_tenants,
            'Deploy Nodes per Tenant': deploy_nodes,
            'Total Deploy Nodes': deploy_nodes * num_tenants,
            'Deploy Node Ratio': f"1:{4 if apps >= 4 else apps}"
        })
    app_scaling_df = pd.DataFrame(app_scaling_data)
    st.dataframe(
        app_scaling_df,
        use_container_width=True,
        hide_index=True,
        column_config={
            "Apps per Tenant": st.column_config.NumberColumn("Apps/Tenant", width="small"),
            "Total Apps": st.column_config.NumberColumn("Total Apps", width="small"),
            "Deploy Nodes per Tenant": st.column_config.NumberColumn("Deploy/Tenant", width="small"),
            "Total Deploy Nodes": st.column_config.NumberColumn("Total Deploy", width="medium"),
            "Deploy Node Ratio": st.column_config.TextColumn("Ratio", width="small")
        }
    )
    # App scaling visualization
    fig_app_scaling = px.line(
        app_scaling_df,
        x='Apps per Tenant',
        y='Total Deploy Nodes',
        title='Deployment Nodes Scaling with Application Count',
        markers=True
    )
    st.plotly_chart(fig_app_scaling, use_container_width=True)
with tab5:
st.subheader("Technical Specifications")
# Model specifications
st.markdown("### 🤖 LLM Model Specifications")
model_specs_data = [{
'Property': 'Model Name',
'Value': selected_model
}, {
'Property': 'Organization',
'Value': model_spec['org']
}, {
'Property': 'Total Parameters',
'Value': f"{model_spec['params']}B"
}, {
'Property': 'Active Parameters',
'Value': f"{model_spec['active_params']}B"
}, {
'Property': 'Max Context Length',
'Value': f"{model_spec['max_context']:,} tokens"
}, {
'Property': 'Base TPS',
'Value': f"{model_spec['base_tps']:,}"
}, {
'Property': 'License',
'Value': model_spec['license']
}, {
'Property': 'Architecture Type',
'Value': 'Mixture of Experts (MoE)' if model_spec['params'] != model_spec['active_params'] else 'Dense Model'
}]
model_specs_df = pd.DataFrame(model_specs_data)
st.dataframe(model_specs_df, use_container_width=True)
# GPU specifications
st.markdown("### 🖥️ GPU Specifications")
# Property/value rows for the selected GPU; `gpu_spec` and `precision`
# are set earlier in the function (outside this chunk).
gpu_specs_data = [{
'Property': 'GPU Model',
'Value': selected_gpu
}, {
'Property': 'Memory Capacity',
'Value': f"{gpu_spec['memory']} GB"
}, {
'Property': 'Compute Capability',
'Value': gpu_spec['compute']
}, {
# Throughput band (tokens/sec) rather than a single point estimate.
'Property': 'TPS Range',
'Value': f"{gpu_spec['tps_min']:,} - {gpu_spec['tps_max']:,}"
}, {
'Property': 'Efficiency Tier',
'Value': gpu_spec['efficiency_tier']
}, {
'Property': 'Model Precision',
'Value': precision
}]
gpu_specs_df = pd.DataFrame(gpu_specs_data)
st.dataframe(gpu_specs_df, use_container_width=True)
# Platform specifications
st.markdown("### 🗏 Platform Infrastructure Specifications")
# Component/specification rows summarizing the computed sizing. Totals come
# from the `infrastructure` and `gpu_requirements` dicts built earlier.
platform_specs_data = [{
'Component': 'Standard K8s Nodes',
'Specification': f"{infrastructure['totals']['total_standard_nodes']} nodes × 8 vCPUs × 32GB RAM"
}, {
'Component': 'VectorDB Nodes',
'Specification': f"{infrastructure['totals']['total_vectordb_nodes']} nodes × 16 vCPUs × 64GB RAM"
}, {
# When a node-packing config was found (`best_config` truthy) show the
# nodes × GPUs/node layout; otherwise fall back to the raw GPU count.
'Component': 'GPU Nodes',
'Specification': f"{gpu_requirements['actual_gpus_allocated']} × {selected_gpu} ({gpu_requirements['best_config']['num_nodes']} nodes × {gpu_requirements['best_config']['gpus_per_node']} GPUs)" if gpu_requirements['best_config'] else f"{gpu_requirements['total_gpus_needed']} × {selected_gpu}"
}, {
'Component': 'Total CPU Cores',
'Specification': f"{infrastructure['totals']['total_cpu']} cores"
}, {
'Component': 'Total RAM',
'Specification': f"{infrastructure['totals']['total_ram']} GB"
}, {
# Aggregate VRAM across all allocated GPUs of the selected type.
'Component': 'Total GPU Memory',
'Specification': f"{gpu_requirements['actual_gpus_allocated'] * gpu_spec['memory']} GB"
}, {
'Component': 'Applications per Tenant',
'Specification': f"{apps_per_tenant} apps × {num_tenants} tenants = {infrastructure['totals']['total_apps']} total apps"
}, {
'Component': 'Deployment Nodes per Tenant',
'Specification': f"{infrastructure['totals']['deploy_nodes_per_tenant']} node(s) (1 node per 4 apps)"
}]
platform_specs_df = pd.DataFrame(platform_specs_data)
st.dataframe(platform_specs_df, use_container_width=True)
# Provider Pricing Configuration Summary
st.markdown("### 💰 Provider Pricing Configuration")
# Create tabs for each provider
price_tab1, price_tab2, price_tab3, price_tab4 = st.tabs(["AWS", "Azure", "GCP", "On-Premise"])
# AWS tab: each row shows the current (possibly sidebar-overridden) hourly
# rate and flags it Custom vs. Default by comparing against the hard-coded
# default literal. NOTE(review): `!=` on floats is fragile if the sidebar
# widget ever returns a value with float rounding — confirm the widgets
# return the exact default literals.
with price_tab1:
aws_config_data = [{
'Cost Component': 'Standard Compute Node',
'Specification': 'm5.2xlarge (8 vCPU, 32GB)',
'Cost per Hour': f"${aws_standard_node:.3f}",
'Status': '✏️ Custom' if aws_standard_node != 0.384 else '✅ Default'
}, {
'Cost Component': 'VectorDB Node',
'Specification': 'm5.4xlarge (16 vCPU, 64GB)',
'Cost per Hour': f"${aws_vectordb_node:.3f}",
'Status': '✏️ Custom' if aws_vectordb_node != 0.768 else '✅ Default'
}, {
'Cost Component': 'Jump Host',
'Specification': 'm5.large (2 vCPU, 8GB)',
'Cost per Hour': f"${aws_jump_host:.3f}",
'Status': '✏️ Custom' if aws_jump_host != 0.096 else '✅ Default'
}, {
'Cost Component': 'EKS Management',
'Specification': 'Managed Kubernetes',
'Cost per Hour': f"${aws_k8s_management:.3f}",
'Status': '✏️ Custom' if aws_k8s_management != 0.10 else '✅ Default'
}, {
'Cost Component': 'H200 141GB GPU',
'Specification': 'Flagship+ GPU',
'Cost per Hour': f"${aws_h200:.2f}",
'Status': '✏️ Custom' if aws_h200 != 15.70 else '✅ Default'
}, {
'Cost Component': 'H100 80GB GPU',
'Specification': 'Flagship GPU',
'Cost per Hour': f"${aws_h100:.2f}",
'Status': '✏️ Custom' if aws_h100 != 6.01 else '✅ Default'
}, {
'Cost Component': 'A100 80GB GPU',
'Specification': 'Excellent GPU',
'Cost per Hour': f"${aws_a100_80:.2f}",
'Status': '✏️ Custom' if aws_a100_80 != 3.43 else '✅ Default'
}, {
'Cost Component': 'A100 40GB GPU',
'Specification': 'Good GPU',
'Cost per Hour': f"${aws_a100_40:.2f}",
'Status': '✏️ Custom' if aws_a100_40 != 2.75 else '✅ Default'
}, {
'Cost Component': 'L40S GPU',
'Specification': 'Very Good GPU',
'Cost per Hour': f"${aws_l40s:.2f}",
'Status': '✏️ Custom' if aws_l40s != 1.67 else '✅ Default'
}]
aws_config_df = pd.DataFrame(aws_config_data)
st.dataframe(aws_config_df, use_container_width=True)
# Azure tab: same Custom/Default pattern as the other provider tabs —
# the hard-coded literals are the Azure default hourly rates.
with price_tab2:
azure_config_data = [{
'Cost Component': 'Standard Compute Node',
'Specification': 'Standard_D8s_v3 (8 vCPU, 32GB)',
'Cost per Hour': f"${azure_standard_node:.3f}",
'Status': '✏️ Custom' if azure_standard_node != 0.384 else '✅ Default'
}, {
'Cost Component': 'VectorDB Node',
'Specification': 'Standard_D16s_v3 (16 vCPU, 64GB)',
'Cost per Hour': f"${azure_vectordb_node:.3f}",
'Status': '✏️ Custom' if azure_vectordb_node != 0.768 else '✅ Default'
}, {
'Cost Component': 'Jump Host',
'Specification': 'Standard_D2s_v3 (2 vCPU, 8GB)',
'Cost per Hour': f"${azure_jump_host:.3f}",
'Status': '✏️ Custom' if azure_jump_host != 0.096 else '✅ Default'
}, {
# AKS control plane is free by default, hence the 0.0 comparison.
'Cost Component': 'AKS Management',
'Specification': 'Managed Kubernetes (Free)',
'Cost per Hour': f"${azure_k8s_management:.3f}",
'Status': '✏️ Custom' if azure_k8s_management != 0.0 else '✅ Default'
}, {
'Cost Component': 'H200 141GB GPU',
'Specification': 'Flagship+ GPU',
'Cost per Hour': f"${azure_h200:.2f}",
'Status': '✏️ Custom' if azure_h200 != 12.29 else '✅ Default'
}, {
'Cost Component': 'H100 80GB GPU',
'Specification': 'Flagship GPU',
'Cost per Hour': f"${azure_h100:.2f}",
'Status': '✏️ Custom' if azure_h100 != 6.98 else '✅ Default'
}, {
'Cost Component': 'A100 80GB GPU',
'Specification': 'Excellent GPU',
'Cost per Hour': f"${azure_a100_80:.2f}",
'Status': '✏️ Custom' if azure_a100_80 != 3.67 else '✅ Default'
}, {
# NOTE(review): A100 40GB default (3.67) equals the 80GB default above —
# confirm this is intentional and not a copy-paste of the 80GB rate.
'Cost Component': 'A100 40GB GPU',
'Specification': 'Good GPU',
'Cost per Hour': f"${azure_a100_40:.2f}",
'Status': '✏️ Custom' if azure_a100_40 != 3.67 else '✅ Default'
}]
azure_config_df = pd.DataFrame(azure_config_data)
st.dataframe(azure_config_df, use_container_width=True)
# GCP tab: same Custom/Default pattern; note GCP's list has no H200/L40S
# rows (fewer GPU SKUs than the AWS tab).
with price_tab3:
gcp_config_data = [{
'Cost Component': 'Standard Compute Node',
'Specification': 'n1-standard-8 (8 vCPU, 30GB)',
'Cost per Hour': f"${gcp_standard_node:.3f}",
'Status': '✏️ Custom' if gcp_standard_node != 0.379 else '✅ Default'
}, {
'Cost Component': 'VectorDB Node',
'Specification': 'n1-standard-16 (16 vCPU, 60GB)',
'Cost per Hour': f"${gcp_vectordb_node:.3f}",
'Status': '✏️ Custom' if gcp_vectordb_node != 0.758 else '✅ Default'
}, {
'Cost Component': 'Jump Host',
'Specification': 'e2-medium (2 vCPU, 8GB)',
'Cost per Hour': f"${gcp_jump_host:.3f}",
'Status': '✏️ Custom' if gcp_jump_host != 0.067 else '✅ Default'
}, {
'Cost Component': 'GKE Management',
'Specification': 'Managed Kubernetes',
'Cost per Hour': f"${gcp_k8s_management:.3f}",
'Status': '✏️ Custom' if gcp_k8s_management != 0.10 else '✅ Default'
}, {
'Cost Component': 'H100 80GB GPU',
'Specification': 'Flagship GPU',
'Cost per Hour': f"${gcp_h100:.2f}",
'Status': '✏️ Custom' if gcp_h100 != 11.06 else '✅ Default'
}, {
'Cost Component': 'A100 80GB GPU',
'Specification': 'Excellent GPU',
'Cost per Hour': f"${gcp_a100_80:.2f}",
'Status': '✏️ Custom' if gcp_a100_80 != 2.48 else '✅ Default'
}, {
'Cost Component': 'A100 40GB GPU',
'Specification': 'Good GPU',
'Cost per Hour': f"${gcp_a100_40:.2f}",
'Status': '✏️ Custom' if gcp_a100_40 != 1.46 else '✅ Default'
}]
gcp_config_df = pd.DataFrame(gcp_config_data)
st.dataframe(gcp_config_df, use_container_width=True)
# On-Premise tab: the hard-coded default literals mirror the values in
# CLOUD_PRICING['On-Premise'] at the top of this file (0.192 / 0.384 /
# 0.048 / 0.05 / 0.55 multiplier / etc.).
with price_tab4:
onprem_config_data = [{
'Cost Component': 'Standard Compute Node',
'Specification': '8 vCPU, 32GB RAM',
'Cost per Hour': f"${onprem_standard_node:.3f}",
'Status': '✏️ Custom' if onprem_standard_node != 0.192 else '✅ Default'
}, {
'Cost Component': 'VectorDB Node',
'Specification': '16 vCPU, 64GB RAM',
'Cost per Hour': f"${onprem_vectordb_node:.3f}",
'Status': '✏️ Custom' if onprem_vectordb_node != 0.384 else '✅ Default'
}, {
'Cost Component': 'Jump Host',
'Specification': '2 vCPU, 8GB RAM',
'Cost per Hour': f"${onprem_jump_host:.3f}",
'Status': '✏️ Custom' if onprem_jump_host != 0.048 else '✅ Default'
}, {
# GPU rate is derived from AWS pricing via the on-prem multiplier.
# NOTE(review): the pricing key here is lowercase 'on-premise' while the
# CLOUD_PRICING provider key is 'On-Premise' — confirm the GPUS dict
# really uses the lowercase key.
'Cost Component': 'GPU Pricing',
'Specification': f'{onprem_gpu_multiplier*100:.0f}% of AWS pricing',
'Cost per Hour': f"${GPUS[selected_gpu]['pricing']['on-premise']:.2f} (for {selected_gpu})",
'Status': '✏️ Custom' if onprem_gpu_multiplier != 0.55 else '✅ Default'
}, {
'Cost Component': 'K8s Management',
'Specification': 'Self-managed operational cost',
'Cost per Hour': f"${onprem_k8s_management:.3f}",
'Status': '✏️ Custom' if onprem_k8s_management != 0.05 else '✅ Default'
}, {
'Cost Component': 'Network Infrastructure',
'Specification': 'Switches, routers, firewalls',
'Cost per Hour': f"${onprem_network:.3f}",
'Status': '✏️ Custom' if onprem_network != 0.020 else '✅ Default'
}, {
# Storage is priced per GB-month, unlike the other hourly rows.
'Cost Component': 'Storage SAN/NAS',
'Specification': 'Per GB per month',
'Cost per Hour': f"${onprem_storage_per_gb:.3f}/GB/month",
'Status': '✏️ Custom' if onprem_storage_per_gb != 0.05 else '✅ Default'
}, {
'Cost Component': 'Hardware Load Balancer',
'Specification': 'F5/Citrix ADC amortized',
'Cost per Hour': f"${onprem_load_balancer:.3f}",
'Status': '✏️ Custom' if onprem_load_balancer != 0.010 else '✅ Default'
}, {
'Cost Component': 'Power & Cooling',
'Specification': 'Datacenter utilities',
'Cost per Hour': f"${onprem_power_cooling:.3f}",
'Status': '✏️ Custom' if onprem_power_cooling != 0.030 else '✅ Default'
}, {
'Cost Component': 'Datacenter Space',
'Specification': 'Rack space and facilities',
'Cost per Hour': f"${onprem_datacenter_space:.3f}",
'Status': '✏️ Custom' if onprem_datacenter_space != 0.015 else '✅ Default'
}, {
'Cost Component': 'Maintenance & Support',
'Specification': 'Vendor support contracts',
'Cost per Hour': f"${onprem_maintenance:.3f}",
'Status': '✏️ Custom' if onprem_maintenance != 0.025 else '✅ Default'
}]
onprem_config_df = pd.DataFrame(onprem_config_data)
st.dataframe(onprem_config_df, use_container_width=True)
# Static usage tips for the pricing configuration (rendered as markdown).
st.markdown("""
**💡 Configuration Tips:**
- Adjust pricing in the sidebar under "Cloud Provider Pricing (Optional)"
- Default values based on public pricing as of 2024/2025
- Customize based on your actual contract rates, discounts, or negotiated pricing
- All calculations update automatically when values are changed
- Click "🔄 Reset All Pricing to Defaults" in sidebar to restore original values
""")
# VM Types Summary
st.markdown("### 🖥️ Deployment Options Summary")
# One row per provider, pulling instance-type labels straight out of the
# CLOUD_PRICING dict defined at the top of the file.
deployment_options_data = []
for provider in CLOUD_PRICING.keys():
pricing = CLOUD_PRICING[provider]
deployment_options_data.append({
'Provider': provider,
'Standard Node': pricing['description'],
'VectorDB Node': pricing['vectordb_node']['instance_type'],
'Jump Host': pricing['jump_host']['instance_type'],
# NOTE(review): this column is labeled "Managed K8s" but displays the
# provider's 'name' field — confirm that's the intended value.
'Managed K8s': pricing['name']
})
deployment_df = pd.DataFrame(deployment_options_data)
st.dataframe(deployment_df, use_container_width=True)
# Recommendations section
st.header("💡 Recommendations & Insights")
col1, col2 = st.columns(2)
# Left column: performance guidance driven by the computed GPU sizing.
with col1:
st.subheader("🎯 Performance Recommendations")
# Bottleneck flag comes from gpu_requirements (computed earlier).
if gpu_requirements['bottleneck'] == 'Memory':
st.info("💾 **Memory-bound workload**: Consider using INT8 or INT4 quantization to reduce memory requirements")
else:
st.info("⚡ **Throughput-bound workload**: Current memory is sufficient, focus on GPU count for throughput")
# Headroom = spare capacity above the target load, as a % of max capacity.
# NOTE(review): divides by max_conversations_per_minute — would raise
# ZeroDivisionError if that can ever be 0; confirm upstream guarantees > 0.
capacity_headroom = gpu_requirements['max_conversations_per_minute'] - conversations_per_minute
headroom_percentage = (capacity_headroom / gpu_requirements['max_conversations_per_minute']) * 100
# Thresholds: <20% = near capacity, >70% = ample room, else balanced.
if headroom_percentage < 20:
st.warning(f"🚨 **Low headroom** ({headroom_percentage:.1f}%): System near capacity. Consider adding more GPUs or optimizing workload distribution")
elif headroom_percentage > 70:
st.success(f"✅ **High headroom** ({headroom_percentage:.1f}%): System has significant capacity for growth")
else:
st.info(f"📊 **Balanced headroom** ({headroom_percentage:.1f}%): Good balance between capacity and resource efficiency")
# Application deployment recommendations
# App-density guidance: >12 apps/tenant is flagged, <=4 fits one node.
if apps_per_tenant > 12:
st.warning(f"📦 **High app density**: {apps_per_tenant} apps per tenant requires {infrastructure['totals']['deploy_nodes_per_tenant']} deployment nodes. Consider application consolidation")
elif apps_per_tenant <= 4:
st.success(f"✅ **Efficient deployment**: Only 1 deployment node needed for {apps_per_tenant} apps per tenant")
else:
st.info(f"📊 **Moderate app density**: {infrastructure['totals']['deploy_nodes_per_tenant']} deployment nodes for {apps_per_tenant} apps per tenant")
# Right column: cost-optimization guidance. Branches on how many
# providers actually offer the selected GPU (2+, exactly 1, or none).
with col2:
st.subheader("💰 Cost Optimization")
available_providers = get_available_providers_for_gpu(gpu_spec)
if len(available_providers) >= 2:
# Rank providers by total cost from the all_costs dict built earlier.
cheapest_provider = min(available_providers,
key=lambda x: all_costs[x]['totals']['total_cost'])
most_expensive_provider = max(available_providers,
key=lambda x: all_costs[x]['totals']['total_cost'])
savings = all_costs[most_expensive_provider]['totals']['total_cost'] - all_costs[cheapest_provider]['totals']['total_cost']
savings_percentage = (savings / all_costs[most_expensive_provider]['totals']['total_cost']) * 100
# On-premise wins get an extra capex/operational caveat.
if cheapest_provider == 'On-Premise':
st.success(f"💡 **Recommended Option**: On-Premise Deployment")
st.info(f"💰 **Cost Advantage**: ${savings:.2f} ({savings_percentage:.1f}%) savings compared to {most_expensive_provider}")
st.warning("⚠️ **Consider**: Initial capex, datacenter readiness, and operational expertise for on-premise")
else:
st.success(f"💡 **Recommended Provider**: {cheapest_provider}")
st.info(f"💰 **Potential Savings**: ${savings:.2f} ({savings_percentage:.1f}%) compared to {most_expensive_provider}")
# Cost distribution insight
# Split the winner's total into platform vs GPU share.
cheapest_costs = all_costs[cheapest_provider]
platform_percentage = (cheapest_costs['totals']['platform_cost'] / cheapest_costs['totals']['total_cost']) * 100
gpu_percentage = (cheapest_costs['totals']['gpu_cost'] / cheapest_costs['totals']['total_cost']) * 100
if gpu_percentage > 70:
st.warning("🖥️ **GPU-heavy costs**: Consider optimizing model size or using more efficient GPUs")
else:
st.info(f"⚖️ **Balanced infrastructure**: Platform ({platform_percentage:.0f}%) vs GPU ({gpu_percentage:.0f}%)")
elif len(available_providers) == 1:
# Single-provider case: no comparison possible, just show its breakdown.
available_provider = available_providers[0]
st.success(f"💡 **Available Option**: {available_provider}")
if available_provider == 'On-Premise':
st.info(f"🏢 On-premise is your only deployment option for this GPU")
else:
st.warning("⚠️ **Limited Options**: Only one provider has the selected GPU available")
# Show cost distribution for the only available provider
provider_costs = all_costs[available_provider]
platform_percentage = (provider_costs['totals']['platform_cost'] / provider_costs['totals']['total_cost']) * 100
gpu_percentage = (provider_costs['totals']['gpu_cost'] / provider_costs['totals']['total_cost']) * 100
st.info(f"📊 **Cost Distribution**: Platform ({platform_percentage:.0f}%) vs GPU ({gpu_percentage:.0f}%)")
else:
# No provider offers this GPU: surface up to three alternatives that are
# available somewhere, drawn from the GPUS catalog.
st.error("❌ **No Available Options**: Selected GPU is not available on any deployment option")
st.warning("**Action Required**: Please select a different GPU model")
# Show alternative GPUs
st.markdown("**💡 Suggested Alternatives:**")
alternatives = []
for gpu_name, gpu_data in GPUS.items():
available_on = get_available_providers_for_gpu(gpu_data)
if available_on:
alternatives.append(f"• **{gpu_name}** - Available on: {', '.join(available_on)}")
if alternatives:
for alt in alternatives[:3]: # Show top 3 alternatives
st.markdown(alt)
# Infrastructure Summary Box
# Three-column recap of the whole configuration: platform nodes, GPU
# sizing, and performance metrics.
st.header("📋 Infrastructure Summary")
summary_col1, summary_col2, summary_col3 = st.columns(3)
with summary_col1:
st.markdown("### Platform Infrastructure")
st.markdown(f"""
- **Tenants**: {num_tenants}
- **Apps per Tenant**: {apps_per_tenant}
- **Total Applications**: {infrastructure['totals']['total_apps']}
- **Standard Nodes**: {infrastructure['totals']['total_standard_nodes']} (8 vCPU, 32GB)
- **VectorDB Nodes**: {infrastructure['totals']['total_vectordb_nodes']} (16 vCPU, 64GB)
- **Total Platform Nodes**: {infrastructure['totals']['total_nodes']}
""")
with summary_col2:
st.markdown("### GPU Infrastructure")
# NOTE(review): unlike the platform-specs table above, this indexes
# best_config unconditionally — would raise if best_config is falsy;
# confirm it is always set when this section renders.
st.markdown(f"""
- **Model**: {selected_model}
- **GPU Type**: {selected_gpu}
- **Precision**: {precision}
- **GPUs Required**: {gpu_requirements['total_gpus_needed']}
- **GPUs Allocated**: {gpu_requirements['actual_gpus_allocated']}
- **GPU Configuration**: {gpu_requirements['best_config']['num_nodes']} nodes × {gpu_requirements['best_config']['gpus_per_node']} GPUs
""")
with summary_col3:
st.markdown("### Performance Metrics")
# Headroom recomputed here (same formula as the recommendations column).
capacity_headroom = gpu_requirements['max_conversations_per_minute'] - conversations_per_minute
headroom_percentage = (capacity_headroom / gpu_requirements['max_conversations_per_minute']) * 100
st.markdown(f"""
- **Target Load**: {conversations_per_minute} conv/min
- **Max Capacity**: {gpu_requirements['max_conversations_per_minute']:.0f} conv/min
- **Capacity Headroom**: {headroom_percentage:.1f}%
- **Bottleneck**: {gpu_requirements['bottleneck']}
- **Total TPS**: {gpu_requirements['total_system_tps']:.0f}
- **Tokens/Conv**: {tokens_per_conversation}
""")
# Entry point: `streamlit run` executes this file as __main__, so the
# guard fires and builds the full dashboard (defined earlier in the file).
if __name__ == "__main__":
create_comprehensive_dashboard()