Upload finops/finops-policy.yaml with huggingface_hub
Browse files- finops/finops-policy.yaml +73 -0
finops/finops-policy.yaml
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =============================================================================
|
| 2 |
+
# FinOps Policy — Cloud Waste Detection & Rightsizing Rules
|
| 3 |
+
# =============================================================================
|
| 4 |
+
|
| 5 |
+
policies:
|
| 6 |
+
# --- Unused Resources ---
|
| 7 |
+
- id: FINOPS-001
|
| 8 |
+
name: "Detect unused EBS volumes"
|
| 9 |
+
severity: medium
|
| 10 |
+
check: "aws ec2 describe-volumes --filters Name=status,Values=available"
|
| 11 |
+
action: "Create snapshot, delete volume after 7 days"
|
| 12 |
+
estimated_savings: "$50-200/month per volume"
|
| 13 |
+
|
| 14 |
+
- id: FINOPS-002
|
| 15 |
+
name: "Detect idle RDS instances"
|
| 16 |
+
severity: medium
|
| 17 |
+
check: "Connection count < 5 for 7 days"
|
| 18 |
+
action: "Downsize instance class or stop non-prod"
|
| 19 |
+
estimated_savings: "30-50% of RDS cost"
|
| 20 |
+
|
| 21 |
+
- id: FINOPS-003
|
| 22 |
+
name: "Detect unattached EIPs"
|
| 23 |
+
severity: low
|
| 24 |
+
check: "aws ec2 describe-addresses --query 'Addresses[?AssociationId==`null`]'"
|
| 25 |
+
action: "Release EIP"
|
| 26 |
+
estimated_savings: "$3.60/month per EIP"
|
| 27 |
+
|
| 28 |
+
# --- Rightsizing ---
|
| 29 |
+
- id: FINOPS-010
|
| 30 |
+
name: "EC2 rightsizing recommendations"
|
| 31 |
+
severity: medium
|
| 32 |
+
check: "CPU < 15% for 14 days OR Memory < 25% for 14 days"
|
| 33 |
+
action: "Downsize to next tier (e.g., m6i.xlarge -> m6i.large)"
|
| 34 |
+
estimated_savings: "20-40% per instance"
|
| 35 |
+
|
| 36 |
+
- id: FINOPS-011
|
| 37 |
+
name: "Over-provisioned K8s workloads"
|
| 38 |
+
severity: medium
|
| 39 |
+
check: "Container CPU request > 2x actual P95 usage"
|
| 40 |
+
action: "Reduce requests to P95 + 20% headroom"
|
| 41 |
+
estimated_savings: "30-50% of cluster cost"
|
| 42 |
+
|
| 43 |
+
# --- Scheduling ---
|
| 44 |
+
- id: FINOPS-020
|
| 45 |
+
name: "Non-prod environment scheduling"
|
| 46 |
+
severity: high
|
| 47 |
+
check: "Dev/staging workloads running 24/7"
|
| 48 |
+
action: "Scale to 0 outside business hours (Mon-Fri 8am-8pm)"
|
| 49 |
+
estimated_savings: "65% of non-prod compute"
|
| 50 |
+
|
| 51 |
+
# --- Reserved Instances Coverage ---
|
| 52 |
+
- id: FINOPS-030
|
| 53 |
+
name: "RI coverage below 70%"
|
| 54 |
+
severity: high
|
| 55 |
+
check: "RI coverage < 70% for consistent workloads"
|
| 56 |
+
action: "Purchase RIs for EKS node groups + RDS"
|
| 57 |
+
estimated_savings: "30-40% vs on-demand"
|
| 58 |
+
|
| 59 |
+
# --- Storage Tiering ---
|
| 60 |
+
- id: FINOPS-040
|
| 61 |
+
name: "S3 intelligent tiering"
|
| 62 |
+
severity: medium
|
| 63 |
+
check: "S3 objects stored in the STANDARD storage class for more than 90 days"
|
| 64 |
+
action: "Enable S3 Intelligent-Tiering on all buckets"
|
| 65 |
+
estimated_savings: "40-60% on infrequent access data"
|
| 66 |
+
|
| 67 |
+
# --- GPU Utilization ---
|
| 68 |
+
- id: FINOPS-050
|
| 69 |
+
name: "Underutilized GPU nodes"
|
| 70 |
+
severity: critical
|
| 71 |
+
check: "GPU utilization < 30% for 4 hours"
|
| 72 |
+
action: "Scale down GPU node group or use KEDA for demand-based scaling"
|
| 73 |
+
estimated_savings: "$2-6/hour per GPU"
|