Spaces:

scaler-hack
/

scaler-openenv

Sleeping

App Files Files Community

scaler-openenv / alert.py

suraj-01

Initial

b14c6e3 about 1 month ago

raw

history blame contribute delete

6.65 kB

	#!/usr/bin/env python3
	"""
	Adaptive Alert Triage — Alert Flood Generator (FIXED)

	Sends realistic alerts to the Docker server's /ingest/alerts endpoint.
	The server queues them so the RL environment uses real alert distributions
	instead of purely synthetic ones.

	Fixes from original:
	1. Field name was "type" — server expects "alert_type"
	2. Added --burst mode to send correlated chains (tests hard task)
	3. Server health check before flooding
	4. Correlation group metadata included

	Usage:
	python alert.py --count 500
	python alert.py --count 200 --burst --rate 0.02
	python alert.py --server https://your-ngrok-url.ngrok.io --count 1000
	"""

	import requests
	import random
	import time
	import json
	import argparse
	import threading
	from typing import List, Dict

	# Alert categories; generate_alert picks one of these at random when the
	# caller does not force a specific type.
	ALERT_TYPES = ["CPU", "MEMORY", "DISK", "NETWORK", "APPLICATION", "SECURITY"]

	# Realistic correlation chains (mimics utils.generate_correlated_alerts).
	# Each chain is an ordered list of alert types: generate_correlated_burst
	# picks one chain at random and emits one alert per entry, in this order.
	CORRELATION_CHAINS = [
	    ["CPU", "MEMORY", "APPLICATION"],
	    ["NETWORK", "APPLICATION", "APPLICATION"],
	    ["DISK", "MEMORY", "APPLICATION"],
	    ["SECURITY", "NETWORK", "APPLICATION"],
	]


	def generate_alert(alert_id: int, force_type: str = None) -> Dict:
	    """
	    Generate a single randomized alert payload.

	    Severity is drawn uniformly from [0.25, 0.99]. Confidence is drawn from
	    [0.45, 0.99] by default, from [0.80, 0.99] when severity is above 0.85,
	    and from [0.40, 0.65] when severity is below 0.40. Both values are
	    rounded to two decimals.

	    The alert type is sent under the key "alert_type" (not "type"): this
	    file's header states that is the field name the server's
	    AlertIngestionRequest schema expects.

	    Args:
	        alert_id: Numeric id, rendered zero-padded as ``prod-XXXXXXXX``.
	        force_type: Alert type to use verbatim. When falsy, a random entry
	            of ALERT_TYPES is chosen instead.

	    Returns:
	        Dict with keys ``id``, ``visible_severity``, ``confidence``,
	        ``alert_type`` and ``age`` (always 0).
	    """
	    severity = round(random.uniform(0.25, 0.99), 2)
	    confidence = round(random.uniform(0.45, 0.99), 2)

	    # Higher-severity alerts tend to have higher detection confidence,
	    # and low-severity ones lower confidence.
	    if severity > 0.85:
	        confidence = round(random.uniform(0.80, 0.99), 2)
	    elif severity < 0.40:
	        confidence = round(random.uniform(0.40, 0.65), 2)

	    atype = force_type if force_type else random.choice(ALERT_TYPES)

	    return {
	        "id": f"prod-{alert_id:08d}",
	        "visible_severity": severity,
	        "confidence": confidence,
	        # Was "type", contradicting the documented server schema.
	        "alert_type": atype,
	        "age": 0,
	    }


	def generate_correlated_burst(start_id: int) -> List[Dict]:
	    """
	    Generate one burst of correlated alerts (simulates a cascading failure).

	    A chain is picked at random from CORRELATION_CHAINS and one alert is
	    produced per chain entry (every chain currently has three entries).
	    Severity escalates along the chain: roughly 0.60 + 0.12 * position with
	    +/-0.05 jitter, capped at 0.99. Confidence is redrawn from [0.75, 0.95].

	    Args:
	        start_id: Id of the first alert; later alerts use consecutive ids.

	    Returns:
	        List of alert dicts as built by generate_alert, each extended with
	        ``is_correlated`` (True) and ``correlation_chain`` (the list of alert
	        types making up the chain).
	    """
	    chain = random.choice(CORRELATION_CHAINS)
	    alerts = []
	    for position, atype in enumerate(chain):
	        escalated = round(0.60 + position * 0.12 + random.uniform(-0.05, 0.05), 2)
	        escalated = min(escalated, 0.99)

	        alert = generate_alert(start_id + position, force_type=atype)
	        alert["visible_severity"] = escalated
	        alert["confidence"] = round(random.uniform(0.75, 0.95), 2)
	        alert["is_correlated"] = True
	        # Store a copy: handing out the list object held in the module-level
	        # CORRELATION_CHAINS would let any consumer that edits an alert's
	        # chain silently corrupt the constant (and every sibling alert).
	        alert["correlation_chain"] = list(chain)
	        alerts.append(alert)
	    return alerts


	def check_server(server_url: str) -> bool:
	    """
	    Probe the server's health endpoint.

	    Issues ``GET {server_url}/health`` with a 4 second timeout.

	    Returns:
	        True only when the response status is 200; False for any other
	        status or when the request raises for any reason.
	    """
	    health_url = f"{server_url}/health"
	    try:
	        response = requests.get(health_url, timeout=4)
	        healthy = response.status_code == 200
	    except Exception:
	        # Best-effort probe: any failure simply means "not reachable".
	        healthy = False
	    return healthy


	def send_alerts(server_url: str, alerts: List[Dict], rate: float) -> int:
	    """
	    POST each alert to ``{server_url}/ingest/alerts`` and count acceptances.

	    Alerts are sent sequentially over one HTTP session with a 5 second
	    timeout each. A failed or rejected alert is reported on stdout and the
	    flood continues with the next one.

	    Args:
	        server_url: Base URL of the server (no trailing slash expected).
	        alerts: Alert payloads, each sent as the JSON body of one request.
	        rate: Seconds to pause after each alert; values <= 0 disable the
	            pause.

	    Returns:
	        Number of alerts the server answered with HTTP 200.
	    """
	    endpoint = f"{server_url}/ingest/alerts"
	    total = len(alerts)
	    queued = 0
	    print(f"\n🚨 Sending {total} alerts to {endpoint} ...")

	    # Context manager so the session's pooled connections are released even
	    # if the loop is interrupted.
	    with requests.Session() as session:
	        for i, alert in enumerate(alerts):
	            try:
	                resp = session.post(
	                    endpoint,
	                    json=alert,
	                    headers={"Content-Type": "application/json"},
	                    timeout=5,
	                )
	            except Exception as e:
	                # Best-effort flood: report and keep going.
	                print(f" ⚠ alert {i}: {e}")
	            else:
	                if resp.status_code == 200:
	                    queued += 1
	                    if i % 100 == 0 and i > 0:
	                        # The alert is already accepted; an unparsable body
	                        # must not be reported as a send failure.
	                        try:
	                            q = resp.json().get("queued", "?")
	                        except (ValueError, AttributeError):
	                            q = "?"
	                        print(f" {i+1:>5}/{total} queued={q}")
	                else:
	                    print(f" ❌ alert {i}: HTTP {resp.status_code} — {resp.text[:80]}")

	            # time.sleep raises ValueError on negative input, which would
	            # abort the whole flood; treat non-positive rates as "no pause".
	            if rate > 0:
	                time.sleep(rate)

	    print(f"\n✅ Flood done — {queued}/{total} accepted")
	    return queued


	def monitor_loop(server_url: str):
	    """
	    Background thread body: print live server metrics every 5 seconds.

	    Each cycle sleeps 5 s, fetches ``{server_url}/metrics`` (3 s timeout) and
	    prints mean_score, queue_size, active_alerts and episodes_completed,
	    showing '?' for any missing key. Any error during a cycle is ignored and
	    the loop carries on; the function never returns.
	    """
	    metrics_url = f"{server_url}/metrics"
	    while True:
	        time.sleep(5)
	        try:
	            snapshot = requests.get(metrics_url, timeout=3).json()
	            score = snapshot.get('mean_score', '?')
	            queue = snapshot.get('queue_size', '?')
	            active = snapshot.get('active_alerts', '?')
	            episodes = snapshot.get('episodes_completed', '?')
	            print(f"\n📈 METRICS score={score} queue={queue} active={active} episodes={episodes}")
	        except Exception:
	            # Monitoring is purely informational; skip this cycle on failure.
	            pass


	def main():
	    """
	    Command-line entry point: build an alert list and flood the server.

	    Steps: parse arguments, print the run configuration, verify the server's
	    /health endpoint, generate ``--count`` alerts (optionally interleaving
	    correlated bursts with ``--burst``), optionally start the metrics monitor
	    thread, send the alerts, then print the server's final /metrics payload.

	    Returns early (without sending anything) when the health check fails.
	    Exits via argparse's error path when ``--rate`` is negative.
	    """
	    parser = argparse.ArgumentParser(description="Flood the alert triage server")
	    # NOTE(review): the default is an ngrok tunnel hostname, which is
	    # presumably ephemeral — confirm it is still valid or pass --server.
	    parser.add_argument("--server", default="https://731a-2401-4900-619d-77b-a572-ef53-21cd-5507.ngrok-free.app")
	    parser.add_argument("--count", type=int, default=500)
	    parser.add_argument("--rate", type=float, default=0.05,
	                        help="Seconds between alerts (default 0.05)")
	    parser.add_argument("--burst", action="store_true",
	                        help="Interleave correlated 3-alert bursts (hard task training)")
	    parser.add_argument("--monitor", action="store_true",
	                        help="Print live metrics while flooding")
	    args = parser.parse_args()

	    # A negative pause would make time.sleep raise mid-flood; reject it here.
	    if args.rate < 0:
	        parser.error("--rate must be >= 0")

	    # Endpoints are built as f"{server}/path"; a trailing slash on the base
	    # URL would otherwise yield "//health", "//ingest/alerts", etc.
	    server = args.server.rstrip("/")

	    print("=" * 55)
	    print(" Alert Triage — Alert Flood Generator")
	    print("=" * 55)
	    print(f" Server : {server}")
	    print(f" Count : {args.count}")
	    print(f" Rate : {args.rate}s / alert")
	    print(f" Burst : {args.burst}")

	    # Sanity check
	    if not check_server(server):
	        print(f"\n❌ Server not reachable at {server}")
	        print(" Start with: docker compose up --build -d")
	        return

	    print(" Server : ✅ reachable\n")

	    # Build alert list. Ids are consecutive across single alerts and bursts.
	    alerts: List[Dict] = []
	    next_id = 0
	    while len(alerts) < args.count:
	        if args.burst and random.random() < 0.15:  # 15% chance of a burst
	            burst = generate_correlated_burst(next_id)
	            alerts.extend(burst)
	            next_id += len(burst)
	        else:
	            alerts.append(generate_alert(next_id))
	            next_id += 1

	    # A burst added last may overshoot the requested count; trim to size.
	    alerts = alerts[:args.count]

	    # Show sample
	    print("Sample alerts:")
	    for sample in alerts[:2]:
	        print(f" {json.dumps(sample)}")
	    print()

	    # Start monitor if requested. Daemon thread, so it does not keep the
	    # process alive once the flood is finished.
	    if args.monitor:
	        monitor = threading.Thread(target=monitor_loop, args=(server,), daemon=True)
	        monitor.start()

	    # Flood
	    send_alerts(server, alerts, args.rate)

	    # Final metrics
	    print("\nFinal server metrics:")
	    try:
	        print(json.dumps(
	            requests.get(f"{server}/metrics", timeout=5).json(),
	            indent=2
	        ))
	    except Exception as e:
	        print(f" (could not fetch: {e})")


	# Run the flood generator only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()