Spaces:

mansidw
/

incident-copilot-api

Sleeping

App Files Files Community

Mansi Dwivedi committed on Feb 27

Commit

ec5a0c4

1 Parent(s): 737a6ff

Add application file

Browse files

Files changed (5) hide show

.gitignore +263 -0
Dockerfile +15 -0
app.py +419 -0
requirements.txt +79 -0
seed_logs.py +411 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,263 @@

+# Created by https://www.toptal.com/developers/gitignore/api/python,flask
+# Edit at https://www.toptal.com/developers/gitignore?templates=python,flask
+### Flask ###
+instance/*
+!instance/.gitignore
+.webassets-cache
+.env
+### Flask.Python Stack ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+### Python ###
+# Byte-compiled / optimized / DLL files
+# C extensions
+# Distribution / packaging
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+# Installer logs
+# Unit test / coverage reports
+# Translations
+# Django stuff:
+# Flask stuff:
+# Scrapy stuff:
+# Sphinx documentation
+# PyBuilder
+# Jupyter Notebook
+# IPython
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+# Celery stuff
+# SageMath parsed files
+# Environments
+# Spyder project settings
+# Rope project settings
+# mkdocs documentation
+# mypy
+# Pyre type checker
+# pytype static type analyzer
+# Cython debug symbols
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+# ruff
+.ruff_cache/
+# LSP config files
+pyrightconfig.json
+# End of https://www.toptal.com/developers/gitignore/api/python,flask
+.vscode

Dockerfile ADDED Viewed

	@@ -0,0 +1,15 @@

+FROM python:3.11-slim
+WORKDIR /app
+# Install dependencies first so this layer stays cached until requirements.txt changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy source
+COPY . .
+# Hugging Face Spaces runs on port 7860
+EXPOSE 7860
+# app.py keeps cases/actions in a per-process dict, starts APScheduler at import
+# time, and falls back to a random per-process JWT secret. Run ONE worker process
+# (threads provide the concurrency) so every request sees the same state, tokens
+# validate consistently, and the daily seed job is scheduled exactly once.
+# Threads also let /api/actions/<id>/execute call back into this same server.
+CMD ["gunicorn", "app:app", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "4", "--timeout", "120"]

app.py ADDED Viewed

	@@ -0,0 +1,419 @@

+import os
+import json
+import uuid
+import httpx
+import asyncio
+import requests
+from datetime import datetime, timezone
+from flask import Flask, request, jsonify
+from flask_jwt_extended import (
+    JWTManager,
+    create_access_token,
+    jwt_required,
+    get_jwt_identity,
+)
+import secrets
+from flask_cors import CORS
+from dotenv import load_dotenv
+# Import the Microsoft Agent Framework A2A classes
+from a2a.client import A2ACardResolver
+from agent_framework.a2a import A2AAgent
+import subprocess, sys, os, logging
+from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.triggers.cron import CronTrigger
+import atexit
+# Load variables from a local .env file (if present) into the process
+# environment before any of the os.getenv() lookups below.
+load_dotenv()
+# ── SCHEDULER SETUP ───────────────────────────────────────────────────────────
+# Resolve the absolute path to seed_logs.py so the job works regardless of
+# the working directory Flask was launched from.
+BACKEND_DIR = os.path.dirname(os.path.abspath(__file__))  # directory containing this file
+SEED_SCRIPT = os.path.join(BACKEND_DIR, "seed_logs.py")
+def run_seed_logs():
+    """
+    Spawns seed_logs.py as a subprocess using the same Python interpreter
+    that is running Flask. stdout/stderr are forwarded to the Flask logger
+    so you can see seeder output in your server logs.
+    """
+    logging.info("[Scheduler] Starting daily seed_logs.py run...")
+    try:
+        result = subprocess.run(
+            [sys.executable, SEED_SCRIPT],  # same venv Python → same packages
+            capture_output=True,
+            text=True,
+            cwd=BACKEND_DIR,  # run from backend/ so .env is found
+        )
+        if result.returncode == 0:
+            logging.info(
+                "[Scheduler] seed_logs.py completed successfully.\n%s", result.stdout
+            )
+        else:
+            logging.error(
+                "[Scheduler] seed_logs.py failed (exit %d).\nSTDOUT: %s\nSTDERR: %s",
+                result.returncode,
+                result.stdout,
+                result.stderr,
+            )
+    except Exception as exc:
+        logging.exception("[Scheduler] Unexpected error running seed_logs.py: %s", exc)
+# Flask application plus its extensions (JWT auth and CORS).
+app = Flask(__name__)
+# Use JWT_SECRET_KEY from the environment; otherwise generate a random secret.
+# NOTE(review): the fallback is regenerated on every process start, so issued
+# tokens stop validating after a restart and would differ between worker
+# processes — set JWT_SECRET_KEY explicitly in any real deployment.
+app.config["JWT_SECRET_KEY"] = os.getenv("JWT_SECRET_KEY") or secrets.token_hex(32)
+jwt = JWTManager(app)
+# NOTE(review): CORS is enabled with no options — presumably all origins are
+# allowed; confirm against the flask-cors defaults.
+CORS(app)
+# Create the scheduler (BackgroundScheduler runs in a daemon thread —
+# it doesn't block Flask and stops automatically when the process exits).
+# This executes at import time, so every process that imports this module
+# starts its own scheduler with its own copy of the daily job.
+scheduler = BackgroundScheduler(timezone="UTC")
+scheduler.add_job(
+    func=run_seed_logs,
+    trigger=CronTrigger(
+        hour=0,  # midnight UTC
+        minute=0,
+        second=0,
+        timezone="UTC",
+    ),
+    id="daily_seed_logs",
+    name="Daily seed_logs.py at 00:00 UTC",
+    replace_existing=True,  # safe to call even if job already registered
+    misfire_grace_time=60 * 5,  # allow the run to start up to 5 minutes
+    # late instead of skipping it entirely
+)
+scheduler.start()
+logging.info(
+    "[Scheduler] APScheduler started. Next seed run: %s",
+    scheduler.get_job("daily_seed_logs").next_run_time,
+)
+# Cleanly shut the scheduler down when Flask/Gunicorn exits
+atexit.register(lambda: scheduler.shutdown(wait=False))
+# ---------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------
+# NOTE: Make sure this is your Kibana URL (e.g. https://xxxx.kb.asia-south1.gcp.elastic-cloud.com)
+# The trailing slash is stripped so paths can be appended with a leading "/".
+KIBANA_URL = os.getenv("KIBANA_URL", "http://localhost:5601").rstrip("/")
+# Sent as "Authorization: ApiKey <key>" on requests to the Kibana agent endpoint.
+ES_API_KEY = os.getenv("ES_API_KEY")
+# Used to build the agent card path "/<AGENT_ID>.json".
+AGENT_ID = os.getenv("AGENT_ID")
+# ---------------------------------------------------------
+# Mock Database (In-Memory for MVP)
+# ---------------------------------------------------------
+# "cases" maps case id -> case dict, "actions" maps action id -> action dict,
+# and "audit_logs" is a list that entries are appended to. Everything lives
+# only in this process's memory and is lost when the process exits.
+db = {"cases": {}, "actions": {}, "audit_logs": []}
+def now_iso():
+    return datetime.now(timezone.utc).isoformat()
+def add_audit_log(case_id, actor, action_type, details):
+    log = {
+        "id": uuid.uuid4().hex[:8],
+        "case_id": case_id,
+        "actor": actor,
+        "action_type": action_type,
+        "details": details,
+        "timestamp": now_iso(),
+    }
+    db["audit_logs"].append(log)
+    print(f"[AUDIT] {action_type}: {details}")
+@app.route("/api/admin/seed", methods=["POST"])
+def trigger_seed():
+    """Manually kick off seed_logs.py — protected by a simple header check."""
+    scheduler.add_job(
+        func=run_seed_logs,
+        id="manual_seed",
+        replace_existing=True,
+    )
+    return jsonify({"status": "seeder job queued"}), 202
+# AUTH ENDPOINTS
+@app.route("/api/auth/login", methods=["POST"])
+def login():
+    data = request.json or {}
+    username = data.get("username", "")
+    password = data.get("password", "")
+    if username != os.getenv("UI_AUTH_USERNAME") or password != os.getenv(
+        "UI_AUTH_PASSWORD"
+    ):
+        return jsonify({"msg": "Bad username or password"}), 401
+    token = create_access_token(identity=username)
+    return jsonify({"access_token": token})
+@app.route("/api/auth/me", methods=["GET"])
+@jwt_required()
+def me():
+    return jsonify({"user": get_jwt_identity()})
+@app.route("/api/auth/logout", methods=["POST"])
+@jwt_required()
+def logout():
+    # MVP: client deletes token; real logout would use token revocation/denylist
+    return jsonify({"ok": True})
+# ---------------------------------------------------------
+# 1. Alert Webhook (Simulates an alert triggering a Case)
+# ---------------------------------------------------------
+@app.route("/api/alerts", methods=["POST"])
+@jwt_required()
+def receive_alert():
+    data = request.json
+    case_id = f"CASE-{uuid.uuid4().hex[:6].upper()}"
+    db["cases"][case_id] = {
+        "id": case_id,
+        "status": "OPEN",
+        "severity": data.get("severity", "P1"),
+        "title": data.get("title", "High 500 Error Rate detected"),
+        "service": data.get("service", "checkout-api"),
+        "start_time": data.get("start_time"),
+        "end_time": data.get("end_time"),
+        "created_at": now_iso(),
+        "analysis": None,
+    }
+    add_audit_log(
+        case_id, "System", "CASE_CREATED", f"Alert triggered for {data.get('service')}"
+    )
+    return jsonify({"message": "Case created", "case_id": case_id}), 201
+# ---------------------------------------------------------
+# 2. Analyze (Calls Elastic Agent via Microsoft Agent Framework)
+# ---------------------------------------------------------
+@app.route("/api/cases/<case_id>/analyze", methods=["POST"])
+@jwt_required()
+async def analyze_case(case_id):
+    case = db["cases"].get(case_id)
+    if not case:
+        return jsonify({"error": "Case not found"}), 404
+    prompt = (
+        f"An alert fired for {case['service']} between {case['start_time']} and {case['end_time']}. "
+        "Please investigate this using your tools. Follow your system instructions to find the root cause "
+        "and return ONLY the raw JSON format with causal_chain, hypotheses, and mitigations. "
+        "Do not include markdown blocks like ```json."
+    )
+    add_audit_log(
+        case_id, "System", "ANALYSIS_STARTED", "Sent context to Elastic AI Agent"
+    )
+    # The Kibana A2A base URL
+    a2a_agent_host = f"{KIBANA_URL}/api/agent_builder/a2a"
+    custom_headers = {"Authorization": f"ApiKey {ES_API_KEY}", "kbn-xsrf": "true"}
+    try:
+        # Use httpx AsyncClient as required by the Agent Framework
+        async with httpx.AsyncClient(
+            timeout=120.0, headers=custom_headers
+        ) as http_client:
+            # 1. Resolve the A2A Agent Card using the Agent ID
+            resolver = A2ACardResolver(
+                httpx_client=http_client, base_url=a2a_agent_host
+            )
+            # The Agent Card path uses your Agent ID
+            agent_card = await resolver.get_agent_card(
+                relative_card_path=f"/{AGENT_ID}.json"
+            )
+            print(f"Found Agent: {agent_card.name} - {agent_card.description}")
+            # 2. Use the Agent Framework to connect to the Elastic Agent
+            agent = A2AAgent(
+                name=agent_card.name,
+                description=agent_card.description,
+                agent_card=agent_card,
+                url=a2a_agent_host,
+                http_client=http_client,
+            )
+            print("Sending prompt to Elastic A2A agent...")
+            # 3. Execute the Run command (this handles the JSON-RPC complexity automatically)
+            response = await agent.run(prompt)
+            # Extract the text from the response
+            agent_reply = ""
+            for message in response.messages:
+                agent_reply += message.text
+            # Clean up Markdown formatting if the LLM accidentally added it
+            if agent_reply.startswith("```json"):
+                agent_reply = agent_reply.strip("```json").strip("```").strip()
+            analysis_data = json.loads(agent_reply)
+            case["analysis"] = analysis_data
+            case["status"] = "INVESTIGATED"
+            # Auto-create PENDING actions from the agent's mitigations
+            for mitigation in analysis_data.get("mitigations", []):
+                action_id = f"ACT-{uuid.uuid4().hex[:6].upper()}"
+                db["actions"][action_id] = {
+                    "id": action_id,
+                    "case_id": case_id,
+                    "type": mitigation.get("type", "UNKNOWN"),
+                    "action": mitigation.get("action", ""),
+                    "status": "PENDING",
+                    "created_at": now_iso(),
+                }
+            add_audit_log(
+                case_id,
+                "System",
+                "ANALYSIS_COMPLETE",
+                "Agent identified root cause and proposed mitigations",
+            )
+            return jsonify(
+                {
+                    "message": "Analysis complete",
+                    "analysis": analysis_data,
+                    "actions": db["actions"],
+                }
+            )
+    except Exception as e:
+        print("Error calling agent via Agent Framework:", e)
+        return jsonify({"error": str(e)}), 500
+# ---------------------------------------------------------
+# 3. Human-in-the-Loop (HITL) Approval
+# ---------------------------------------------------------
+@app.route("/api/actions/<action_id>/approve", methods=["POST"])
+@jwt_required()
+def approve_action(action_id):
+    action = db["actions"].get(action_id)
+    if not action:
+        return jsonify({"error": "Action not found"}), 404
+    data = request.json or {}
+    user = data.get("user", "OnCall-Engineer-1")
+    action["status"] = "APPROVED"
+    action["approved_by"] = user
+    action["approved_at"] = now_iso()
+    add_audit_log(
+        action["case_id"],
+        user,
+        "ACTION_APPROVED",
+        f"Approved execution of {action['type']}",
+    )
+    return jsonify({"message": "Action approved", "action": action})
+# ---------------------------------------------------------
+# 4. Execute Action (Calls the Mock Executor)
+# ---------------------------------------------------------
+@app.route("/api/actions/<action_id>/execute", methods=["POST"])
+@jwt_required()
+def execute_action(action_id):
+    action = db["actions"].get(action_id)
+    if not action:
+        return jsonify({"error": "Action not found"}), 404
+    if action["status"] != "APPROVED":
+        return (
+            jsonify({"error": f"Cannot execute action in status: {action['status']}"}),
+            400,
+        )
+    action["status"] = "RUNNING"
+    add_audit_log(
+        action["case_id"],
+        "System",
+        "EXECUTION_STARTED",
+        f"Running runbook for {action['type']}",
+    )
+    # ---> Call the Mock HTTP Executor
+    mock_executor_url = request.host_url.rstrip("/") + "/api/mock_executor"
+    resp = requests.post(
+        mock_executor_url, json={"action": action["action"], "type": action["type"]}
+    )
+    if resp.status_code == 200:
+        action["status"] = "SUCCESS"
+        add_audit_log(
+            action["case_id"],
+            "MockExecutor",
+            "EXECUTION_SUCCESS",
+            "Runbook completed successfully",
+        )
+    else:
+        action["status"] = "FAILED"
+        add_audit_log(
+            action["case_id"], "MockExecutor", "EXECUTION_FAILED", "Runbook failed"
+        )
+    return jsonify({"message": "Execution finished", "action": action})
+# ---------------------------------------------------------
+# 5. Mock HTTP Executor
+# ---------------------------------------------------------
+@app.route("/api/mock_executor", methods=["POST"])
+@jwt_required()
+def mock_executor():
+    data = request.json
+    import time
+    time.sleep(2)
+    print(
+        f"[MOCK EXECUTOR] Successfully applied {data.get('type')}: {data.get('action')}"
+    )
+    return jsonify({"status": "OK", "run_id": f"RUN-{uuid.uuid4().hex[:6]}"}), 200
+# ---------------------------------------------------------
+# 6. Read Endpoints
+# ---------------------------------------------------------
+@app.route("/api/cases", methods=["GET"])
+@jwt_required()
+def get_cases():
+    return jsonify(list(db["cases"].values()))
+@app.route("/api/cases/<case_id>", methods=["GET"])
+@jwt_required()
+def get_case(case_id):
+    case = db["cases"].get(case_id)
+    if not case:
+        return jsonify({"error": "Not found"}), 404
+    case_actions = [a for a in db["actions"].values() if a["case_id"] == case_id]
+    case_audits = [l for l in db["audit_logs"] if l["case_id"] == case_id]
+    return jsonify({"case": case, "actions": case_actions, "audit_logs": case_audits})
+@app.route("/api/health")
+def health():
+    return jsonify({"status": "ok"}), 200
+# Local development entry point. The Dockerfile starts the app with gunicorn
+# ("app:app"), which imports this module, so this branch does not run there.
+if __name__ == "__main__":
+    # debug=True enables Flask's debugger and reloader — local use only.
+    app.run(port=5000, debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,79 @@

+a2a-sdk==0.3.24
+agent-framework-a2a==1.0.0b260225
+agent-framework-core==1.0.0rc2
+annotated-types==0.7.0
+anyio==4.12.1
+APScheduler==3.10.4
+attrs==25.4.0
+azure-ai-projects==2.0.0b3
+azure-core==1.38.2
+azure-identity==1.25.2
+azure-storage-blob==12.28.0
+blinker==1.9.0
+certifi==2026.2.25
+cffi==2.0.0
+charset-normalizer==3.4.4
+click==8.3.1
+colorama==0.4.6
+cryptography==46.0.5
+distro==1.9.0
+Flask==3.1.3
+flask-cors==6.0.2
+Flask-JWT-Extended==4.7.1
+gunicorn>=21.2.0
+google-api-core==2.30.0
+google-auth==2.48.0
+googleapis-common-protos==1.72.0
+h11==0.16.0
+httpcore==1.0.9
+httpx==0.28.1
+httpx-sse==0.4.3
+idna==3.11
+importlib_metadata==8.7.1
+isodate==0.7.2
+itsdangerous==2.2.0
+Jinja2==3.1.6
+jiter==0.13.0
+jsonschema==4.26.0
+jsonschema-specifications==2025.9.1
+MarkupSafe==3.0.3
+mcp==1.26.0
+msal==1.35.0
+msal-extensions==1.3.1
+openai==2.24.0
+opentelemetry-api==1.39.1
+opentelemetry-sdk==1.39.1
+opentelemetry-semantic-conventions==0.60b1
+opentelemetry-semantic-conventions-ai==0.4.14
+packaging==26.0
+proto-plus==1.27.1
+protobuf==6.33.5
+pyasn1==0.6.2
+pyasn1_modules==0.4.2
+pycparser==3.0
+pydantic==2.12.5
+pydantic-settings==2.13.1
+pydantic_core==2.41.5
+PyJWT==2.11.0
+python-dotenv==1.2.1
+python-multipart==0.0.22
+pytz==2025.2
+pywin32==311; sys_platform == "win32"
+referencing==0.37.0
+requests==2.32.5
+rpds-py==0.30.0
+rsa==4.9.1
+six==1.17.0
+sniffio==1.3.1
+sse-starlette==3.2.0
+starlette==0.52.1
+tqdm==4.67.3
+typing-inspection==0.4.2
+typing_extensions==4.15.0
+tzdata==2025.3
+tzlocal==5.3.1
+urllib3==2.6.3
+uvicorn==0.41.0
+websockets==16.0
+Werkzeug==3.1.6
+zipp==3.23.0

seed_logs.py ADDED Viewed

	@@ -0,0 +1,411 @@

+#!/usr/bin/env python3
+import os
+import json
+import uuid
+import random
+import argparse
+from datetime import datetime, timedelta, timezone
+import requests
+from dotenv import load_dotenv
+# ----------------------------
+# Env helpers
+# ----------------------------
+def getenv_int(key: str, default: int) -> int:
+    v = os.getenv(key)
+    return default if v is None or v == "" else int(v)
+def getenv_float(key: str, default: float) -> float:
+    v = os.getenv(key)
+    return default if v is None or v == "" else float(v)
+def getenv_bool(key: str, default: bool) -> bool:
+    v = os.getenv(key)
+    if v is None or v == "":
+        return default
+    return v.strip().lower() in ("1", "true", "yes", "y", "on")
+def iso(ts: datetime) -> str:
+    return ts.astimezone(timezone.utc).isoformat()
+def new_trace_id() -> str:
+    return uuid.uuid4().hex  # 32 hex chars
+# ----------------------------
+# Elasticsearch bulk client
+# ----------------------------
+class ESClient:
+    def __init__(
+        self,
+        base_url: str,
+        api_key: str | None,
+        username: str | None,
+        password: str | None,
+        verify_ssl: bool,
+    ):
+        self.base_url = base_url.rstrip("/")
+        self.verify_ssl = verify_ssl
+        self.session = requests.Session()
+        self.session.headers.update({"Content-Type": "application/x-ndjson"})
+        if api_key:
+            self.session.headers.update({"Authorization": f"ApiKey {api_key}"})
+        elif username and password:
+            self.session.auth = (username, password)
+    def bulk(self, ndjson_lines: list[str]) -> dict:
+        body = "\n".join(ndjson_lines) + "\n"  # final newline required
+        r = self.session.post(
+            f"{self.base_url}/_bulk", data=body, timeout=30, verify=self.verify_ssl
+        )
+        r.raise_for_status()
+        return r.json()
+# ----------------------------
+# Document builders (ECS-ish)
+# ----------------------------
+def mk_doc_logs(
+    dataset: str,
+    namespace: str,
+    service_name: str,
+    level: str,
+    message: str,
+    ts: datetime,
+    trace_id: str | None,
+    service_version: str | None,
+    host: str | None,
+    extra: dict | None = None,
+) -> dict:
+    doc = {
+        "@timestamp": iso(ts),
+        "data_stream": {"type": "logs", "dataset": dataset, "namespace": namespace},
+        "event": {"dataset": dataset},
+        "service": {"name": service_name},
+        "log": {"level": level},
+        "message": message,
+    }
+    if service_version:
+        doc["service"]["version"] = service_version
+    if host:
+        doc["host"] = {"name": host}
+    if trace_id:
+        doc["trace"] = {"id": trace_id}
+    if extra:
+        doc.update(extra)
+    return doc
+def action_create(index_name: str) -> str:
+    return json.dumps({"create": {"_index": index_name}})
+def doc_line(doc: dict) -> str:
+    return json.dumps(doc, separators=(",", ":"))
+# ----------------------------
+# Seeder logic
+# ----------------------------
+def seed_once(
+    es: ESClient,
+    namespace: str,
+    run_id: str,
+    baseline_minutes: int,
+    incident_minutes: int,
+    recovery_minutes: int,
+    rps: int,
+    batch_size_requests: int,
+    incident_timeout_ratio: float,
+    incident_deadlock_ratio: float,
+    random_seed: int | None,
+) -> None:
+    if random_seed is not None:
+        random.seed(random_seed)
+    # Data stream names (follow logs-<dataset>-<namespace>)
+    checkout_stream = f"logs-checkout_api-{namespace}"
+    payment_stream = f"logs-payment_service-{namespace}"
+    postgres_stream = f"logs-postgres-{namespace}"
+    total_minutes = baseline_minutes + incident_minutes + recovery_minutes
+    start_ts = datetime.now(timezone.utc) - timedelta(minutes=total_minutes)
+    phases = [
+        ("baseline", baseline_minutes),
+        ("incident", incident_minutes),
+        ("recovery", recovery_minutes),
+    ]
+    hosts = {
+        "checkout": ["chk-1", "chk-2"],
+        "payment": ["pay-1", "pay-2", "pay-3"],
+        "postgres": ["pg-1"],
+    }
+    versions = {
+        "checkout": "3.1.2",
+        "payment_ok": "2.6.9",
+        "payment_bad": "2.7.0",
+    }
+    ndjson_lines: list[str] = []
+    total_docs = 0
+    errors = 0
+    def flush():
+        nonlocal ndjson_lines, total_docs, errors
+        if not ndjson_lines:
+            return
+        resp = es.bulk(ndjson_lines)
+        if resp.get("errors"):
+            errors += 1
+        ndjson_lines = []
+    # Optional: emit a deploy marker just before incident begins
+    deploy_marker_emitted = False
+    cursor = start_ts
+    for phase_name, phase_minutes in phases:
+        seconds = phase_minutes * 60
+        # Deploy marker at the start of incident phase
+        if phase_name == "incident" and not deploy_marker_emitted:
+            deploy_marker_emitted = True
+            t_id = None
+            pay_host = random.choice(hosts["payment"])
+            deploy_doc = mk_doc_logs(
+                dataset="payment_service",
+                namespace=namespace,
+                service_name="payment-service",
+                level="INFO",
+                message="deploy payment-service v2.7.0",
+                ts=cursor,
+                trace_id=t_id,
+                service_version=versions["payment_bad"],
+                host=pay_host,
+                extra={
+                    "labels": {
+                        "run_id": run_id,
+                        "phase": "deploy",
+                        "change_type": "deploy",
+                    }
+                },
+            )
+            ndjson_lines.append(action_create(payment_stream))
+            ndjson_lines.append(doc_line(deploy_doc))
+            total_docs += 1
+        for _sec in range(seconds):
+            # For each second, generate rps "requests"
+            for _ in range(rps):
+                t_id = new_trace_id()
+                chk_host = random.choice(hosts["checkout"])
+                pay_host = random.choice(hosts["payment"])
+                pg_host = random.choice(hosts["postgres"])
+                # Default healthy request
+                http_status = 200
+                chk_level = "INFO"
+                chk_msg = "checkout ok"
+                pay_level = "INFO"
+                pay_msg = "payment ok"
+                pay_version = versions["payment_ok"]
+                pg_level = "INFO"
+                pg_msg = "query ok"
+                # Simulated db pool metrics
+                pool_max = 50
+                pool_in_use = random.randint(5, 25)
+                pool_wait_ms = random.randint(0, 20)
+                query_ms = random.randint(5, 40)
+                if phase_name == "incident":
+                    pay_version = versions["payment_bad"]
+                    # Pool usage rises
+                    pool_in_use = random.randint(45, 50)
+                    pool_wait_ms = random.randint(200, 2000)
+                    query_ms = random.randint(200, 6000)
+                    # Some requests hit pool acquire timeout
+                    if random.random() < incident_timeout_ratio:
+                        pay_level = "ERROR"
+                        pay_msg = "db.pool acquire timeout"
+                        http_status = 500
+                        chk_level = "ERROR"
+                        chk_msg = "Upstream timeout calling payment-service"
+                    # Some requests show deadlock/lock issues
+                    if random.random() < incident_deadlock_ratio:
+                        pg_level = "ERROR"
+                        pg_msg = random.choice(
+                            [
+                                "deadlock detected",
+                                "canceling statement due to lock timeout",
+                            ]
+                        )
+                if phase_name == "recovery":
+                    # Things normalize again
+                    pool_in_use = random.randint(8, 22)
+                    pool_wait_ms = random.randint(0, 50)
+                    query_ms = random.randint(5, 80)
+                base_labels = {"run_id": run_id, "phase": phase_name}
+                checkout_doc = mk_doc_logs(
+                    dataset="checkout_api",
+                    namespace=namespace,
+                    service_name="checkout-api",
+                    level=chk_level,
+                    message=chk_msg,
+                    ts=cursor,
+                    trace_id=t_id,
+                    service_version=versions["checkout"],
+                    host=chk_host,
+                    extra={
+                        "http": {
+                            "request": {"method": "POST"},
+                            "response": {"status_code": http_status},
+                        },
+                        "labels": {**base_labels, "timeout_ms": 2000},
+                    },
+                )
+                payment_doc = mk_doc_logs(
+                    dataset="payment_service",
+                    namespace=namespace,
+                    service_name="payment-service",
+                    level=pay_level,
+                    message=pay_msg,
+                    ts=cursor,
+                    trace_id=t_id,
+                    service_version=pay_version,
+                    host=pay_host,
+                    extra={
+                        "labels": {
+                            **base_labels,
+                            "db.pool_max": pool_max,
+                            "db.pool_in_use": pool_in_use,
+                            "db.pool_wait_ms": pool_wait_ms,
+                            "db.query_ms": query_ms,
+                        }
+                    },
+                )
+                postgres_doc = mk_doc_logs(
+                    dataset="postgres",
+                    namespace=namespace,
+                    service_name="postgres",
+                    level=pg_level,
+                    message=pg_msg,
+                    ts=cursor,
+                    trace_id=t_id,
+                    service_version=None,
+                    host=pg_host,
+                    extra={
+                        "labels": {
+                            **base_labels,
+                            "db.query_ms": query_ms,
+                            "db.query": "UPDATE orders SET status=?",
+                        }
+                    },
+                )
+                # Add to bulk payload (3 docs per request)
+                for index_name, doc in [
+                    (checkout_stream, checkout_doc),
+                    (payment_stream, payment_doc),
+                    (postgres_stream, postgres_doc),
+                ]:
+                    ndjson_lines.append(action_create(index_name))
+                    ndjson_lines.append(doc_line(doc))
+                    total_docs += 1
+                # Flush when batch is big enough (batch_size_requests requests -> 3*2*batch_size_requests lines)
+                if (total_docs % (batch_size_requests * 3)) == 0:
+                    flush()
+            cursor += timedelta(seconds=1)
+    flush()
+    print("Seed complete")
+    print(f"Namespace: {namespace}")
+    print(f"Run ID: {run_id}")
+    print(f"Streams: {checkout_stream}, {payment_stream}, {postgres_stream}")
+    print(f"Docs indexed: {total_docs}")
+    print(
+        f"Bulk responses with errors: {errors} (check Elasticsearch response details if > 0)"
+    )
+def main():
+    load_dotenv()  # reads .env into environment variables for os.getenv [web:320]
+    parser = argparse.ArgumentParser(
+        description="One-time ECS-ish log seeder: baseline -> incident -> recovery"
+    )
+    parser.add_argument("--namespace", default=os.getenv("SEED_NAMESPACE", "mvp1"))
+    parser.add_argument(
+        "--run-id", default=os.getenv("SEED_RUN_ID", f"run_{uuid.uuid4().hex[:8]}")
+    )
+    args = parser.parse_args()
+    es_url = os.getenv("ES_URL", "http://localhost:9200")
+    es_api_key = os.getenv("ES_API_KEY")  # base64 ApiKey
+    es_username = os.getenv("ES_USERNAME")
+    es_password = os.getenv("ES_PASSWORD")
+    verify_ssl = getenv_bool("ES_VERIFY_SSL", True)
+    baseline_minutes = getenv_int("SEED_BASELINE_MINUTES", 10)
+    incident_minutes = getenv_int("SEED_INCIDENT_MINUTES", 3)
+    recovery_minutes = getenv_int("SEED_RECOVERY_MINUTES", 5)
+    rps = getenv_int("SEED_RPS", 5)
+    batch_size_requests = getenv_int("SEED_BATCH_SIZE_REQUESTS", 200)
+    incident_timeout_ratio = getenv_float("SEED_INCIDENT_TIMEOUT_RATIO", 0.30)
+    incident_deadlock_ratio = getenv_float("SEED_INCIDENT_DEADLOCK_RATIO", 0.20)
+    random_seed = os.getenv("SEED_RANDOM_SEED")
+    random_seed = None if not random_seed else int(random_seed)
+    es = ESClient(
+        base_url=es_url,
+        api_key=es_api_key,
+        username=es_username,
+        password=es_password,
+        verify_ssl=verify_ssl,
+    )
+    seed_once(
+        es=es,
+        namespace=args.namespace,
+        run_id=args.run_id,
+        baseline_minutes=baseline_minutes,
+        incident_minutes=incident_minutes,
+        recovery_minutes=recovery_minutes,
+        rps=rps,
+        batch_size_requests=batch_size_requests,
+        incident_timeout_ratio=incident_timeout_ratio,
+        incident_deadlock_ratio=incident_deadlock_ratio,
+        random_seed=random_seed,
+    )
+# Run the seeder only when this file is executed as a script (app.py launches
+# it that way through subprocess), not when it is imported.
+if __name__ == "__main__":
+    main()