smhs16 committed on
Commit
ac01a7a
·
verified ·
1 Parent(s): f36550c

Upload 9 files

Browse files
Files changed (9) hide show
  1. .env.example +34 -0
  2. .gitignore +63 -0
  3. Dockerfile +38 -0
  4. README.md +163 -7
  5. app.py +145 -0
  6. docker-compose.yml +111 -0
  7. dvc.yaml +51 -0
  8. params.yaml +24 -0
  9. requirements.txt +47 -0
.env.example ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # .env.example — Copy to .env and fill in your values.
2
+ # Never commit .env to Git.
3
+
4
+ # Database
5
+ POSTGRES_PASSWORD=your_secure_password_here
6
+ DATABASE_URL=postgresql://admin:your_secure_password_here@localhost:5432/flights
7
+
8
+ # AWS
9
+ AWS_ACCESS_KEY_ID=AKIA...
10
+ AWS_SECRET_ACCESS_KEY=...
11
+ AWS_DEFAULT_REGION=us-east-1
12
+ S3_RAW_BUCKET=flight-delay-raw-data
13
+ S3_PROCESSED_BUCKET=flight-delay-processed
14
+
15
+ # MLflow
16
+ MLFLOW_TRACKING_URI=http://localhost:5000
17
+
18
+ # BTS API (optional — bulk CSV download doesn't need it)
19
+ BTS_API_KEY=
20
+
21
+ # Airflow
22
+ AIRFLOW_FERNET_KEY=
23
+ AIRFLOW_SECRET_KEY=changeme_in_production
24
+
25
+ # Grafana
26
+ GRAFANA_PASSWORD=admin
27
+
28
+ # Monitoring
29
+ DRIFT_SHARE_THRESHOLD=0.30
30
+ SQS_RETRAIN_QUEUE_URL=https://sqs.us-east-1.amazonaws.com/123456789/retrain-queue
31
+
32
+ # Deployment
33
+ API_ENDPOINT=https://your-alb-dns.us-east-1.elb.amazonaws.com
34
+ MODEL_VERSION=v1.0.0
.gitignore ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ dist/
7
+ build/
8
+ .venv/
9
+ venv/
10
+ env/
11
+
12
+ # Data (tracked by DVC, not Git)
13
+ data/raw/
14
+ data/processed/
15
+ !data/.gitkeep
16
+
17
+ # Models (tracked by DVC or MLflow)
18
+ models/*.pkl
19
+ models/*.joblib
20
+ !models/.gitkeep
21
+
22
+ # MLflow
23
+ mlruns/
24
+ mlartifacts/
25
+
26
+ # Reports
27
+ reports/*.png
28
+ reports/*.html
29
+ reports/*.json
30
+ !reports/.gitkeep
31
+
32
+ # Metrics (DVC manages)
33
+ metrics/
34
+
35
+ # Env
36
+ .env
37
+ .env.*
38
+ !.env.example
39
+
40
+ # Editors
41
+ .vscode/
42
+ .idea/
43
+ *.swp
44
+
45
+ # Testing
46
+ .pytest_cache/
47
+ .coverage
48
+ coverage.xml
49
+ htmlcov/
50
+
51
+ # OS
52
+ .DS_Store
53
+ Thumbs.db
54
+
55
+ # Jupyter
56
+ .ipynb_checkpoints/
57
+ *.ipynb
58
+
59
+ # Terraform
60
+ terraform/.terraform/
61
+ terraform/*.tfstate
62
+ terraform/*.tfstate.backup
63
+ terraform/.terraform.lock.hcl
Dockerfile ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Multi-stage build — keeps final image small and dependency-free from build tools
2
+ # ── Stage 1: Build ────────────────────────────────────────────────────────────
3
+ FROM python:3.11-slim AS builder
4
+
5
+ WORKDIR /app
6
+ COPY requirements.txt .
7
+ RUN pip install --no-cache-dir --user -r requirements.txt
8
+
9
+ # ── Stage 2: Runtime ──────────────────────────────────────────────────────────
10
+ FROM python:3.11-slim
11
+
12
+ WORKDIR /app
13
+
14
+ # Non-root user for security
15
+ RUN useradd -m -u 1001 appuser
16
+
17
+ # Copy installed packages from builder
18
+ COPY --from=builder /root/.local /home/appuser/.local
19
+
20
+ # Copy application source
21
+ COPY --chown=appuser:appuser . .
22
+
23
+ # Create writable dirs the app needs
24
+ RUN mkdir -p models data/raw data/processed reports metrics \
25
+ && chown -R appuser:appuser models data reports metrics
26
+
27
+ USER appuser
28
+
29
+ ENV PATH=/home/appuser/.local/bin:$PATH \
30
+ PYTHONUNBUFFERED=1 \
31
+ PYTHONDONTWRITEBYTECODE=1
32
+
33
+ EXPOSE 8000
34
+
35
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
36
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
37
+
38
+ CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]
README.md CHANGED
@@ -1,13 +1,169 @@
1
  ---
2
- title: Flight Delay Prediction
3
- emoji: 🏆
4
- colorFrom: red
5
  colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 6.9.0
8
  app_file: app.py
9
- pinned: false
10
- short_description: flight delay prediction
 
 
 
 
 
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Flight Delay Prediction Platform
3
+ emoji: ✈️
4
+ colorFrom: blue
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 4.31.0
8
  app_file: app.py
9
+ pinned: true
10
+ license: mit
11
+ tags:
12
+ - machine-learning
13
+ - tabular-classification
14
+ - xgboost
15
+ - flight-delay
16
+ - mlops
17
  ---
18
 
19
+ # ✈️ Flight Delay Prediction Platform
20
+
21
+ > End-to-end ML system: ETL → Training → MLOps → FastAPI → Cloud Deploy → Monitoring
22
+
23
+ [![CI/CD](https://github.com/YOUR_USERNAME/flight-delay-platform/actions/workflows/deploy.yml/badge.svg)](https://github.com/YOUR_USERNAME/flight-delay-platform/actions)
24
+ [![Python 3.11](https://img.shields.io/badge/python-3.11-blue.svg)](https://python.org)
25
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
26
+
27
+ ---
28
+
29
+ ## 🗺 Architecture
30
+
31
+ ```
32
+ BTS API / OpenSky
33
+
34
+
35
+ ┌─────────┐ ┌──────────┐ ┌──────────┐
36
+ │ ETL │───▶│ Postgres│───▶│ Feature │
37
+ │ Airflow │ │ + S3 │ │ Store │
38
+ └─────────┘ └──────────┘ └──────────┘
39
+
40
+
41
+ ┌──────────────┐
42
+ │ XGBoost / │
43
+ │ LightGBM │
44
+ │ Training │
45
+ └──────┬───────┘
46
+ │ MLflow
47
+
48
+ ┌──────────────┐
49
+ │ FastAPI │
50
+ │ Inference │──▶ AWS ECS / GCP Cloud Run
51
+ └──────┬───────┘
52
+
53
+
54
+ ┌──────────────┐
55
+ │ Evidently │
56
+ │ + Grafana │
57
+ └──────────────┘
58
+ ```
59
+
60
+ ## 🚀 Quick Start
61
+
62
+ ```bash
63
+ # 1. Clone
64
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/flight-delay-platform
65
+ cd flight-delay-platform
66
+
67
+ # 2. Install
68
+ pip install -r requirements.txt
69
+
70
+ # 3. Run local stack
71
+ docker-compose up -d
72
+
73
+ # 4. Trigger ETL
74
+ python -m etl.extract --start-date 2024-01-01 --end-date 2024-06-30
75
+
76
+ # 5. Train models
77
+ python -m ml.train
78
+
79
+ # 6. Start API
80
+ uvicorn api.main:app --reload --port 8000
81
+
82
+ # 7. Try a prediction
83
+ curl -X POST http://localhost:8000/predict \
84
+ -H "Content-Type: application/json" \
85
+ -d '{"dep_hour":8,"dep_dayofweek":1,"dep_month":3,
86
+ "carrier_enc":3,"origin_enc":10,"dest_enc":25,
87
+ "crs_elapsed_time":185,"distance":1400,
88
+ "origin_delay_rate":0.22,"is_weekend":0,"is_peak_hour":1}'
89
+ ```
90
+
91
+ ## 📂 Project Structure
92
+
93
+ ```
94
+ flight-delay-platform/
95
+ ├── app.py # Gradio demo (HF Spaces entry point)
96
+ ├── etl/ # Data pipeline
97
+ │ ├── extract.py
98
+ │ ├── transform.py
99
+ │ ├── validate.py
100
+ │ └── load.py
101
+ ├── ml/ # Model training
102
+ │ ├── train.py
103
+ │ ├── evaluate.py
104
+ │ ├── features.py
105
+ │ └── config.py
106
+ ├── mlops/ # Model registry & promotion
107
+ │ ├── model_registry.py
108
+ │ └── promote.py
109
+ ├── api/ # FastAPI serving
110
+ │ ├── main.py
111
+ │ ├── schemas.py
112
+ │ └── middleware.py
113
+ ├── monitoring/ # Drift detection & metrics
114
+ │ ├── drift_detector.py
115
+ │ └── prometheus.yml
116
+ ├── dags/ # Airflow DAGs
117
+ │ ├── etl_dag.py
118
+ │ └── retrain_dag.py
119
+ ├── tests/ # Pytest suite
120
+ │ ├── test_etl.py
121
+ │ ├── test_model.py
122
+ │ └── test_api.py
123
+ ├── terraform/ # IaC (AWS ECS)
124
+ │ ├── main.tf
125
+ │ └── variables.tf
126
+ ├── .github/workflows/ # CI/CD
127
+ │ └── deploy.yml
128
+ ├── docker-compose.yml
129
+ ├── Dockerfile
130
+ ├── dvc.yaml
131
+ ├── params.yaml
132
+ └── requirements.txt
133
+ ```
134
+
135
+ ## 🧠 Models
136
+
137
+ | Model | CV AUC | F1 | Notes |
138
+ |---|---|---|---|
139
+ | Logistic Regression | ~0.72 | ~0.65 | Baseline |
140
+ | LightGBM | ~0.83 | ~0.74 | Fast, good default |
141
+ | **XGBoost (tuned)** | **~0.86** | **~0.77** | **Production model** |
142
+
143
+ Target: predict whether a flight will be **≥15 minutes late** (FAA standard).
144
+
145
+ ## 🔑 Key Features Used
146
+
147
+ - `dep_hour`, `dep_dayofweek`, `dep_month`, `is_weekend`, `is_peak_hour`
148
+ - `carrier_enc`, `origin_enc`, `dest_enc`
149
+ - `crs_elapsed_time`, `distance`
150
+ - `origin_delay_rate` (rolling 30-day historical delay rate per airport)
151
+ - `weather_wind_speed`, `weather_precip_mm`
152
+
153
+ ## 🛠 Tech Stack
154
+
155
+ | Layer | Tools |
156
+ |---|---|
157
+ | Orchestration | Apache Airflow 2.9 |
158
+ | ML | XGBoost, LightGBM, Scikit-learn |
159
+ | Experiment Tracking | MLflow + DVC |
160
+ | Serving | FastAPI + Uvicorn |
161
+ | Containerization | Docker + Docker Compose |
162
+ | Cloud | AWS ECS Fargate + S3 + RDS |
163
+ | IaC | Terraform |
164
+ | Monitoring | Evidently AI + Prometheus + Grafana |
165
+ | CI/CD | GitHub Actions |
166
+
167
+ ## 📄 License
168
+
169
+ MIT © 2024
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py — Gradio demo for HF Spaces.
3
+ Loads the trained model (or a stub) and exposes a live prediction UI.
4
+ """
5
+
6
+ import gradio as gr
7
+ import pandas as pd
8
+ import numpy as np
9
+ import pickle
10
+ import os
11
+ from pathlib import Path
12
+
13
+ # ── Load model (falls back to a stub if not yet trained) ──────────────────────
14
+ MODEL_PATH = Path("models/best_model.pkl")
15
+
16
+ def load_model():
17
+ if MODEL_PATH.exists():
18
+ with open(MODEL_PATH, "rb") as f:
19
+ return pickle.load(f)
20
+ return None # stub mode
21
+
22
+ model = load_model()
23
+
24
+ CARRIER_MAP = {"AA": 0, "DL": 1, "UA": 2, "WN": 3, "B6": 4, "AS": 5, "F9": 6, "NK": 7}
25
+ AIRPORT_STUB = { # small lookup for demo
26
+ "ATL": 0, "LAX": 1, "ORD": 2, "DFW": 3, "DEN": 4,
27
+ "JFK": 5, "SFO": 6, "SEA": 7, "LAS": 8, "MIA": 9,
28
+ }
29
+
30
+ def predict_delay(
31
+ dep_hour, dep_dayofweek, dep_month,
32
+ carrier, origin, dest,
33
+ crs_elapsed_time, distance,
34
+ origin_delay_rate,
35
+ is_weekend, is_peak_hour,
36
+ ):
37
+ carrier_enc = CARRIER_MAP.get(carrier, 0)
38
+ origin_enc = AIRPORT_STUB.get(origin, 0)
39
+ dest_enc = AIRPORT_STUB.get(dest, 1)
40
+
41
+ features = pd.DataFrame([{
42
+ "dep_hour": int(dep_hour),
43
+ "dep_dayofweek": int(dep_dayofweek),
44
+ "dep_month": int(dep_month),
45
+ "carrier_enc": carrier_enc,
46
+ "origin_enc": origin_enc,
47
+ "dest_enc": dest_enc,
48
+ "crs_elapsed_time": float(crs_elapsed_time),
49
+ "distance": float(distance),
50
+ "origin_delay_rate": float(origin_delay_rate),
51
+ "is_weekend": int(is_weekend == "Yes"),
52
+ "is_peak_hour": int(is_peak_hour == "Yes"),
53
+ }])
54
+
55
+ if model is not None:
56
+ prob = float(model.predict_proba(features)[0, 1])
57
+ else:
58
+ # Demo stub: simple heuristic
59
+ prob = min(1.0, (
60
+ origin_delay_rate * 0.5 +
61
+ (0.15 if is_peak_hour == "Yes" else 0) +
62
+ (0.1 if is_weekend == "Yes" else 0) +
63
+ (dep_month in [6, 7, 12]) * 0.1
64
+ ) + np.random.normal(0, 0.05))
65
+ prob = max(0.0, prob)
66
+
67
+ label = "🔴 LIKELY DELAYED" if prob >= 0.5 else "🟢 LIKELY ON TIME"
68
+ confidence = "HIGH" if abs(prob - 0.5) > 0.25 else "MEDIUM" if abs(prob - 0.5) > 0.1 else "LOW"
69
+ bar = "█" * int(prob * 20) + "░" * (20 - int(prob * 20))
70
+
71
+ return (
72
+ f"{label}\n\n"
73
+ f"Delay probability : {prob:.1%}\n"
74
+ f"Confidence : {confidence}\n"
75
+ f"[{bar}] {prob:.1%}"
76
+ )
77
+
78
+
79
+ # ── UI ────────────────────────────────────────────────────────────────────────
80
+ with gr.Blocks(
81
+ title="✈️ Flight Delay Predictor",
82
+ theme=gr.themes.Base(primary_hue="blue", neutral_hue="slate"),
83
+ ) as demo:
84
+
85
+ gr.Markdown("""
86
+ # ✈️ Flight Delay Prediction
87
+ Predict whether a flight will be **≥ 15 minutes late** using the trained XGBoost model.
88
+ > Part of the [Flight Delay ML Platform](https://github.com/YOUR_USERNAME/flight-delay-platform)
89
+ """)
90
+
91
+ with gr.Row():
92
+ with gr.Column():
93
+ gr.Markdown("### ✈️ Flight Details")
94
+ carrier = gr.Dropdown(list(CARRIER_MAP.keys()), value="AA", label="Airline")
95
+ origin = gr.Dropdown(list(AIRPORT_STUB.keys()), value="ATL", label="Origin Airport")
96
+ dest = gr.Dropdown(list(AIRPORT_STUB.keys()), value="LAX", label="Destination Airport")
97
+ distance = gr.Slider(100, 5000, value=1400, step=50, label="Distance (miles)")
98
+ crs_elapsed_time = gr.Slider(30, 600, value=185, step=5, label="Scheduled Duration (min)")
99
+
100
+ with gr.Column():
101
+ gr.Markdown("### 🕐 Schedule")
102
+ dep_hour = gr.Slider(0, 23, value=8, step=1, label="Departure Hour (0–23)")
103
+ dep_dayofweek = gr.Slider(0, 6, value=1, step=1, label="Day of Week (0=Mon, 6=Sun)")
104
+ dep_month = gr.Slider(1, 12, value=3, step=1, label="Month")
105
+ is_weekend = gr.Radio(["Yes", "No"], value="No", label="Weekend Flight?")
106
+ is_peak_hour = gr.Radio(["Yes", "No"], value="Yes", label="Peak Hour? (7–9am / 5–8pm)")
107
+
108
+ with gr.Column():
109
+ gr.Markdown("### 🌦 Airport History")
110
+ origin_delay_rate = gr.Slider(
111
+ 0.0, 1.0, value=0.22, step=0.01,
112
+ label="Origin Airport 30-Day Delay Rate"
113
+ )
114
+ gr.Markdown("### 📊 Prediction")
115
+ output = gr.Textbox(label="Result", lines=5, interactive=False)
116
+ predict_btn = gr.Button("Predict Delay →", variant="primary")
117
+
118
+ predict_btn.click(
119
+ fn=predict_delay,
120
+ inputs=[
121
+ dep_hour, dep_dayofweek, dep_month,
122
+ carrier, origin, dest,
123
+ crs_elapsed_time, distance,
124
+ origin_delay_rate, is_weekend, is_peak_hour,
125
+ ],
126
+ outputs=output,
127
+ )
128
+
129
+ gr.Examples(
130
+ examples=[
131
+ [8, 1, 3, "AA", "ATL", "LAX", 185, 1400, 0.22, "No", "Yes"],
132
+ [18, 4, 7, "UA", "ORD", "JFK", 140, 780, 0.38, "No", "Yes"],
133
+ [6, 6, 1, "WN", "DEN", "SFO", 95, 950, 0.12, "Yes", "No" ],
134
+ [14, 3, 12,"DL", "ATL", "MIA", 75, 660, 0.45, "No", "No" ],
135
+ ],
136
+ inputs=[
137
+ dep_hour, dep_dayofweek, dep_month,
138
+ carrier, origin, dest,
139
+ crs_elapsed_time, distance,
140
+ origin_delay_rate, is_weekend, is_peak_hour,
141
+ ],
142
+ )
143
+
144
+ if __name__ == "__main__":
145
+ demo.launch()
docker-compose.yml ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: "3.9"
2
+
3
+ # Full local development stack
4
+ # Start everything: docker-compose up -d
5
+ # View logs: docker-compose logs -f inference-api
6
+ # Tear down: docker-compose down -v
7
+
8
+ services:
9
+
10
+ # ── Postgres ────────────────────────────────────────────────────────────────
11
+ postgres:
12
+ image: postgres:15-alpine
13
+ environment:
14
+ POSTGRES_DB: flights
15
+ POSTGRES_USER: admin
16
+ POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-localpassword}
17
+ ports: ["5432:5432"]
18
+ volumes: [postgres_data:/var/lib/postgresql/data]
19
+ healthcheck:
20
+ test: ["CMD-SHELL", "pg_isready -U admin -d flights"]
21
+ interval: 10s
22
+ timeout: 5s
23
+ retries: 5
24
+
25
+ # ── MLflow tracking server ───────────────────────────────────────────────────
26
+ mlflow:
27
+ image: python:3.11-slim
28
+ depends_on:
29
+ postgres: { condition: service_healthy }
30
+ command: >
31
+ sh -c "pip install -q mlflow==2.13.0 psycopg2-binary boto3 &&
32
+ mlflow server
33
+ --host 0.0.0.0
34
+ --port 5000
35
+ --backend-store-uri postgresql://admin:${POSTGRES_PASSWORD:-localpassword}@postgres:5432/flights
36
+ --artifacts-destination /mlruns"
37
+ ports: ["5000:5000"]
38
+ volumes: [mlruns:/mlruns]
39
+ environment:
40
+ MLFLOW_TRACKING_URI: http://localhost:5000
41
+
42
+ # ── Airflow (single-container quickstart) ────────────────────────────────────
43
+ airflow:
44
+ image: apache/airflow:2.9.1-python3.11
45
+ depends_on:
46
+ postgres: { condition: service_healthy }
47
+ environment:
48
+ AIRFLOW__CORE__EXECUTOR: LocalExecutor
49
+ AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://admin:${POSTGRES_PASSWORD:-localpassword}@postgres:5432/flights
50
+ AIRFLOW__CORE__FERNET_KEY: ${AIRFLOW_FERNET_KEY:-}
51
+ AIRFLOW__WEBSERVER__SECRET_KEY: ${AIRFLOW_SECRET_KEY:-changeme}
52
+ AIRFLOW__CORE__LOAD_EXAMPLES: "false"
53
+ MLFLOW_TRACKING_URI: http://mlflow:5000
54
+ ports: ["8080:8080"]
55
+ volumes:
56
+ - ./dags:/opt/airflow/dags
57
+ - ./etl:/opt/airflow/etl
58
+ - ./ml:/opt/airflow/ml
59
+ - ./mlops:/opt/airflow/mlops
60
+ - ./data:/opt/airflow/data
61
+ command: >
62
+ bash -c "airflow db migrate &&
63
+ airflow users create --username admin --password admin
64
+ --firstname Admin --lastname User --role Admin --email admin@example.com &&
65
+ (airflow webserver & exec airflow scheduler)"
66
+
67
+ # ── Inference API ────────────────────────────────────────────────────────────
68
+ inference-api:
69
+ build: .
70
+ depends_on:
71
+ postgres: { condition: service_healthy }
72
+ ports: ["8000:8000"]
73
+ environment:
74
+ DATABASE_URL: postgresql://admin:${POSTGRES_PASSWORD:-localpassword}@postgres:5432/flights
75
+ MLFLOW_TRACKING_URI: http://mlflow:5000
76
+ MODEL_VERSION: local
77
+ volumes:
78
+ - ./models:/app/models # mount trained models
79
+ - ./data:/app/data
80
+ healthcheck:
81
+ test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
82
+ interval: 30s
83
+ timeout: 10s
84
+ retries: 3
85
+
86
+ # ── Prometheus ───────────────────────────────────────────────────────────────
87
+ prometheus:
88
+ image: prom/prometheus:v2.51.0
89
+ ports: ["9090:9090"]
90
+ volumes:
91
+ - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
92
+ - prometheus_data:/prometheus
93
+ command:
94
+ - '--config.file=/etc/prometheus/prometheus.yml'
95
+ - '--storage.tsdb.retention.time=15d'
96
+
97
+ # ── Grafana ──────────────────────────────────────────────────────────────────
98
+ grafana:
99
+ image: grafana/grafana:10.4.0
100
+ depends_on: [prometheus]
101
+ ports: ["3000:3000"]
102
+ environment:
103
+ GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_PASSWORD:-admin}
104
+ volumes:
105
+ - grafana_data:/var/lib/grafana
106
+
107
+ volumes:
108
+ postgres_data:
109
+ mlruns:
110
+ prometheus_data:
111
+ grafana_data:
dvc.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ stages:
2
+ extract:
3
+ cmd: python -m etl.extract --start-date ${extract.start_date} --end-date ${extract.end_date}
4
+ params:
5
+ - params.yaml:
6
+ - extract.start_date
7
+ - extract.end_date
8
+ outs:
9
+ - data/raw/flights_raw.parquet
10
+
11
+ transform:
12
+ cmd: python -m etl.transform
13
+ deps:
14
+ - data/raw/flights_raw.parquet
15
+ - etl/transform.py
16
+ outs:
17
+ - data/processed/features.parquet
18
+ - models/label_encoders.pkl
19
+
20
+ train:
21
+ cmd: python -m ml.train --data data/processed/features.parquet
22
+ deps:
23
+ - data/processed/features.parquet
24
+ - ml/train.py
25
+ - ml/config.py
26
+ params:
27
+ - params.yaml:
28
+ - model.n_estimators
29
+ - model.max_depth
30
+ - model.learning_rate
31
+ - model.subsample
32
+ - model.colsample_bytree
33
+ metrics:
34
+ - metrics/scores.json:
35
+ cache: false
36
+ outs:
37
+ - models/best_model.pkl
38
+
39
+ evaluate:
40
+ cmd: python -m ml.evaluate
41
+ deps:
42
+ - models/best_model.pkl
43
+ - data/processed/features.parquet
44
+ - ml/evaluate.py
45
+ outs:
46
+ - reports/roc_curve.png:
47
+ cache: false
48
+ - reports/pr_curve.png:
49
+ cache: false
50
+ - reports/eval_metrics.json:
51
+ cache: false
params.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # params.yaml — Single source of truth for pipeline parameters.
2
+ # DVC reads this file to detect when stages need to re-run.
3
+ # MLflow logs these automatically during training.
4
+
5
+ extract:
6
+ start_date: "2023-01-01"
7
+ end_date: "2024-01-01"
8
+ airline: null # null = all airlines
9
+
10
+ model:
11
+ n_estimators: 500
12
+ max_depth: 6
13
+ learning_rate: 0.05
14
+ subsample: 0.8
15
+ colsample_bytree: 0.8
16
+ scale_pos_weight: 2.5
17
+ cv_folds: 5
18
+ random_state: 42
19
+
20
+ evaluate:
21
+ threshold: 0.5
22
+
23
+ monitoring:
24
+ drift_share_threshold: 0.30
requirements.txt ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Core ──────────────────────────────────────────────────────────────────────
2
+ pandas==2.2.2
3
+ numpy==1.26.4
4
+ scikit-learn==1.4.2
5
+
6
+ # ── ML Models ─────────────────────────────────────────────────────────────────
7
+ xgboost==2.0.3
8
+ lightgbm==4.3.0
9
+ hyperopt==0.2.7
10
+ shap==0.45.0
11
+
12
+ # ── MLOps ─────────────────────────────────────────────────────────────────────
13
+ mlflow==2.13.0
14
+ dvc[s3]==3.50.1
15
+
16
+ # ── API ───────────────────────────────────────────────────────────────────────
17
+ fastapi==0.111.0
18
+ uvicorn[standard]==0.29.0
19
+ pydantic==2.7.1
20
+ python-multipart==0.0.9
21
+
22
+ # ── ETL / Data ────────────────────────────────────────────────────────────────
23
+ requests==2.32.2
24
+ sqlalchemy==2.0.30
25
+ psycopg2-binary==2.9.9
26
+ pyarrow==16.0.0
27
+ great-expectations==0.18.19
28
+ # apache-airflow==2.9.1 # not installed here: it requires SQLAlchemy <2.0 (conflicts with sqlalchemy==2.0.30 above); Airflow runs from the apache/airflow:2.9.1 image in docker-compose.yml
29
+
30
+ # ── Cloud ─────────────────────────────────────────────────────────────────────
31
+ boto3==1.34.106
32
+
33
+ # ── Monitoring ────────────────────────────────────────────────────────────────
34
+ evidently==0.4.30
35
+ prometheus-client==0.20.0
36
+
37
+ # ── UI (HF Spaces) ────────────────────────────────────────────────────────────
38
+ gradio==4.31.0
39
+
40
+ # ── Dev / Testing ─────────────────────────────────────────────────────────────
41
+ pytest==8.2.0
42
+ pytest-cov==5.0.0
43
+ httpx==0.27.0 # FastAPI TestClient
44
+ ruff==0.4.4
45
+ mypy==1.10.0
46
+ loguru==0.7.2
47
+ python-dotenv==1.0.1