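# docker-compose for an Airflow 3 + Postgres + RAG API stack.
# Adapted from the official Apache Airflow docker-compose template.
#
# Bring-up (one-off init first, then the full stack):
#   docker compose up airflow-init
#   docker compose up -d
#
# Expected .env variables (names are the ones referenced below; values are examples):
#   AIRFLOW_UID=50000
#   POSTGRES_USER=airflow
#   POSTGRES_PASSWORD=airflow
#   AIRFLOW__CORE__FERNET_KEY=...
#   JWT_SECRET=...
#   AIRFLOW__WEBSERVER__SECRET_KEY=...
#   AIRFLOW_WWW_USER_USERNAME=airflow
#   AIRFLOW_WWW_USER_PASSWORD=...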
x-airflow-common:
  &airflow-common
  build:
    context: ./airflow
    dockerfile: Dockerfile.airflow
  environment:
    &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: LocalExecutor
    AIRFLOW__CORE__AUTH_MANAGER: airflow.providers.fab.auth_manager.fab_auth_manager.FabAuthManager
    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres/airflow # must match the postgres service credentials below
    AIRFLOW__CORE__FERNET_KEY: ${AIRFLOW__CORE__FERNET_KEY} # encrypts credentials stored in the metadata DB; required for multi-user/production use
    AIRFLOW__API_AUTH__JWT_SECRET: ${JWT_SECRET}
    AIRFLOW__WEBSERVER__SECRET_KEY: ${AIRFLOW__WEBSERVER__SECRET_KEY}
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
    AIRFLOW__WEBSERVER__BASE_URL: http://localhost:8080
    AIRFLOW__CORE__EXECUTION_API_SERVER_URL: 'http://airflow-apiserver:8080/execution/'
    AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
    # _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-}
    AIRFLOW_CONFIG: '/opt/airflow/config/airflow.cfg'
    # MLFLOW_TRACKING_URI: 'http://mlflow:5000'
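    # Example secret generation (one way to do it; any secure random source works):
    #   python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"  # FERNET_KEY
    #   openssl rand -hex 32                                                                       # JWT_SECRET / SECRET_KEY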

  volumes:
    - ${AIRFLOW_PROJ_DIR:-.}/airflow/dags:/opt/airflow/dags
    - ${AIRFLOW_PROJ_DIR:-.}/airflow/logs:/opt/airflow/logs
    - ${AIRFLOW_PROJ_DIR:-.}/airflow/config:/opt/airflow/config
    - ${AIRFLOW_PROJ_DIR:-.}/airflow/plugins:/opt/airflow/plugins
    - ${AIRFLOW_PROJ_DIR:-.}/data:/opt/airflow/data
    - ${AIRFLOW_PROJ_DIR:-.}/scripts:/opt/airflow/scripts
    - ${AIRFLOW_PROJ_DIR:-.}/model:/opt/airflow/model
    - ${AIRFLOW_PROJ_DIR:-.}/app:/opt/airflow/app
  user: "${AIRFLOW_UID:-50000}:0"
  depends_on:
    &airflow-common-depends-on
    postgres:
      condition: service_healthy

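# Each Airflow service below pulls in this shared block via the YAML merge key
# (`<<: *airflow-common`) and overrides only its own command and healthcheck.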
services:
  postgres:
    image: postgres:13
    environment:
      POSTGRES_USER: ${POSTGRES_USER} # these credentials must match AIRFLOW__DATABASE__SQL_ALCHEMY_CONN above
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: airflow
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 10s
      retries: 5
      start_period: 5s
    restart: always

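  # In Airflow 3 the api-server replaces the Airflow 2 webserver: it serves the UI,
  # the public REST API (/api/v2), and the task execution API on port 8080.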
  airflow-apiserver:
    <<: *airflow-common
    command: api-server
    ports:
      - "8080:8080"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/api/v2/version"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    restart: always
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully

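  # The scheduler's standalone health endpoint on port 8974 (the default) is enabled
  # by AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK above.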
  airflow-scheduler:
    <<: *airflow-common
    command: scheduler
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8974/health"]
      interval: 30s
      timeout: 60s
      retries: 5
      start_period: 30s
    restart: always
    depends_on:
      airflow-init:
        condition: service_completed_successfully

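  # Airflow 3 runs DAG parsing in a dedicated dag-processor service rather than
  # inside the scheduler.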
  airflow-dag-processor:
    <<: *airflow-common
    command: dag-processor
    healthcheck:
      test: ["CMD-SHELL", 'airflow jobs check --job-type DagProcessorJob --hostname "$${HOSTNAME}"']
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    restart: always
    depends_on:
      airflow-init:
        condition: service_completed_successfully

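  # The triggerer runs the async event loop that deferrable operators hand off to
  # while they wait on external events.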
  airflow-triggerer:
    <<: *airflow-common
    command: triggerer
    healthcheck:
      test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
      interval: 40s
      timeout: 10s
      retries: 5
      start_period: 40s
    restart: always
    depends_on:
      airflow-init:
        condition: service_completed_successfully

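  # One-shot bootstrap: migrates the metadata DB (_AIRFLOW_DB_MIGRATE), creates the
  # initial admin user, and fixes volume ownership (hence user "0:0"). The other
  # services wait on it via service_completed_successfully.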
  airflow-init:
    <<: *airflow-common
    entrypoint: /bin/bash
    command:
      - -c
      - |
        [[ -z "${AIRFLOW_UID}" ]] && export AIRFLOW_UID=$(id -u)
        mkdir -p /opt/airflow/{logs,dags,plugins,config}
        /entrypoint airflow config list >/dev/null
        chown -R "${AIRFLOW_UID}:0" /opt/airflow
        ls -la /opt/airflow/{logs,dags,plugins,config}
    environment:
      <<: *airflow-common-env
      _AIRFLOW_DB_MIGRATE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${AIRFLOW_WWW_USER_USERNAME}
      _AIRFLOW_WWW_USER_PASSWORD: ${AIRFLOW_WWW_USER_PASSWORD}
      # _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS}
    user: "0:0"

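  # Debug helper, only started with the "debug" profile, e.g.:
  #   docker compose --profile debug run --rm airflow-cli airflow info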
  airflow-cli:
    <<: *airflow-common
    profiles: [debug]
    command:
      - bash
      - -c
      - airflow
    environment:
      <<: *airflow-common-env
      CONNECTION_CHECK_MAX_COUNT: "0"
    depends_on:
      postgres:
        condition: service_healthy

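  # RAG inference API built from ./app; it shares the vectorstore produced by the
  # Airflow pipeline. Assuming a FastAPI app, docs land at http://localhost:8000/docs.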
  rag-api:
    build: ./app
    ports:
      - "8000:8000"
    volumes:
      - ./vectorstore:/app/vectorstore

  # streamlit:
  #   container_name: streamlit_app
  #   build:
  #     context: ./app
  #     dockerfile: Dockerfile.streamlit
  #   volumes:
  #     - ./app:/app
  #     - ./data:/data
  #     - ./scripts:/scripts
  #   working_dir: /app
  #   ports:
  #     - "127.0.0.1:8501:8501"

  # mlflow:
  #   build:
  #     context: ./mlflow
  #     dockerfile: Dockerfile.mlflow
  #   ports:
  #     - "127.0.0.1:5000:5000"

  # Optional nginx reverse proxy for shared/internal environments; putting the
  # UIs behind nginx adds a measure of security when exposed in production.
  # nginx:
  #   image: nginx:alpine
  #   ports:
  #     - "80:80"
  #   volumes:
  #     - ./nginx/default.conf:/etc/nginx/conf.d/default.conf:ro
  #     - ./nginx/htpasswd:/etc/nginx/.htpasswd:ro
  #   depends_on:
  #     - airflow-apiserver
  #     - mlflow
  #     - streamlit

volumes:
  postgres-db-volume: