arthurcornelio88 committed on
Commit
89660d7
·
1 Parent(s): c53595e
Files changed (1) hide show
  1. start.sh +97 -37
start.sh CHANGED
@@ -9,22 +9,35 @@ log() {
9
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
10
  }
11
 
12
- ### 🔹 Generate secrets if not set
13
- log "🔑 Checking and generating secrets..."
14
- export AIRFLOW__WEBSERVER__SECRET_KEY="${AIRFLOW__WEBSERVER__SECRET_KEY:-$(openssl rand -hex 32)}"
15
- export AIRFLOW__WEBSERVER__COOKIE_SECRET="${AIRFLOW__WEBSERVER__COOKIE_SECRET:-$(openssl rand -hex 32)}"
16
- log "✅ Generated AIRFLOW__WEBSERVER__SECRET_KEY and COOKIE_SECRET"
 
 
 
 
 
17
 
18
- ### 🔹 Ensure required system commands exist
19
  log "🔍 Checking required system commands..."
20
- for cmd in pgrep ss; do
21
- if ! command -v "$cmd" &>/dev/null; then
22
- log "❌ $cmd not found! Installing..."
23
- apt-get update && apt-get install -y procps iproute2
24
- fi
25
- done
 
 
 
26
 
27
- ### 🔹 Authenticate using Google Cloud Service Account Key
 
 
 
 
 
 
28
  if [[ -n "${GOOGLE_APPLICATION_CREDENTIALS_CONTENT:-}" ]]; then
29
  log "🔐 Setting up Google Cloud credentials..."
30
  echo "$GOOGLE_APPLICATION_CREDENTIALS_CONTENT" > /opt/airflow/hf-airflow-key.json
@@ -40,34 +53,46 @@ else
40
  exit 1
41
  fi
42
 
43
- ### 🔹 Fetch GCP Project ID
44
  log "🔍 Fetching GCP_PROJECT_ID..."
45
- GCP_PROJECT_ID=$(gcloud secrets versions access latest --secret=gcp-project-id --project=jedha2024 2>/dev/null | tr -d ' \r\n')
 
 
 
46
 
47
  if [[ -z "$GCP_PROJECT_ID" || ! "$GCP_PROJECT_ID" =~ ^[a-zA-Z0-9-]+$ ]]; then
48
  log "❌ ERROR: GCP_PROJECT_ID is missing or invalid ('$GCP_PROJECT_ID'). Exiting..."
49
  exit 1
50
  fi
 
 
51
  export GCP_PROJECT_ID
 
52
  gcloud config set project "$GCP_PROJECT_ID"
53
- log "✅ Retrieved GCP_PROJECT_ID: $GCP_PROJECT_ID"
54
 
55
  ### 🔹 Fetch Other Required Secrets
56
- for secret in gcp-zone airflow-db-url; do
57
- log "🔍 Fetching $secret..."
58
- var_name=$(echo "$secret" | tr '-' '_') # Remplace uniquement le "-" par "_"
59
- value=$(gcloud secrets versions access latest --secret="$secret" --project="$GCP_PROJECT_ID" 2>/dev/null)
60
- export "$var_name"="$value" # Stocke la valeur brute, sans la modifier
61
- log " Retrieved $var_name"
62
- done
 
 
63
 
 
 
64
 
65
- ### 🔹 Extract Service Account Email
 
66
  SERVICE_ACCOUNT_EMAIL=$(jq -r '.client_email' /opt/airflow/hf-airflow-key.json)
 
67
  if [[ -z "$SERVICE_ACCOUNT_EMAIL" || "$SERVICE_ACCOUNT_EMAIL" == "null" ]]; then
68
  log "❌ ERROR: Failed to extract service account email from credentials JSON. Exiting..."
69
  exit 1
70
  fi
 
71
  export SERVICE_ACCOUNT_EMAIL
72
  log "✅ Retrieved SERVICE_ACCOUNT_EMAIL: $SERVICE_ACCOUNT_EMAIL"
73
 
@@ -79,10 +104,11 @@ GCP_APIS=(
79
  "secretmanager.googleapis.com"
80
  "iam.googleapis.com"
81
  )
 
82
  for API in "${GCP_APIS[@]}"; do
83
  gcloud services enable "$API" --project="$GCP_PROJECT_ID" &
84
  done
85
- wait
86
  log "✅ All required GCP APIs are enabled."
87
 
88
  ### 🔹 Airflow Database Setup
@@ -99,35 +125,65 @@ log "🔹 Setting Airflow variables..."
99
  airflow variables set GCP_PROJECT_ID "$GCP_PROJECT_ID"
100
  airflow variables set GCP_ZONE "$GCP_ZONE"
101
  airflow variables set SERVICE_ACCOUNT_EMAIL "$SERVICE_ACCOUNT_EMAIL"
 
102
  log "✅ Airflow variables set."
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  ### 🚀 Start Airflow Services
105
- log "🛠 Starting Airflow webserver..."
106
- exec airflow webserver --debug --port 7860 --host 0.0.0.0 &> /opt/airflow/logs/webserver.log &
107
- sleep 5
108
 
109
- log "🛠 Starting Airflow scheduler..."
110
- exec airflow scheduler &> /opt/airflow/logs/scheduler.log &
 
 
 
 
 
111
  log "✅ Airflow services started."
112
 
113
- ### 🔹 Verify Airflow is Running
 
114
  sleep 10
 
 
115
  log "🛠 Checking if Airflow processes are running..."
116
- if pgrep -fa "airflow" &>/dev/null; then
117
  log "✅ Airflow processes detected!"
118
  else
119
  log "❌ No running Airflow processes found!"
120
  fi
121
 
122
- ### 🔹 Verify Airflow Webserver Port
123
  log "🛠 Checking if Airflow is listening on port 7860..."
124
- if ss -tulnp | grep ":7860" &>/dev/null; then
125
  log "✅ Airflow is listening on port 7860!"
126
  else
127
  log "❌ No process found on port 7860!"
128
  fi
129
 
130
- ### 🔹 Fetch Last Logs
131
  log "🛠 Checking available Airflow logs..."
132
  ls -lah /opt/airflow/logs/ > /opt/airflow/logs/available_logs.log
133
 
@@ -139,10 +195,14 @@ else
139
  log "❌ No Airflow logs found!"
140
  fi
141
 
142
- ### 🔹 Test Set-Cookie Header
143
  log "🛠 Testing Set-Cookie header..."
144
  curl -i http://127.0.0.1:7860 | grep Set-Cookie | tee /opt/airflow/logs/set_cookie.log || log "❌ No Set-Cookie found!"
145
 
146
- ### 🔹 Keep Logs Visible
 
 
 
 
147
  log "🛠 Keeping logs visible for debugging..."
148
  tail -f /opt/airflow/logs/webserver.log
 
9
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
10
  }
11
 
12
+ # Generate secrets if they are not already set
13
+ if [[ -z "$AIRFLOW__WEBSERVER__SECRET_KEY" ]]; then
14
+ export AIRFLOW__WEBSERVER__SECRET_KEY=$(openssl rand -hex 32)
15
+ fi
16
+
17
+ if [[ -z "$AIRFLOW__WEBSERVER__COOKIE_SECRET" ]]; then
18
+ export AIRFLOW__WEBSERVER__COOKIE_SECRET=$(openssl rand -hex 32)
19
+ fi
20
+
21
+ echo "🔑 Generated AIRFLOW__WEBSERVER__SECRET_KEY and COOKIE_SECRET"
22
 
 
23
  log "🔍 Checking required system commands..."
24
+ if ! command -v pgrep &>/dev/null; then
25
+ log "❌ pgrep not found! Installing..."
26
+ apt-get update && apt-get install -y procps
27
+ fi
28
+
29
+ if ! command -v ss &>/dev/null; then
30
+ log "❌ ss not found! Installing iproute2..."
31
+ apt-get update && apt-get install -y iproute2
32
+ fi
33
 
34
+ ### 🔹 Ensure `gcloud` is installed
35
+ if ! command -v gcloud &>/dev/null; then
36
+ log "❌ ERROR: Google Cloud SDK (gcloud) is not installed. Exiting..."
37
+ exit 1
38
+ fi
39
+
40
+ ### 🔹 Authenticate using Google Service Account Key
41
  if [[ -n "${GOOGLE_APPLICATION_CREDENTIALS_CONTENT:-}" ]]; then
42
  log "🔐 Setting up Google Cloud credentials..."
43
  echo "$GOOGLE_APPLICATION_CREDENTIALS_CONTENT" > /opt/airflow/hf-airflow-key.json
 
53
  exit 1
54
  fi
55
 
56
+ ### 🔹 Fetch GCP Project ID Directly
57
  log "🔍 Fetching GCP_PROJECT_ID..."
58
+ RAW_GCP_PROJECT_ID=$(gcloud secrets versions access latest --secret=gcp-project-id --project=jedha2024 2>/dev/null)
59
+
60
+ # Ensure clean formatting (remove spaces, newlines)
61
+ GCP_PROJECT_ID=$(echo "$RAW_GCP_PROJECT_ID" | tr -d '\r' | tr -d '\n' | tr -d ' ')
62
 
63
  if [[ -z "$GCP_PROJECT_ID" || ! "$GCP_PROJECT_ID" =~ ^[a-zA-Z0-9-]+$ ]]; then
64
  log "❌ ERROR: GCP_PROJECT_ID is missing or invalid ('$GCP_PROJECT_ID'). Exiting..."
65
  exit 1
66
  fi
67
+
68
+ log "✅ Retrieved GCP_PROJECT_ID: $GCP_PROJECT_ID"
69
  export GCP_PROJECT_ID
70
+
71
  gcloud config set project "$GCP_PROJECT_ID"
 
72
 
73
  ### 🔹 Fetch Other Required Secrets
74
+ log "🔍 Fetching GCP_ZONE..."
75
+ RAW_GCP_ZONE=$(gcloud secrets versions access latest --secret=gcp-zone --project="$GCP_PROJECT_ID" 2>/dev/null)
76
+ GCP_ZONE=$(echo "$RAW_GCP_ZONE" | tr -d '\r' | tr -d '\n' | tr -d ' ')
77
+ log "✅ Retrieved GCP_ZONE: $GCP_ZONE"
78
+
79
+ log "🔍 Fetching AIRFLOW_DB_URL..."
80
+ RAW_AIRFLOW_DB_URL=$(gcloud secrets versions access latest --secret=airflow-db-url --project="$GCP_PROJECT_ID" 2>/dev/null)
81
+ AIRFLOW_DB_URL=$(echo "$RAW_AIRFLOW_DB_URL" | tr -d '\r' | tr -d '\n' | tr -d ' ')
82
+ log "✅ Retrieved AIRFLOW_DB_URL."
83
 
84
+ export GCP_ZONE
85
+ export AIRFLOW_DB_URL
86
 
87
+ ### 🔹 Extract Service Account Email from JSON
88
+ log "🔍 Extracting service account email from credentials JSON..."
89
  SERVICE_ACCOUNT_EMAIL=$(jq -r '.client_email' /opt/airflow/hf-airflow-key.json)
90
+
91
  if [[ -z "$SERVICE_ACCOUNT_EMAIL" || "$SERVICE_ACCOUNT_EMAIL" == "null" ]]; then
92
  log "❌ ERROR: Failed to extract service account email from credentials JSON. Exiting..."
93
  exit 1
94
  fi
95
+
96
  export SERVICE_ACCOUNT_EMAIL
97
  log "✅ Retrieved SERVICE_ACCOUNT_EMAIL: $SERVICE_ACCOUNT_EMAIL"
98
 
 
104
  "secretmanager.googleapis.com"
105
  "iam.googleapis.com"
106
  )
107
+
108
  for API in "${GCP_APIS[@]}"; do
109
  gcloud services enable "$API" --project="$GCP_PROJECT_ID" &
110
  done
111
+ wait # Ensure all API enabling requests complete
112
  log "✅ All required GCP APIs are enabled."
113
 
114
  ### 🔹 Airflow Database Setup
 
125
  airflow variables set GCP_PROJECT_ID "$GCP_PROJECT_ID"
126
  airflow variables set GCP_ZONE "$GCP_ZONE"
127
  airflow variables set SERVICE_ACCOUNT_EMAIL "$SERVICE_ACCOUNT_EMAIL"
128
+
129
  log "✅ Airflow variables set."
130
 
131
+ ### 🔹 Wait for Airflow Variables to Be Available
132
+ log "🕒 Waiting for Airflow variables to be set..."
133
+ REQUIRED_VARS=("GCP_PROJECT_ID" "GCP_ZONE" "SERVICE_ACCOUNT_EMAIL")
134
+
135
+ for VAR in "${REQUIRED_VARS[@]}"; do
136
+ until [[ "$(airflow variables get "$VAR" 2>/dev/null)" != "" ]]; do
137
+ log "⏳ Waiting for $VAR to be available..."
138
+ sleep 5
139
+ done
140
+ log "✅ $VAR is set."
141
+ done
142
+
143
+ #python /opt/airflow/csrf_fix.py &
144
+ #log "✅ CSRF fix applied."
145
+
146
+ #python /opt/airflow/csrf_debug.py &
147
+ #log "✅ CSRF debug applied."
148
+
149
+ echo "Checking if CSRF token exists in session..."
150
+ python /opt/airflow/csrf_debug.py &
151
+
152
+ echo "Enabling CSRF token in headers..."
153
+ export AIRFLOW__WEBSERVER__CSRF_HEADER_NAME="X-CSRF-Token"
154
+
155
  ### 🚀 Start Airflow Services
 
 
 
156
 
157
+ log "🛠 Manually testing Airflow webserver startup..."
158
+ airflow webserver --debug &
159
+ sleep 5 # Wait a moment
160
+ pgrep -fa "airflow"
161
+
162
+ sleep 5 # Small delay before starting scheduler
163
+ airflow scheduler > /dev/null 2>&1 &
164
  log "✅ Airflow services started."
165
 
166
+ # 🕒 Wait for Airflow to start before running debug commands
167
+ log "⏳ Waiting for Airflow to stabilize..."
168
  sleep 10
169
+
170
+ ### 🔹 Check if Airflow is Running
171
  log "🛠 Checking if Airflow processes are running..."
172
+ if pgrep -fa "airflow" > /opt/airflow/logs/airflow_processes.log; then
173
  log "✅ Airflow processes detected!"
174
  else
175
  log "❌ No running Airflow processes found!"
176
  fi
177
 
178
+ ### 🔹 Check if Airflow Webserver is Listening
179
  log "🛠 Checking if Airflow is listening on port 7860..."
180
+ if ss -tulnp | grep ":7860" > /opt/airflow/logs/airflow_ports.log 2>/dev/null; then
181
  log "✅ Airflow is listening on port 7860!"
182
  else
183
  log "❌ No process found on port 7860!"
184
  fi
185
 
186
+ ### 🔹 Inspect Available Airflow Logs
187
  log "🛠 Checking available Airflow logs..."
188
  ls -lah /opt/airflow/logs/ > /opt/airflow/logs/available_logs.log
189
 
 
195
  log "❌ No Airflow logs found!"
196
  fi
197
 
198
+ ### 🔹 Test if Set-Cookie Header is Being Sent
199
  log "🛠 Testing Set-Cookie header..."
200
  curl -i http://127.0.0.1:7860 | grep Set-Cookie | tee /opt/airflow/logs/set_cookie.log || log "❌ No Set-Cookie found!"
201
 
202
+ log "🛠 Checking Airflow webserver logs..."
203
+ ls -lah /opt/airflow/logs/webserver/
204
+ cat /opt/airflow/logs/webserver/*.log | tee /opt/airflow/logs/latest_webserver.log || log "❌ No Webserver logs found!"
205
+
206
+ ### 🔹 Keep Logs Visible in Hugging Face Spaces
207
  log "🛠 Keeping logs visible for debugging..."
208
  tail -f /opt/airflow/logs/webserver.log