Georg committed on
Commit
75b68fd
·
1 Parent(s): 22ad055

Prepare job build context

Browse files
Files changed (1) hide show
  1. deploy.sh +38 -12
deploy.sh CHANGED
@@ -85,9 +85,10 @@ JOB_ID=$(echo "${JOB_OUTPUT}" | awk '/Job ID:/ {print $3}')
85
  if [ -z "${JOB_ID}" ]; then
86
  echo "Warning: Could not parse HF job id. See /tmp/hf_image_job.log"
87
  else
88
- echo "Following job logs for 1 minute..."
89
  if [ -x "${HF_BIN}" ]; then
90
  HF_BIN_PATH="${HF_BIN}" JOB_ID="${JOB_ID}" "${PY_BIN}" - <<'PY'
 
91
  import os
92
  import subprocess
93
  import sys
@@ -96,15 +97,27 @@ import time
96
  hf = os.environ["HF_BIN_PATH"]
97
  job_id = os.environ["JOB_ID"]
98
 
99
- proc = subprocess.Popen([hf, "jobs", "logs", job_id], stdout=sys.stdout, stderr=sys.stderr)
100
  try:
101
- time.sleep(60)
 
 
 
 
 
 
 
 
 
 
 
 
102
  finally:
103
- proc.terminate()
104
  try:
105
- proc.wait(timeout=5)
106
  except Exception:
107
- proc.kill()
108
  PY
109
  echo ""
110
  echo "Job status:"
@@ -154,9 +167,10 @@ JOB_ID_L2=$(echo "${JOB_OUTPUT_L2}" | awk '/Job ID:/ {print $3}')
154
  if [ -z "${JOB_ID_L2}" ]; then
155
  echo "Warning: Could not parse HF job id for L2. See /tmp/hf_image_job_l2.log"
156
  else
157
- echo "Following L2 job logs for 1 minute..."
158
  if [ -x "${HF_BIN}" ]; then
159
  HF_BIN_PATH="${HF_BIN}" JOB_ID="${JOB_ID_L2}" "${PY_BIN}" - <<'PY'
 
160
  import os
161
  import subprocess
162
  import sys
@@ -165,15 +179,27 @@ import time
165
  hf = os.environ["HF_BIN_PATH"]
166
  job_id = os.environ["JOB_ID"]
167
 
168
- proc = subprocess.Popen([hf, "jobs", "logs", job_id], stdout=sys.stdout, stderr=sys.stderr)
169
  try:
170
- time.sleep(60)
 
 
 
 
 
 
 
 
 
 
 
 
171
  finally:
172
- proc.terminate()
173
  try:
174
- proc.wait(timeout=5)
175
  except Exception:
176
- proc.kill()
177
  PY
178
  echo ""
179
  echo "L2 job status:"
 
85
  if [ -z "${JOB_ID}" ]; then
86
  echo "Warning: Could not parse HF job id. See /tmp/hf_image_job.log"
87
  else
88
+ echo "Following job logs until completion..."
89
  if [ -x "${HF_BIN}" ]; then
90
  HF_BIN_PATH="${HF_BIN}" JOB_ID="${JOB_ID}" "${PY_BIN}" - <<'PY'
91
+ import json
92
  import os
93
  import subprocess
94
  import sys
 
97
  hf = os.environ["HF_BIN_PATH"]
98
  job_id = os.environ["JOB_ID"]
99
 
100
+ log_proc = subprocess.Popen([hf, "jobs", "logs", job_id], stdout=sys.stdout, stderr=sys.stderr)
101
  try:
102
+ while True:
103
+ inspect = subprocess.run([hf, "jobs", "inspect", job_id], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
104
+ if inspect.returncode == 0:
105
+ try:
106
+ data = json.loads(inspect.stdout)[0]
107
+ stage = (data.get("status") or {}).get("stage", "UNKNOWN")
108
+ except Exception:
109
+ stage = "UNKNOWN"
110
+ else:
111
+ stage = "UNKNOWN"
112
+ if stage in {"SUCCESS","SUCCEEDED","COMPLETED","DONE","FAILED","ERROR","CANCELED","CANCELLED"}:
113
+ break
114
+ time.sleep(15)
115
  finally:
116
+ log_proc.terminate()
117
  try:
118
+ log_proc.wait(timeout=5)
119
  except Exception:
120
+ log_proc.kill()
121
  PY
122
  echo ""
123
  echo "Job status:"
 
167
  if [ -z "${JOB_ID_L2}" ]; then
168
  echo "Warning: Could not parse HF job id for L2. See /tmp/hf_image_job_l2.log"
169
  else
170
+ echo "Following L2 job logs until completion..."
171
  if [ -x "${HF_BIN}" ]; then
172
  HF_BIN_PATH="${HF_BIN}" JOB_ID="${JOB_ID_L2}" "${PY_BIN}" - <<'PY'
173
+ import json
174
  import os
175
  import subprocess
176
  import sys
 
179
  hf = os.environ["HF_BIN_PATH"]
180
  job_id = os.environ["JOB_ID"]
181
 
182
+ log_proc = subprocess.Popen([hf, "jobs", "logs", job_id], stdout=sys.stdout, stderr=sys.stderr)
183
  try:
184
+ while True:
185
+ inspect = subprocess.run([hf, "jobs", "inspect", job_id], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
186
+ if inspect.returncode == 0:
187
+ try:
188
+ data = json.loads(inspect.stdout)[0]
189
+ stage = (data.get("status") or {}).get("stage", "UNKNOWN")
190
+ except Exception:
191
+ stage = "UNKNOWN"
192
+ else:
193
+ stage = "UNKNOWN"
194
+ if stage in {"SUCCESS","SUCCEEDED","COMPLETED","DONE","FAILED","ERROR","CANCELED","CANCELLED"}:
195
+ break
196
+ time.sleep(15)
197
  finally:
198
+ log_proc.terminate()
199
  try:
200
+ log_proc.wait(timeout=5)
201
  except Exception:
202
+ log_proc.kill()
203
  PY
204
  echo ""
205
  echo "L2 job status:"