k-l-lambda committed on
Commit
7b97a14
·
1 Parent(s): 197a18e

fix: run model download and ML predictors in background so services start immediately

Browse files
Files changed (1) hide show
  1. docker-entrypoint.sh +82 -87
docker-entrypoint.sh CHANGED
@@ -118,107 +118,102 @@ hf_download() {
118
  fi
119
  }
120
 
121
- if [ -n "$HF_TOKEN" ] && [ -f "$MODELS_YAML" ]; then
122
- echo "Downloading model files from HuggingFace..."
123
-
124
- # Download bdtopo ONNX models
125
- if [ -n "$BDTOPO_MODEL_PATH" ] && [ ! -f "$BDTOPO_MODEL_PATH" ]; then
126
- _bdtopo_path=$(yaml_get bdtopo path)
127
- _bdtopo_dir="$MODELS_BASE/starry-dist/$_bdtopo_path"
128
- mkdir -p "$_bdtopo_dir"
 
129
 
130
- sed -n '/^ files:/,/^[^ ]/{ /^ *- /p }' "$MODELS_YAML" | sed 's/^ *- *//' | while read -r FILE; do
131
- hf_download "starry-dist" "$_bdtopo_path/$FILE"
132
- done
133
- fi
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
- # Download PyTorch model directories (starry-dist)
136
- for model_var in LAYOUT_MODEL_PATH MASK_MODEL_PATH SEMANTIC_MODEL_PATH GAUGE_MODEL_PATH; do
137
- eval model_path=\$$model_var
138
- if [ -n "$model_path" ]; then
139
- # Download .state.yaml which lists the actual model files
140
- hf_download "starry-dist" "$model_path/.state.yaml"
141
 
142
- state_yaml="$MODELS_BASE/starry-dist/$model_path/.state.yaml"
143
- if [ -f "$state_yaml" ]; then
144
- # Extract model filename from .state.yaml
145
- model_file=$(sed -n 's/^file: *//p' "$state_yaml" | head -1)
146
- if [ -n "$model_file" ]; then
147
- hf_download "starry-dist" "$model_path/$model_file"
148
- fi
149
- # Extract sub-model files if present (semantic clusters)
150
- sed -n '/^subs:/,/^[a-z]/{ /^ - /p }' "$state_yaml" | sed 's/^ *- *//' | while read -r sub; do
151
- if [ -n "$sub" ]; then
152
- hf_download "starry-dist" "$model_path/$sub/.state.yaml"
153
- sub_state="$MODELS_BASE/starry-dist/$model_path/$sub/.state.yaml"
154
- if [ -f "$sub_state" ]; then
155
- sub_file=$(sed -n 's/^file: *//p' "$sub_state" | head -1)
156
- if [ -n "$sub_file" ]; then
157
- hf_download "starry-dist" "$model_path/$sub/$sub_file"
158
  fi
159
  fi
160
- fi
161
- done
162
  fi
163
- fi
164
- done
165
 
166
- # Download OCR/TF models (ocr-dist) — these use config YAML files
167
- for config_var in LOC_MODEL_PATH OCR_CONFIG BRACKETS_CONFIG; do
168
- eval config_path=\$$config_var
169
- if [ -n "$config_path" ]; then
170
- # For loc, the path is a directory; for ocr/brackets, it's a YAML config
171
- if echo "$config_path" | grep -q '\.yaml$\|\.yml$'; then
172
- # Download the config YAML
173
- hf_download "ocr-dist" "$config_path"
174
- config_file="$MODELS_BASE/ocr-dist/$config_path"
175
- if [ -f "$config_file" ]; then
176
- # Extract model paths from the config
177
- sed -n 's/^ *model_path: *//p' "$config_file" | while read -r mpath; do
178
- if [ -n "$mpath" ]; then
179
- # Download all files in the model directory
180
- _dir=$(dirname "$config_path")
181
- hf_download "ocr-dist" "$_dir/$mpath"
 
 
 
 
 
 
182
  fi
183
- done
184
- fi
185
- else
186
- # Directory-based model (loc) — download .state.yaml and model file
187
- hf_download "ocr-dist" "$config_path/.state.yaml"
188
- state_yaml="$MODELS_BASE/ocr-dist/$config_path/.state.yaml"
189
- if [ -f "$state_yaml" ]; then
190
- model_file=$(sed -n 's/^file: *//p' "$state_yaml" | head -1)
191
- if [ -n "$model_file" ]; then
192
- hf_download "ocr-dist" "$config_path/$model_file"
193
  fi
194
  fi
195
  fi
196
- fi
197
- done
198
-
199
- echo "Model download complete."
200
- fi
201
 
202
- # ── Set predictor addresses (internal ZMQ) ──
203
- export PREDICTOR_LAYOUT="${PREDICTOR_LAYOUT:-tcp://127.0.0.1:12022}"
204
- export PREDICTOR_GAUGE="${PREDICTOR_GAUGE:-tcp://127.0.0.1:12023}"
205
- export PREDICTOR_GAUGE_RENDERER="${PREDICTOR_GAUGE_RENDERER:-tcp://127.0.0.1:15656}"
206
- export PREDICTOR_MASK="${PREDICTOR_MASK:-tcp://127.0.0.1:12024}"
207
- export PREDICTOR_SEMANTIC="${PREDICTOR_SEMANTIC:-tcp://127.0.0.1:12025}"
208
- export PREDICTOR_LOC="${PREDICTOR_LOC:-tcp://127.0.0.1:12026}"
209
- export PREDICTOR_OCR="${PREDICTOR_OCR:-tcp://127.0.0.1:12027}"
210
- export PREDICTOR_BRACKETS="${PREDICTOR_BRACKETS:-tcp://127.0.0.1:12028}"
211
 
212
- # ── Start Python ML predictors via supervisord ──
213
- if [ -f /home/node/app/supervisord.conf ] && [ -d /home/node/app/backend/python-services ]; then
214
- if [ -n "$LAYOUT_MODEL_PATH" ]; then
215
- echo "Starting Python ML predictors via supervisord..."
216
- supervisord -c /home/node/app/supervisord.conf &
217
- sleep 2
218
- else
219
- echo "Skipping ML predictors (no model paths configured)."
220
  fi
221
- fi
222
 
223
  # ── Run database migrations ──
224
  echo 'Running database migrations...'
 
118
  fi
119
  }
120
 
121
+ # ── Set predictor addresses (internal ZMQ) ──
122
+ export PREDICTOR_LAYOUT="${PREDICTOR_LAYOUT:-tcp://127.0.0.1:12022}"
123
+ export PREDICTOR_GAUGE="${PREDICTOR_GAUGE:-tcp://127.0.0.1:12023}"
124
+ export PREDICTOR_GAUGE_RENDERER="${PREDICTOR_GAUGE_RENDERER:-tcp://127.0.0.1:15656}"
125
+ export PREDICTOR_MASK="${PREDICTOR_MASK:-tcp://127.0.0.1:12024}"
126
+ export PREDICTOR_SEMANTIC="${PREDICTOR_SEMANTIC:-tcp://127.0.0.1:12025}"
127
+ export PREDICTOR_LOC="${PREDICTOR_LOC:-tcp://127.0.0.1:12026}"
128
+ export PREDICTOR_OCR="${PREDICTOR_OCR:-tcp://127.0.0.1:12027}"
129
+ export PREDICTOR_BRACKETS="${PREDICTOR_BRACKETS:-tcp://127.0.0.1:12028}"
130
 
131
+ # ── Download models & start ML predictors (background) ──
132
+ # Runs in background so Node services and nginx can start immediately.
133
+ (
134
+ if [ -n "$HF_TOKEN" ] && [ -f "$MODELS_YAML" ]; then
135
+ echo "Downloading model files from HuggingFace (background)..."
136
+
137
+ # Download bdtopo ONNX models
138
+ if [ -n "$BDTOPO_MODEL_PATH" ] && [ ! -f "$BDTOPO_MODEL_PATH" ]; then
139
+ _bdtopo_path=$(yaml_get bdtopo path)
140
+ _bdtopo_dir="$MODELS_BASE/starry-dist/$_bdtopo_path"
141
+ mkdir -p "$_bdtopo_dir"
142
+
143
+ sed -n '/^ files:/,/^[^ ]/{ /^ *- /p }' "$MODELS_YAML" | sed 's/^ *- *//' | while read -r FILE; do
144
+ hf_download "starry-dist" "$_bdtopo_path/$FILE"
145
+ done
146
+ fi
147
 
148
+ # Download PyTorch model directories (starry-dist)
149
+ for model_var in LAYOUT_MODEL_PATH MASK_MODEL_PATH SEMANTIC_MODEL_PATH GAUGE_MODEL_PATH; do
150
+ eval model_path=\$$model_var
151
+ if [ -n "$model_path" ]; then
152
+ hf_download "starry-dist" "$model_path/.state.yaml"
 
153
 
154
+ state_yaml="$MODELS_BASE/starry-dist/$model_path/.state.yaml"
155
+ if [ -f "$state_yaml" ]; then
156
+ model_file=$(sed -n 's/^file: *//p' "$state_yaml" | head -1)
157
+ if [ -n "$model_file" ]; then
158
+ hf_download "starry-dist" "$model_path/$model_file"
159
+ fi
160
+ sed -n '/^subs:/,/^[a-z]/{ /^ - /p }' "$state_yaml" | sed 's/^ *- *//' | while read -r sub; do
161
+ if [ -n "$sub" ]; then
162
+ hf_download "starry-dist" "$model_path/$sub/.state.yaml"
163
+ sub_state="$MODELS_BASE/starry-dist/$model_path/$sub/.state.yaml"
164
+ if [ -f "$sub_state" ]; then
165
+ sub_file=$(sed -n 's/^file: *//p' "$sub_state" | head -1)
166
+ if [ -n "$sub_file" ]; then
167
+ hf_download "starry-dist" "$model_path/$sub/$sub_file"
168
+ fi
 
169
  fi
170
  fi
171
+ done
172
+ fi
173
  fi
174
+ done
 
175
 
176
+ # Download OCR/TF models (ocr-dist)
177
+ for config_var in LOC_MODEL_PATH OCR_CONFIG BRACKETS_CONFIG; do
178
+ eval config_path=\$$config_var
179
+ if [ -n "$config_path" ]; then
180
+ if echo "$config_path" | grep -q '\.yaml$\|\.yml$'; then
181
+ hf_download "ocr-dist" "$config_path"
182
+ config_file="$MODELS_BASE/ocr-dist/$config_path"
183
+ if [ -f "$config_file" ]; then
184
+ sed -n 's/^ *model_path: *//p' "$config_file" | while read -r mpath; do
185
+ if [ -n "$mpath" ]; then
186
+ _dir=$(dirname "$config_path")
187
+ hf_download "ocr-dist" "$_dir/$mpath"
188
+ fi
189
+ done
190
+ fi
191
+ else
192
+ hf_download "ocr-dist" "$config_path/.state.yaml"
193
+ state_yaml="$MODELS_BASE/ocr-dist/$config_path/.state.yaml"
194
+ if [ -f "$state_yaml" ]; then
195
+ model_file=$(sed -n 's/^file: *//p' "$state_yaml" | head -1)
196
+ if [ -n "$model_file" ]; then
197
+ hf_download "ocr-dist" "$config_path/$model_file"
198
  fi
 
 
 
 
 
 
 
 
 
 
199
  fi
200
  fi
201
  fi
202
+ done
 
 
 
 
203
 
204
+ echo "Model download complete."
205
+ fi
 
 
 
 
 
 
 
206
 
207
+ # Start ML predictors after models are downloaded
208
+ if [ -f /home/node/app/supervisord.conf ] && [ -d /home/node/app/backend/python-services ]; then
209
+ if [ -n "$LAYOUT_MODEL_PATH" ]; then
210
+ echo "Starting Python ML predictors via supervisord..."
211
+ supervisord -c /home/node/app/supervisord.conf
212
+ else
213
+ echo "Skipping ML predictors (no model paths configured)."
214
+ fi
215
  fi
216
+ ) &
217
 
218
  # ── Run database migrations ──
219
  echo 'Running database migrations...'