tao-shen Claude Opus 4.6 committed on
Commit
80c4d9b
·
1 Parent(s): 980afb9

fix: start nginx first, restore in background

Browse files

snapshot_download of 7000+ files runs before port 7860 opens and takes
too long, causing the HF startup timeout (RUNTIME_ERROR).

Fix: start all services + nginx immediately, then run restore
(download + rsync) in a background thread.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. entrypoint.py +22 -14
entrypoint.py CHANGED
@@ -498,8 +498,6 @@ def main():
498
  resolve_config()
499
  system_info()
500
  ensure_dataset_repo()
501
- restore_state()
502
- restore_packages()
503
  ensure_passwords()
504
 
505
  # Write env for other processes
@@ -508,29 +506,39 @@ def main():
508
  f.write(f'export HF_DATASET_REPO="{HF_DATASET_REPO}"\n')
509
  f.write(f'export PERSIST_PATH="{PERSIST_PATH}"\n')
510
 
511
- # Start services
512
  start_sshd()
513
  start_ws_bridge()
514
  start_ttyd()
515
  start_log_streamer()
516
 
517
- # Background threads
518
- threading.Thread(target=sync_loop, daemon=True).start()
519
- threading.Thread(target=heartbeat_loop, daemon=True).start()
 
 
520
 
521
  log("========================================")
522
- log("system ready")
523
  log(f" Terminal: https://<space>.hf.space/")
524
  log(f" Logs: https://<space>.hf.space/runlog")
525
- log(f" SSE: https://<space>.hf.space/runlog/stream")
526
  log("========================================")
527
 
528
- # Start nginx as subprocess (NOT exec we need threads to stay alive)
529
- log("starting nginx on 0.0.0.0:7860 ...")
530
- nginx_proc = subprocess.Popen(
531
- ["nginx", "-c", "/etc/nginx/nginx.conf", "-g", "daemon off;"]
532
- )
533
- log(f"[ OK ] nginx PID={nginx_proc.pid}")
 
 
 
 
 
 
 
 
 
534
 
535
  # Final save on SIGTERM
536
  def on_sigterm(sig, frame):
 
498
  resolve_config()
499
  system_info()
500
  ensure_dataset_repo()
 
 
501
  ensure_passwords()
502
 
503
  # Write env for other processes
 
506
  f.write(f'export HF_DATASET_REPO="{HF_DATASET_REPO}"\n')
507
  f.write(f'export PERSIST_PATH="{PERSIST_PATH}"\n')
508
 
509
+ # Start services + nginx FIRST (open port 7860 fast to avoid HF timeout)
510
  start_sshd()
511
  start_ws_bridge()
512
  start_ttyd()
513
  start_log_streamer()
514
 
515
+ log("starting nginx on 0.0.0.0:7860 ...")
516
+ nginx_proc = subprocess.Popen(
517
+ ["nginx", "-c", "/etc/nginx/nginx.conf", "-g", "daemon off;"]
518
+ )
519
+ log(f"[ OK ] nginx PID={nginx_proc.pid}")
520
 
521
  log("========================================")
522
+ log("system ready (restore runs in background)")
523
  log(f" Terminal: https://<space>.hf.space/")
524
  log(f" Logs: https://<space>.hf.space/runlog")
 
525
  log("========================================")
526
 
527
+ # Restore in background (download + rsync can take minutes for large datasets)
528
+ def background_restore():
529
+ try:
530
+ restore_state()
531
+ restore_packages()
532
+ ensure_passwords() # re-ensure after restore
533
+ log("── background restore complete ──")
534
+ except Exception as e:
535
+ log(f"── background restore error: {e} ──")
536
+
537
+ threading.Thread(target=background_restore, daemon=True).start()
538
+
539
+ # Background threads
540
+ threading.Thread(target=sync_loop, daemon=True).start()
541
+ threading.Thread(target=heartbeat_loop, daemon=True).start()
542
 
543
  # Final save on SIGTERM
544
  def on_sigterm(sig, frame):