Ark-kun committed on
Commit
7b3d004
·
verified ·
1 Parent(s): 9dca78f

fix: MT - Fixed start_all_active_tenant_orchestrators to handle orchestrator startup errors

Browse files
huggingface_overlay/start_HuggingFace_multi_tenant.py CHANGED
@@ -649,12 +649,19 @@ def start_all_active_tenant_orchestrators():
649
  for tenant_row in session.scalars(
650
  sqlalchemy.select(TenantRow).where(TenantRow.orchestrator_active)
651
  ):
652
- # TODO: Respect the orchestrator_config
653
- _ = get_or_start_orchestrator(
654
- tenant_id=tenant_row.id,
655
- tenant_namespace=tenant_row.name,
656
- tenant_token=tenant_row.access_token,
657
- )
 
 
 
 
 
 
 
658
 
659
 
660
  # region: API Server initialization
 
649
  for tenant_row in session.scalars(
650
  sqlalchemy.select(TenantRow).where(TenantRow.orchestrator_active)
651
  ):
652
+ try:
653
+ # TODO: Respect the orchestrator_config
654
+ _ = get_or_start_orchestrator(
655
+ tenant_id=tenant_row.id,
656
+ tenant_namespace=tenant_row.name,
657
+ tenant_token=tenant_row.access_token,
658
+ )
659
+ except Exception as ex:
660
+ logger.exception(
661
+ f"start_all_active_tenant_orchestrators: Error starting orchestrator for {tenant_row.id=}. Marking the orchestrator as inactive."
662
+ )
663
+ tenant_row.orchestrator_active = False
664
+ session.commit()
665
 
666
 
667
  # region: API Server initialization