---
# LeaderWorkerSet that launches the SGLang server in "decode" disaggregation
# mode. Each group holds 2 pods (size: 2) and every pod requests 8 GPUs,
# which lines up with the --tp-size 16 / --dp-size 16 launch arguments.
#
# $(LWS_LEADER_ADDRESS) and $(LWS_GROUP_SIZE) are not defined anywhere in this
# manifest; they are expected to be injected into the pods by the
# LeaderWorkerSet controller. LWS_WORKER_INDEX is set explicitly below from
# the pod's worker-index label.
apiVersion: leaderworkerset.x-k8s.io/v1
kind: LeaderWorkerSet
metadata:
  name: deepseekr10528-decode-main
spec:
  leaderWorkerTemplate:
    # Leader pod: the only pod that passes --host/--port and that carries a
    # readiness probe (TCP check on port 30000).
    leaderTemplate:
      metadata:
        labels:
          role: leader
      spec:
        containers:
          - command:
              - python3
              - -m
              - sglang.launch_server
              - --port
              - "30000"
              - --host
              - "0.0.0.0"
              - --model-path
              - /work/models
              - --chunked-prefill-size
              - "262144"
              - --page-size
              - "64"
              - --enable-dp-attention
              - --enable-dp-lm-head
              - --dp-size
              - "16"
              - --moe-a2a-backend
              - deepep
              - --disaggregation-mode
              - decode
              - --mem-fraction-static
              - "0.849"
              - --context-length
              - "32768"
              - --disaggregation-ib-device
              - "mlx5_bond_0,mlx5_bond_1,mlx5_bond_2,mlx5_bond_3"
              - --cuda-graph-max-bs
              - "64"
              - --max-running-requests
              - "2048"
              - --tp-size
              - "16"
              - --dist-init-addr
              - "$(LWS_LEADER_ADDRESS):20102"
              - --nnodes
              - "$(LWS_GROUP_SIZE)"
              - --node-rank
              - "$(LWS_WORKER_INDEX)"
              - --trust-remote-code
              - --ep-num-redundant-experts
              - "32"
              - --moe-dense-tp-size
              - "1"
            # NOTE(review): this list sets CUDA_LAUNCH_BLOCKING but has no
            # NVSHMEM_IB_TRAFFIC_CLASS, while the worker container is the
            # other way round. Confirm the asymmetry is intentional.
            env:
              - name: CUDA_LAUNCH_BLOCKING
                value: "0"
              - name: NVSHMEM_IB_GID_INDEX
                value: "3"
              - name: NVSHMEM_ENABLE_NIC_PE_MAPPING
                value: "1"
              - name: NVSHMEM_HCA_PE_MAPPING
                value: "mlx5_bond_0:1:2,mlx5_bond_1:1:2,mlx5_bond_2:1:2,mlx5_bond_3:1:2"
              - name: NCCL_IB_QPS_PER_CONNECTION
                value: "8"
              - name: NCCL_IB_SPLIT_DATA_ON_QPS
                value: "1"
              - name: NCCL_NET_PLUGIN
                value: "none"
              - name: NCCL_IB_TC
                value: "136"
              - name: NCCL_MIN_NCHANNELS
                value: "4"
              - name: NCCL_IB_SL
                value: "5"
              - name: MC_TE_METRIC
                value: "true"
              - name: SGLANG_MOONCAKE_TRANS_THREAD
                value: "16"
              - name: SGLANG_ENABLE_JIT_DEEPGEMM
                value: "1"
              - name: NCCL_IB_HCA
                value: "^=mlx5_0,mlx5_5,mlx5_6"
              # Exposes the pod's worker-index label so the command can pass
              # it as --node-rank.
              - name: LWS_WORKER_INDEX
                valueFrom:
                  fieldRef:
                    fieldPath: "metadata.labels['leaderworkerset.sigs.k8s.io/worker-index']"
            # NOTE(review): floating "latest" tag; consider pinning a release
            # tag or digest so leader and worker always run the same build.
            image: lmsysorg/sglang:latest
            name: sglang-leader
            ports:
              - containerPort: 30000
                protocol: TCP
            readinessProbe:
              periodSeconds: 30
              tcpSocket:
                port: 30000
            resources:
              limits:
                nvidia.com/gpu: "8"
            securityContext:
              capabilities:
                add:
                  - IPC_LOCK
              privileged: true
            volumeMounts:
              - mountPath: /root/.cache
                name: sgl-cache
              - mountPath: /dev/shm
                name: dshm
              - mountPath: /work/models
                name: model
              - mountPath: /dev/infiniband
                name: ib
              - mountPath: /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs
                name: cf
        dnsPolicy: ClusterFirstWithHostNet
        hostIPC: true
        hostNetwork: true
        # NOTE(review): node selector and tolerations look site-specific;
        # adjust to the labels/taints of the target cluster.
        nodeSelector:
          pd: "yes"
        tolerations:
          - key: bopd
            operator: Exists
          - key: node-role
            operator: Exists
        # NOTE(review): the /data1/... hostPath locations are node-local and
        # presumably site-specific; verify they exist on the selected nodes.
        volumes:
          - hostPath:
              path: /data1/sgl_cache1
              type: DirectoryOrCreate
            name: sgl-cache
          - emptyDir:
              medium: Memory
            name: dshm
          - hostPath:
              path: /data1/maas_hosted_models/models/DeepSeek-R1-0528/deepseek_r1_0528
            name: model
          - hostPath:
              path: /dev/infiniband
            name: ib
          - hostPath:
              path: /data1/maas_hosted_models/models/fused_moe_triton/configs
            name: cf
    restartPolicy: RecreateGroupOnPodRestart
    size: 2
    # Worker pod: same launch arguments as the leader minus --port/--host,
    # and no readiness probe.
    workerTemplate:
      metadata: {}
      spec:
        containers:
          - command:
              - python3
              - -m
              - sglang.launch_server
              - --model-path
              - /work/models
              - --chunked-prefill-size
              - "262144"
              - --page-size
              - "64"
              - --enable-dp-attention
              - --enable-dp-lm-head
              - --dp-size
              - "16"
              - --moe-a2a-backend
              - deepep
              - --disaggregation-mode
              - decode
              - --mem-fraction-static
              - "0.849"
              - --context-length
              - "32768"
              - --disaggregation-ib-device
              - "mlx5_bond_0,mlx5_bond_1,mlx5_bond_2,mlx5_bond_3"
              - --cuda-graph-max-bs
              - "64"
              - --max-running-requests
              - "2048"
              - --tp-size
              - "16"
              - --dist-init-addr
              - "$(LWS_LEADER_ADDRESS):20102"
              - --nnodes
              - "$(LWS_GROUP_SIZE)"
              - --node-rank
              - "$(LWS_WORKER_INDEX)"
              - --trust-remote-code
              - --ep-num-redundant-experts
              - "32"
              - --moe-dense-tp-size
              - "1"
            # NOTE(review): this list sets NVSHMEM_IB_TRAFFIC_CLASS but has no
            # CUDA_LAUNCH_BLOCKING, while the leader container is the other
            # way round. Confirm the asymmetry is intentional.
            env:
              - name: NVSHMEM_IB_TRAFFIC_CLASS
                value: "16"
              - name: NVSHMEM_IB_GID_INDEX
                value: "3"
              - name: NVSHMEM_ENABLE_NIC_PE_MAPPING
                value: "1"
              - name: NVSHMEM_HCA_PE_MAPPING
                value: "mlx5_bond_0:1:2,mlx5_bond_1:1:2,mlx5_bond_2:1:2,mlx5_bond_3:1:2"
              - name: NCCL_IB_QPS_PER_CONNECTION
                value: "8"
              - name: NCCL_IB_SPLIT_DATA_ON_QPS
                value: "1"
              - name: NCCL_NET_PLUGIN
                value: "none"
              - name: NCCL_IB_TC
                value: "136"
              - name: NCCL_MIN_NCHANNELS
                value: "4"
              - name: MC_TE_METRIC
                value: "true"
              - name: NCCL_IB_SL
                value: "5"
              - name: SGLANG_MOONCAKE_TRANS_THREAD
                value: "16"
              - name: SGLANG_ENABLE_JIT_DEEPGEMM
                value: "1"
              - name: NCCL_IB_HCA
                value: "^=mlx5_0,mlx5_5,mlx5_6"
              # Exposes the pod's worker-index label so the command can pass
              # it as --node-rank.
              - name: LWS_WORKER_INDEX
                valueFrom:
                  fieldRef:
                    fieldPath: "metadata.labels['leaderworkerset.sigs.k8s.io/worker-index']"
            # NOTE(review): floating "latest" tag; consider pinning a release
            # tag or digest so leader and worker always run the same build.
            image: lmsysorg/sglang:latest
            name: sglang-worker
            # NOTE(review): the worker command passes no --port, so it is not
            # evident from this manifest what listens on 30001. Verify.
            ports:
              - containerPort: 30001
            resources:
              limits:
                nvidia.com/gpu: "8"
            securityContext:
              capabilities:
                add:
                  - IPC_LOCK
              privileged: true
            volumeMounts:
              - mountPath: /root/.cache
                name: sgl-cache
              - mountPath: /dev/shm
                name: dshm
              - mountPath: /work/models
                name: model
              - mountPath: /dev/infiniband
                name: ib
              - mountPath: /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs
                name: cf
        dnsPolicy: ClusterFirstWithHostNet
        hostIPC: true
        hostNetwork: true
        # NOTE(review): node selector and tolerations look site-specific;
        # adjust to the labels/taints of the target cluster.
        nodeSelector:
          pd: "yes"
        tolerations:
          - key: bopd
            operator: Exists
          - key: node-role
            operator: Exists
        # NOTE(review): the /data1/... hostPath locations are node-local and
        # presumably site-specific; verify they exist on the selected nodes.
        volumes:
          - hostPath:
              path: /data1/sgl_cache1
              type: DirectoryOrCreate
            name: sgl-cache
          - emptyDir:
              medium: Memory
            name: dshm
          - hostPath:
              path: /dev/infiniband
            name: ib
          - hostPath:
              path: /data1/maas_hosted_models/models/DeepSeek-R1-0528/deepseek_r1_0528
            name: model
          - hostPath:
              path: /data1/maas_hosted_models/models/fused_moe_triton/configs
            name: cf
  networkConfig:
    subdomainPolicy: Shared
  replicas: 1
  rolloutStrategy:
    rollingUpdateConfiguration:
      maxSurge: 0
      maxUnavailable: 1
    type: RollingUpdate
  startupPolicy: LeaderCreated