# Deploy Qwen/Qwen3-8B with `swift deploy` using the SGLang inference backend.
#
# - CUDA_VISIBLE_DEVICES=0,1 exposes only GPUs 0 and 1 to the process.
# - --sglang_tp_size 2 is set to match the two visible GPUs
#   (presumably the tensor-parallel degree; confirm against the ms-swift docs).
# - --served_model_name is the model id clients must send; the sample request
#   at the bottom of this file uses the same name ("Qwen3-8B").
#
# NOTE: each continuation backslash must be the very last character on its
# line; anything after it (even whitespace) splits the command.
CUDA_VISIBLE_DEVICES=0,1 \
swift deploy \
    --model Qwen/Qwen3-8B \
    --infer_backend sglang \
    --max_new_tokens 2048 \
    --sglang_context_length 8192 \
    --sglang_tp_size 2 \
    --served_model_name Qwen3-8B

# After the server-side deployment above is successful, use the command below
# to perform a client call test (run it from another terminal; the URL assumes
# the server is listening on localhost:8000).
#
# curl http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{
#         "model": "Qwen3-8B",
#         "messages": [{"role": "user", "content": "What is your name?"}],
#         "temperature": 0
#     }'