# Serve Qwen/Qwen2.5-7B-Instruct with `swift deploy` on the vLLM inference
# backend. Only GPU 0 is made visible to the process, and the model is
# exposed to clients under the name "Qwen2.5-7B-Instruct".
#
# Each backslash must be the very last character on its line; anything after
# it (even a space) breaks the line continuation.
CUDA_VISIBLE_DEVICES=0 swift deploy \
    --model Qwen/Qwen2.5-7B-Instruct \
    --infer_backend vllm \
    --served_model_name Qwen2.5-7B-Instruct

# After the server-side deployment above is successful, use the command below
# to perform a client call test. It is left commented out on purpose
# (presumably because the deploy command keeps running in the foreground, so
# the request has to be sent from another shell -- confirm for your setup).
# The "model" field is the same string passed to --served_model_name above.
#
# curl http://localhost:8000/v1/chat/completions \
#   -H "Content-Type: application/json" \
#   -d '{
#     "model": "Qwen2.5-7B-Instruct",
#     "messages": [{"role": "user", "content": "What is your name?"}],
#     "temperature": 0
#   }'