File size: 1,889 Bytes
2ff6c5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
#
# Reinstall a pinned torch 2.6.0 / vLLM 0.8.3 / verl 0.7.0 / xformers stack
# (plus matching nvidia-*-cu12 and triton wheels) from the Aliyun PyPI mirror,
# then import the key packages to verify the environment.
#
# Every install uses --no-deps so pip does not resolve or upgrade anything
# beyond the packages named here (in particular, flash_attn is left alone).
#
# Exit status: 0 and a final "REINSTALL_DONE" line only when every install and
# the verification step succeeded; non-zero with a message on stderr otherwise.

set -x
# pipefail matters here: each install is piped through `tail`, and without it
# the pipeline's status would be tail's (always 0), hiding pip failures.
set -euo pipefail

# Index options shared by every pip invocation.
readonly -a MIRROR_ARGS=(
    -i https://mirrors.aliyun.com/pypi/simple/
    --trusted-host mirrors.aliyun.com
)

# Print a message to stderr and abort the script.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

#######################################
# Install the given requirement specifiers without dependencies from the
# mirror, showing only the last 5 lines of pip's combined output.
# Globals:   MIRROR_ARGS (read)
# Arguments: one or more pip requirement specifiers
# Returns:   pip's exit status (propagated through the pipe by pipefail)
#######################################
pip_install() {
    pip install "$@" --no-deps "${MIRROR_ARGS[@]}" 2>&1 | tail -5
}

# Reinstall torch 2.6.0 (without upgrading flash_attn)
pip_install torch==2.6.0 || die "ERROR: failed to install torch==2.6.0"

# Reinstall vllm 0.8.3 without deps (already has deps)
pip_install vllm==0.8.3 || die "ERROR: failed to install vllm==0.8.3"

# Reinstall verl 0.7.0
pip_install verl==0.7.0 || die "ERROR: failed to install verl==0.7.0"

# Reinstall xformers for vllm
pip_install xformers==0.0.29.post2 \
    || die "ERROR: failed to install xformers==0.0.29.post2"

# Also need compatible NVIDIA packages for torch 2.6
pip_install \
    'nvidia-cublas-cu12==12.4.5.8' 'nvidia-cudnn-cu12==9.1.0.70' 'nvidia-nccl-cu12==2.21.5' \
    'nvidia-cusolver-cu12==11.6.1.9' 'nvidia-cusparse-cu12==12.3.1.170' \
    'nvidia-cufft-cu12==11.2.1.3' 'nvidia-nvtx-cu12==12.4.127' \
    'nvidia-nvjitlink-cu12==12.4.127' 'nvidia-cuda-runtime-cu12==12.4.127' \
    'nvidia-cuda-cupti-cu12==12.4.127' 'nvidia-cuda-nvrtc-cu12==12.4.127' \
    'triton==3.2.0' \
    || die "ERROR: failed to install NVIDIA CUDA 12 / triton packages"

echo ""
echo "=== Verification ==="
# Quoted heredoc delimiter: the Python source is passed through literally, so
# the shell does not expand the {...} / $-looking text inside the f-strings.
# torch, vllm and transformers must import or the interpreter exits non-zero;
# flash_attn is checked best-effort and only reported.
python3 << 'PYEOF' || die "ERROR: verification failed (see Python traceback above)"
import torch
print(f"torch: {torch.__version__}")
print(f"CUDA: {torch.version.cuda}")
print(f"CXX11_ABI: {torch._C._GLIBCXX_USE_CXX11_ABI}")

# flash_attn was not reinstalled by this script; report whether the existing
# build still imports against the reinstalled torch, without failing the run.
try:
    from flash_attn import flash_attn_func
    import flash_attn
    print(f"flash_attn: {flash_attn.__version__} - OK")
except Exception as e:
    print(f"flash_attn ERROR: {e}")

import vllm
print(f"vLLM: {vllm.__version__}")

import transformers
print(f"transformers: {transformers.__version__}")

print("ALL_OK")
PYEOF

# Reached only when every step above succeeded.
echo "REINSTALL_DONE"