File size: 1,893 Bytes
50ebd92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
# Fix partial 550 driver install: remove conflicting 530 package, install 550 stack, reconfigure.
# Run from repo root:  sudo bash scripts/fix_nvidia_550_install.sh

# Abort on the first unhandled command failure and on any use of an unset variable.
set -eu

# Every step below changes the package database through apt/dpkg and therefore
# needs root. Several of those steps discard stderr, so without this guard an
# unprivileged run would fall through its fallbacks with no useful explanation.
if [[ "${EUID}" -ne 0 ]]; then
  echo "error: this script must be run as root (e.g. sudo bash $0)" >&2
  exit 1
fi

echo "=== Fixing NVIDIA 550 driver install ==="

# 1. Remove the package that blocked nvidia-kernel-common-550-server (file conflict on /usr/bin/nvidia-powerd)
echo "Removing nvidia-compute-utils-530..."
# Read the package's real state from dpkg. The exit status of `dpkg -l` is not
# enough: it is 0 for any package dpkg has a record of, including ones whose
# state is already "config-files" or "not-installed". The last word of
# ${Status} is the state; the output is empty when dpkg does not know the package.
compute_utils_status=$(dpkg-query -W -f='${Status}' nvidia-compute-utils-530 2>/dev/null || true)
case "${compute_utils_status##* }" in
  '' | not-installed | config-files)
    echo "  (already removed)"
    ;;
  *)
    # Escalating attempts: a plain removal, then one that may change held
    # packages, and finally dpkg with dependency checks disabled. stderr of the
    # first two is discarded on purpose so only the last resort is noisy; if
    # that one fails too, `set -e` stops the script.
    apt-get remove -y nvidia-compute-utils-530 2>/dev/null ||
      apt-get remove -y --allow-change-held-packages nvidia-compute-utils-530 2>/dev/null ||
      {
        echo "  Forcing removal with dpkg (apt is in broken state)..."
        dpkg --remove --force-depends nvidia-compute-utils-530
      }
    ;;
esac

# 2. Remove all remaining 530 packages that block 550 (broken deps or conflicts)
echo "Removing remaining 530 packages..."
for pkg in nvidia-fabricmanager-530 libnvidia-decode-530 libnvidia-encode-530 nvidia-utils-530 xserver-xorg-video-nvidia-530 \
  libnvidia-cfg1-530 libnvidia-compute-530 libnvidia-gl-530 libnvidia-common-530 libnvidia-extra-530 libnvidia-fbc1-530; do
  # Skip only packages that are really gone. Matching "^ii" in `dpkg -l` output
  # would also skip packages left unpacked / half-installed / half-configured,
  # and those still have files on disk that can conflict with the 550 packages.
  pkg_status=$(dpkg-query -W -f='${Status}' "$pkg" 2>/dev/null || true)
  case "${pkg_status##* }" in
    '' | not-installed | config-files) continue ;;
  esac

  echo "  Removing $pkg..."
  # Best effort: try apt first, then dpkg with dependency checks disabled.
  # A failure here is reported but does not abort the run, so the remaining
  # packages are still processed.
  if ! apt-get remove -y --allow-change-held-packages "$pkg" 2>/dev/null &&
    ! dpkg --remove --force-depends "$pkg" 2>/dev/null; then
    echo "  warning: could not remove $pkg (state: ${pkg_status}); continuing" >&2
  fi
done

# 3. Install 550 user-space stack
echo "Installing 550 user-space..."
# apt-get is used instead of apt: the apt front end does not promise a stable
# command-line interface for scripts.
# Best-effort dependency repair first; its failure is tolerated because the
# explicit install below is the step that actually has to succeed.
apt-get --fix-broken install -y 2>/dev/null || true
# Preferred meta-package first. If it cannot be installed, retry with the
# no-dkms variant; that second attempt keeps its stderr and, under `set -e`,
# its failure aborts the script.
if ! apt-get install -y --allow-change-held-packages nvidia-headless-550-server nvidia-fabricmanager-550 2>/dev/null; then
  echo "  nvidia-headless-550-server could not be installed; retrying with nvidia-headless-no-dkms-550-server..." >&2
  apt-get install -y --allow-change-held-packages nvidia-headless-no-dkms-550-server nvidia-fabricmanager-550
fi

# 4. Have dpkg finish configuring whatever the steps above left pending
printf '%s\n' "Configuring any pending packages..."
dpkg --configure -a

# Closing instructions for the operator: reboot, then verify the driver.
printf '%s\n' \
  "Done. Reboot then run: nvidia-smi && bash scripts/try_nccl_8gpu.sh" \
  "  sudo reboot"