# nanochat-eos / scripts/fix_nvidia_550_install.sh
# ksjpswaroop's picture
# Upload folder using huggingface_hub
# 50ebd92 verified
#!/bin/bash
# Fix partial 550 driver install: remove conflicting 530 package, install 550 stack, reconfigure.
# Run from repo root: sudo bash scripts/fix_nvidia_550_install.sh
#
# Requires root: the steps in this script remove, install and configure
# system packages through apt and dpkg.

# Abort on the first command that fails and is not explicitly guarded.
set -e

# Fail fast with a clear message when not running as root. Without this check
# the script would only fail later inside the package-manager calls, several
# of which discard their stderr, making the cause hard to see.
if [ "$(id -u)" -ne 0 ]; then
  echo "ERROR: this script must be run as root: sudo bash scripts/fix_nvidia_550_install.sh" >&2
  exit 1
fi

echo "=== Fixing NVIDIA 550 driver install ==="
# 1. Remove the package that blocked nvidia-kernel-common-550-server
#    (file conflict on /usr/bin/nvidia-powerd).
echo "Removing nvidia-compute-utils-530..."
# Query the real package state instead of relying on the exit status of
# `dpkg -l`, which is also 0 for packages that were already removed but are
# still known to the dpkg database ("not-installed" / "config-files").
# Anything else (installed, held, unpacked, half-installed, half-configured)
# still has files on disk and must be removed.
# The single quotes are intentional: ${Status} is a dpkg-query format field,
# not a shell variable.
if dpkg-query -W -f='${Status}\n' nvidia-compute-utils-530 2>/dev/null \
  | grep -qvE ' (not-installed|config-files)$'; then
  # Escalate step by step: plain removal, removal that may change held
  # packages, and finally a forced dpkg removal. stderr of the apt-get
  # attempts is discarded because failures are expected while apt is in a
  # broken state; the final dpkg call keeps its output and, under `set -e`,
  # aborts the script if it fails too.
  apt-get remove -y nvidia-compute-utils-530 2>/dev/null \
    || apt-get remove -y --allow-change-held-packages nvidia-compute-utils-530 2>/dev/null \
    || {
      echo " Forcing removal with dpkg (apt is in broken state)..."
      dpkg --remove --force-depends nvidia-compute-utils-530
    }
else
  echo " (already removed)"
fi
# 2. Remove all remaining 530 packages that block 550 (broken deps or conflicts)
echo "Removing remaining 530 packages..."
leftover_530_pkgs=(
  nvidia-fabricmanager-530
  libnvidia-decode-530
  libnvidia-encode-530
  nvidia-utils-530
  xserver-xorg-video-nvidia-530
  libnvidia-cfg1-530
  libnvidia-compute-530
  libnvidia-gl-530
  libnvidia-common-530
  libnvidia-extra-530
  libnvidia-fbc1-530
)
for pkg in "${leftover_530_pkgs[@]}"; do
  # Treat the package as present unless dpkg reports it as "not-installed" or
  # "config-files". Matching only the "ii" prefix of `dpkg -l` would skip held
  # packages ("hi") and packages left unpacked / half-installed /
  # half-configured by the interrupted install.
  # The single quotes are intentional: ${Status} is a dpkg-query format field,
  # not a shell variable.
  if dpkg-query -W -f='${Status}\n' "$pkg" 2>/dev/null \
    | grep -qvE ' (not-installed|config-files)$'; then
    echo " Removing $pkg..."
    # Best effort: try apt-get first, then a forced dpkg removal. A package
    # that cannot be removed is reported but does not abort the script, so the
    # remaining packages are still processed.
    apt-get remove -y --allow-change-held-packages "$pkg" 2>/dev/null \
      || dpkg --remove --force-depends "$pkg" 2>/dev/null \
      || echo " WARNING: could not remove $pkg; continuing" >&2
  fi
done
# 3. Install 550 user-space stack
echo "Installing 550 user-space..."
# Best effort: let apt repair broken dependencies first. A failure here is
# tolerated on purpose because the install below is the step that matters.
apt-get --fix-broken install -y 2>/dev/null || true
# Try the regular headless metapackage first; if that fails, retry with the
# no-dkms variant. stderr of the first attempt is discarded, so announce the
# fallback explicitly. The retry keeps its output and, under `set -e`, aborts
# the script when it fails as well.
if ! apt-get install -y --allow-change-held-packages \
  nvidia-headless-550-server nvidia-fabricmanager-550 2>/dev/null; then
  echo " nvidia-headless-550-server could not be installed; retrying with nvidia-headless-no-dkms-550-server..."
  apt-get install -y --allow-change-held-packages \
    nvidia-headless-no-dkms-550-server nvidia-fabricmanager-550
fi
# 4. Complete configuration of every package dpkg still lists as pending.
printf '%s\n' "Configuring any pending packages..."
dpkg --configure -a
# Closing instructions: reboot first, then verify with the commands shown.
printf '%s\n' \
  "Done. Reboot then run: nvidia-smi && bash scripts/try_nccl_8gpu.sh" \
  " sudo reboot"