#!/bin/bash
#
# PostBuild hook for the CT-Chat Space.
#
# Steps, in order:
#   1. Force-reinstall websockets / gradio-client at known-good minimums.
#   2. Create a shared NLTK data directory and export NLTK_DATA for the
#      commands run by this script.
#   3. Download the NLTK resources the app needs into that directory.
#   4. Report (advisory only) whether each resource can be located.
#   5. Remove build-time caches and logs to reduce disk usage.
#   6. Create a world-writable /tmp/hf_cache directory.
#
# NOTE(review): `export NLTK_DATA` only affects this process and its
# children. /usr/local/share/nltk_data is presumably on NLTK's default
# search path at runtime -- confirm, or set NLTK_DATA in the Space config.

# Abort on command failure, on use of an unset variable, and on a failure
# in any stage of a pipeline.
set -euo pipefail

echo "🔧 PostBuild starting — optimizing CT-Chat Space..."

# -------------------------------------------------------
# 1. Fix dependency mismatches (Gradio & Websockets)
# -------------------------------------------------------
pip install --force-reinstall --no-cache-dir "websockets>=12" "gradio-client>=1.3.0"

# -------------------------------------------------------
# 2. Create and register shared NLTK data directory
# -------------------------------------------------------
echo "📁 Preparing shared NLTK data directory..."
export NLTK_DATA="/usr/local/share/nltk_data"
mkdir -p "$NLTK_DATA"
chmod -R 777 "$NLTK_DATA"

# -------------------------------------------------------
# 3. Preload all required NLTK resources (including punkt_tab)
# -------------------------------------------------------
echo "📦 Downloading NLTK resources..."
python -m nltk.downloader -d "$NLTK_DATA" \
  punkt punkt_tab averaged_perceptron_tagger averaged_perceptron_tagger_eng stopwords wordnet omw-1.4

# -------------------------------------------------------
# 4. Verify NLTK installs and paths
# -------------------------------------------------------
# nltk.data.find() expects a category-qualified resource path
# ("tokenizers/punkt"), not a bare package id ("punkt"); a bare id raises
# LookupError even when the package is installed. A missing resource only
# prints a warning -- it does not fail the build.
python - <<'PYCODE'
import nltk

RESOURCE_PATHS = {
    "punkt": "tokenizers/punkt",
    "punkt_tab": "tokenizers/punkt_tab",
    "averaged_perceptron_tagger_eng": "taggers/averaged_perceptron_tagger_eng",
    "stopwords": "corpora/stopwords",
    "wordnet": "corpora/wordnet",
}

print(f"NLTK data path → {nltk.data.path}")
for pkg, resource in RESOURCE_PATHS.items():
    try:
        nltk.data.find(resource)
        print(f"✅ Verified NLTK resource: {pkg}")
    except LookupError:
        print(f"⚠️ Missing NLTK resource: {pkg}")
PYCODE

# -------------------------------------------------------
# 5. Clean caches (stay <50GB)
# -------------------------------------------------------
# Best-effort cleanup: `|| true` keeps a failed removal (e.g. permission
# denied) from aborting the build under `set -e`.
echo "🧹 Cleaning Hugging Face + Torch caches..."
rm -rf /root/.cache/* || true
rm -rf /home/user/.cache/* || true
rm -rf /usr/local/share/nltk_data/taggers/__pycache__ || true
rm -rf /home/user/app/hf_cache/* || true
rm -rf /home/user/app/logs/* || true

# -------------------------------------------------------
# 6. Ensure writable temporary cache for runtime
# -------------------------------------------------------
echo "📦 Preparing /tmp/hf_cache..."
mkdir -p /tmp/hf_cache
chmod -R 777 /tmp/hf_cache

# -------------------------------------------------------
# Done
# -------------------------------------------------------
echo "✅ PostBuild completed successfully — NLTK preloaded (punkt_tab OK), cache ready at /tmp/hf_cache."