#!/usr/bin/env bash set -euo pipefail echo "🚀 Builder completo — FlashAttention LayerNorm, Apex, Q8, FlashAttention (GitHub) + upload" # ===== ConfiguraçÔes e diretĂłrios ===== APP_WHEELS="/app/wheels" APP_CUDA_CACHE="/app/cuda_cache" SRC_DIR="$APP_WHEELS/src" mkdir -p "$APP_WHEELS" "$APP_CUDA_CACHE" "$SRC_DIR" chmod -R 777 "$APP_WHEELS" || true export CUDA_CACHE_PATH="$APP_CUDA_CACHE" # Preserva licença NGC (se existir) if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then cp -f /NGC-DL-CONTAINER-LICENSE "$APP_WHEELS/" || true fi # ===== DependĂȘncias mĂ­nimas ===== python -m pip install -v -U \ pip build setuptools wheel hatchling hatch-vcs \ scikit-build-core cmake ninja packaging \ "huggingface_hub[hf_transfer]" || true # ===== Tags de ambiente (Python/CUDA/Torch) ===== PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)" TORCH_VER="$(python - <<'PY' try; import torch, re; v=torch.__version__; print(re.sub(r'\+.*$', '', v)) except; print("unknown") PY )" CU_TAG="$(python - <<'PY' try; import torch; cu=getattr(torch.version,"cuda",None); echo="cu"+cu.replace(".","") if cu else ""; print(echo) except; print("") PY )" echo "[env] PY_TAG=$PY_TAG TORCH_VER=$TORCH_VER CU_TAG=$CU_TAG" # ===== FunçÔes de checagem ===== check_flash_layer_norm_bin() { python - <<'PY' import importlib, sys modules = [ "dropout_layer_norm", "flash_attn.ops.layer_norm", "flash_attn.ops.rms_norm", ] for m in modules: try: importlib.import_module(m); sys.exit(0) except: pass sys.exit(1) PY } check_apex() { python - <<'PY' import sys try: from apex.normalization import FusedLayerNorm import importlib; importlib.import_module("fused_layer_norm_cuda") sys.exit(0) except: sys.exit(1) PY } check_q8() { python - <<'PY' import importlib.util, sys spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q8_kernels") sys.exit(0 if spec else 1) PY } # ===== Helpers Hugging Face ===== install_from_hf_by_prefix() { local PREFIX="$1" python - <<'PY' || return 1 import os, sys, re from huggingface_hub import HfApi, hf_hub_download, HfFolder repo = os.getenv("SELF_HF_REPO_ID","eeuuia/Tmp") token = os.getenv("HF_TOKEN") or HfFolder.get_token() api = HfApi(token=token) files = api.list_repo_files(repo_id=repo, repo_type="model") cands = [f for f in files if f.endswith(".whl") and "/${PREFIX}-" in f and "${PY_TAG}" in f] pref = [f for f in cands if "${CU_TAG}" in f] or cands if not pref: sys.exit(1) target = sorted(pref, reverse=True)[0] hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="$APP_WHEELS") print(target) PY } # ===== Build functions ===== build_flash_layer_norm() { echo "=== FlashAttn LayerNorm ===" if install_from_hf_by_prefix "flash-attn"; then python -m pip install -v --no-deps "$APP_WHEELS"/flash_attn-*.whl || true check_flash_layer_norm_bin && return 0 echo "Wheel HF falhou, build local" fi SRC="$SRC_DIR/flash-attention" if [ -d "$SRC/.git" ]; then git -C "$SRC" fetch --all -p || true git -C "$SRC" reset --hard origin/main || true git -C "$SRC" clean -fdx || true else rm -rf "$SRC" git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC" fi export TORCH_CUDA_ARCH_LIST="$(python - <<'PY' import torch,sys try: cc="%d.%d"%torch.cuda.get_device_capability(0); print(cc) except: print("8.9") PY )" echo "[build] TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST" pushd "$SRC/csrc/layer_norm" >/dev/null python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true popd >/dev/null WHEEL=$(ls -t "$APP_WHEELS"/*flash*layer*norm*-*.whl 2>/dev/null | head -n1) python -m pip install -v --no-deps "${WHEEL:-$SRC/csrc/layer_norm}" || true check_flash_layer_norm_bin || echo "⚠ LayerNorm import falhou" } build_apex() { echo "=== Apex ===" SRC="$SRC_DIR/apex" rm -rf "$SRC" git clone --depth 1 https://github.com/NVIDIA/apex "$SRC" export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0 python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true python -m pip install -v --no-deps "$APP_WHEELS"/apex-*.whl || true } build_q8() { echo "=== Q8 Kernels ===" SRC="$SRC_DIR/q8_kernels" rm -rf "$SRC" git clone --filter=blob:none "$Q8_REPO" "$SRC" git -C "$SRC" checkout "$Q8_COMMIT" git -C "$SRC" submodule update --init --recursive python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true python -m pip install -v --no-deps "$APP_WHEELS"/q8_kernels-*.whl || true } build_flash_attention_full() { echo "=== FlashAttention (full GitHub) ===" SRC="$SRC_DIR/flash-attention-full" rm -rf "$SRC" git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC" pushd "$SRC" >/dev/null export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-cuda}" python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true popd >/dev/null W=$(ls -t "$APP_WHEELS"/flash_attn-*.whl 2>/dev/null | head -n1) if [ -n "$W" ]; then python -m pip install -v --no-deps "$W" else python -m pip install -v --no-deps git+https://github.com/Dao-AILab/flash-attention fi } # ===== Execução principal ===== build_apex build_q8 build_flash_attention_full build_flash_layer_norm # ===== Upload de wheels ===== python - <<'PY' import os from huggingface_hub import HfApi, HfFolder repo = os.getenv("SELF_HF_REPO_ID","eeuuia/Tmp") token = os.getenv("HF_TOKEN") or HfFolder.get_token() if not token: exit(0) api = HfApi(token=token) api.upload_folder( folder_path="$APP_WHEELS", repo_id=repo, repo_type="model", allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"], ignore_patterns=["**/src/**",".git/**"], ) print("✅ Upload concluĂ­do.") PY chmod -R 777 "$APP_WHEELS" || true echo "✅ Builder finalizado."