Commit: Remove launch_true_1b.sh — cleanup for open-source launch.
File launch_true_1b.sh: DELETED (+0 −59). The removed script follows:

@@ -1,59 +0,0 @@
|
|
| 1 |
-
#!/bin/bash
#
# Launch TRUE 1.21B Parameter BitTransformerLM Training
# =====================================================
#
# PROPER FSDP sharding across 4 GPUs + inference testing.
#
# Required environment:
#   HF_TOKEN  - Hugging Face access token. Must be set by the caller;
#               it is never hardcoded in this script.

# Strict mode: exit on error, on unset variables, and on any failed
# stage of a pipeline (the original used bare `set -e`).
set -euo pipefail

echo "TRUE 1.21B PARAMETER BITTRANSFORMERLM TRAINING"
echo "================================================="
echo "PROPER FSDP SHARDING (not duplication!)"
echo "Based on proven 680M success"
echo "Full training + inference testing"
echo ""

# Optimal environment setup
export CUDA_VISIBLE_DEVICES=0,1,2,3
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export OMP_NUM_THREADS=12

# Fail fast with a clear message if the token is missing, instead of
# exporting a "your-token-here" placeholder that would only fail later
# inside the Hugging Face client.
: "${HF_TOKEN:?HF_TOKEN must be set in the environment}"
export HF_TOKEN

# Explicit diagnostic if the checkout is not where we expect it.
cd /data/BitTransformerLM/BitTransformerLM || {
  echo "ERROR: /data/BitTransformerLM/BitTransformerLM not found" >&2
  exit 1
}

echo "Hardware Check:"
# Quoted here-doc: no shell expansion inside the Python program, so the
# f-strings and braces survive intact (the original embedded a
# multi-line program in a double-quoted `python -c "…"`).
python - <<'PY'
import torch
print(f'CUDA Available: {torch.cuda.is_available()}')
print(f'GPU Count: {torch.cuda.device_count()}')
for i in range(torch.cuda.device_count()):
    props = torch.cuda.get_device_properties(i)
    print(f'  GPU {i}: {props.name} ({props.total_memory / 1024**3:.1f}GB)')
total = sum(torch.cuda.get_device_properties(i).total_memory
            for i in range(torch.cuda.device_count()))
print(f'Total VRAM: {total / 1024**3:.1f}GB')
PY

echo ""
echo "TRUE 1.21B CONFIGURATION:"
echo "  Parameters: 1,210,000,000+ (1.21B)"
echo "  Architecture: d_model=2048, layers=24, heads=32"
echo "  Memory Strategy: FSDP Full Sharding across 4 GPUs"
echo "  Sequence Length: 512 (optimized from 680M success)"
echo "  Mixed Precision: FP16"
echo "  Safety Telemetry: K, C, S metrics enabled"
echo "  All Optimizations: Reversible + Checkpointing + Chunked Attention"
echo ""

echo "Starting TRUE 1.21B parameter training..."
echo "  This WILL work - we've proven the capability!"
echo ""

# Launch training (set -e aborts the script if training fails, so the
# completion banner below only prints on success).
python true_1b_training.py

echo ""
echo "TRUE 1.21B BITTRANSFORMERLM TRAINING COMPLETED!"
echo "Check /data/true_1b_results.json for full results"
echo "Model checkpoint saved for inference"
echo "Inference testing completed"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|