# td-toolkit / requirements.txt
# td-builder's picture
# Fixed code: vocab mismatch fix for cross-arch merging (Llama/Falcon)
# 5d61448 verified
# TD Merge Pipeline - Complete Python Dependency List
# Python 3.11-3.12 (3.12 preferred)
# CUDA 12.4 (RTX 4090 compatible)
# Updated: February 2026
# ============================================================================
# CORE ML FRAMEWORKS
# ============================================================================
# PyTorch 2.4+ with CUDA 12.4 support (RTX 4090 compatible)
torch==2.4.1
torchvision==0.19.1
torchaudio==2.4.1
# NVIDIA CUDA Toolkit support (already installed on system)
# CUDA 12.4 for RTX 4090 compatibility
# Note: Install via: pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu124
# ============================================================================
# TRANSFORMERS & MODEL LOADING
# ============================================================================
# Transformers library - must support Qwen3 (requires 4.51.0+)
transformers==4.51.0
# Safetensors for efficient model serialization
safetensors==0.4.5
# Accelerate for distributed training & multi-GPU support
accelerate==1.2.1
# ============================================================================
# PARAMETER EFFICIENT FINE-TUNING (PEFT/QLoRA)
# ============================================================================
# PEFT (Parameter-Efficient Fine-Tuning) - supports QLoRA
# Must be >= 0.14.0 for 8-bit weight merging
peft==0.14.0
# BitsAndBytes for 4-bit quantization (QLoRA)
# Works with PyTorch 2.4, stable with >= 0.42
bitsandbytes==0.44.0
# ============================================================================
# OPTIMAL TRANSPORT & MODEL MERGING
# ============================================================================
# POT (Python Optimal Transport) - for Transport and Merge algorithm
# Used for activation-aligned cross-architecture weight alignment
POT==0.9.6
# SciPy for optimization & linear algebra (OrthoMerge, LARV)
scipy==1.14.1
# NumPy for numerical operations
numpy==1.26.4
# Lark parser for td_lang DSL
lark>=1.1.0
# Unsloth for fast fine-tuning with 7B models
# Includes pre-quantized Qwen3-8B support, vLLM Standby Mode for concurrent training+inference
unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@main
# ============================================================================
# REINFORCEMENT LEARNING (RL TRAINING)
# ============================================================================
# TRL (Transformer Reinforcement Learning)
# Provides GRPO (Group Relative Policy Optimization) trainer
# v0.27.2 stable, tested with transformers 4.40+
trl==0.27.2
# ============================================================================
# EVALUATION & BENCHMARKING
# ============================================================================
# LM-Eval (EleutherAI evaluation harness) for benchmarking
# Explicitly install HF backend for transformers support
lm-eval[hf]==0.4.10
# MathEval utilities
math-eval==0.0.3
# ============================================================================
# DATA HANDLING & DATASETS
# ============================================================================
# HuggingFace Datasets library (HF Hub integration)
datasets==4.5.1
# PyArrow for efficient data processing
pyarrow==17.0.0
# Pandas for data manipulation
pandas==2.2.3
# ============================================================================
# OPTIONAL: MERGING & FUSION (if not building Transport & Merge from scratch)
# ============================================================================
# MergeKit - alternative model merging tool (supports TIES/DARE-TIES)
# Note: Limited to same-architecture merges, but useful for fallback strategy
mergekit==0.0.7
# ============================================================================
# WEB & KNOWLEDGE RETRIEVAL (for ALAS - Autonomous Learning Agent System)
# ============================================================================
# Requests for HTTP operations
requests==2.31.0
# Beautiful Soup for web scraping
beautifulsoup4==4.12.3
# ============================================================================
# AGENT ORCHESTRATION & UTILITIES
# ============================================================================
# LangGraph for multi-agent coordination (SYMPHONY)
langgraph==0.2.7
# LangChain for prompt management & chains
langchain==0.3.9
# Pydantic for data validation
pydantic==2.8.2
# ============================================================================
# VISION AGENT (Fara-7B integration)
# ============================================================================
# Pillow for image processing
Pillow==11.2.0
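# OpenCV for computer vision tasks
# NOTE(review): verify that the exact version pinned below is published on PyPI
# (opencv-python wheels are normally numbered <opencv version>.<build>) - TODO confirm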
opencv-python==4.10.1.26
# ============================================================================
# INFERENCE & SERVING
# ============================================================================
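# vLLM for fast LLM inference serving
# NOTE(review): vLLM wheels pin a specific torch version; confirm that the release
# pinned below resolves against torch==2.4.1 before installing - TODO confirm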
vllm==0.6.4
# ============================================================================
# UTILITIES & LOGGING
# ============================================================================
# PyYAML for config files
PyYAML==6.0.2
# Python-dotenv for environment variable management
python-dotenv==1.0.1
# Tqdm for progress bars
tqdm==4.67.1
# Rich for beautiful terminal output
rich==13.8.1
# ============================================================================
# DEVELOPMENT & TESTING (OPTIONAL)
# ============================================================================
# Pytest for testing
pytest==8.3.2
# IPython for interactive development
ipython==8.20.0
# Jupyter for notebooks
jupyter==1.0.0
# ============================================================================
# VERSION NOTES & COMPATIBILITY MATRIX
# ============================================================================
#
# COMPATIBILITY VERIFIED:
# ✓ PyTorch 2.4.1 + CUDA 12.4 + RTX 4090 (full support)
# ✓ Transformers 4.51.0 + Qwen3-8B (latest, required for Qwen3)
# ✓ Unsloth 2026.2.x + Qwen3 + QLoRA (fast fine-tuning)
# ✓ BitsAndBytes 0.44.0 + PyTorch 2.4 (4-bit quantization)
# ✓ PEFT 0.14.0 + BitsAndBytes (8-bit weight merging)
# ✓ TRL 0.27.2 + GRPO (RL training with group advantage)
# ✓ POT 0.9.6 + SciPy 1.14.1 (optimal transport)
# ✓ LM-Eval 0.4.10[hf] + Transformers 4.51.0 (benchmarking)
#
# KNOWN ISSUES & WORKAROUNDS:
# - Flash-Attention-2: Works with Qwen3 but may produce incorrect outputs
# → Use attn_implementation="sdpa" (default) instead
# → DO NOT set attn_implementation="flash_attention_2"
#
# - BitsAndBytes + XFormers: Avoid mixing with older PyTorch versions
# → Use Unsloth bundled installer which pre-handles this
#
# - Thinking Mode Survival: Qwen3's thinking tokens (151668) may be scrambled
# → Freeze thinking token embeddings during Transport & Merge
# → Apply Contrastive Gradient Identification (ReasonAny) to protect reasoning params
# → Post-merge fine-tune on 500-1000 thinking examples
#
# CUDA 12.4 NOTES:
# - RTX 4090 full support (Ada architecture, compute capability 8.9)
# - All libraries compiled for CUDA 12.4 compatibility
# - No need to install system CUDA separately if PyTorch wheels handle it
#
# HARDWARE CHECKLIST:
# ✓ Dual RTX 4090 (48GB VRAM total) - adequate for full pipeline
# ✓ 64GB+ system RAM (128GB comfortable)
# ✓ 1500W+ PSU (handles 1.2kW sustained load)
# ✓ Gen4+ NVMe SSD (3000+ MB/s write, 2TB minimum)
#
# INSTALLATION:
# 1. Create venv: python3.12 -m venv venv && source venv/bin/activate
# 2. Install PyTorch with CUDA 12.4:
# pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu124
# 3. Install this requirements file:
# pip install -r requirements.txt
# 4. Optional - install Unsloth's bundled version (handles all conflicts):
# pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@main"
#
# ESTIMATED INSTALLATION TIME:
# - PyTorch (download): 5-10 min
# - Other packages: 2-5 min
# - Total: 10-15 minutes
#