# TD Merge Pipeline - Complete Python Dependency List
# Python 3.11-3.12 (3.12 preferred)
# CUDA 12.4 (RTX 4090 compatible)
# Updated: February 2026

# ============================================================================
# CORE ML FRAMEWORKS
# ============================================================================

# PyTorch 2.4+ with CUDA 12.4 support (RTX 4090 compatible)
torch==2.4.1
torchvision==0.19.1
torchaudio==2.4.1

# NVIDIA CUDA Toolkit support (already installed on system)
# CUDA 12.4 for RTX 4090 compatibility
# Note: install the pinned builds from the cu124 index so the versions match the pins above:
#   pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu124

# ============================================================================
# TRANSFORMERS & MODEL LOADING
# ============================================================================

# Transformers library - must support Qwen3 (requires 4.51.0+)
transformers==4.51.0

# Safetensors for efficient model serialization
safetensors==0.4.5

# Accelerate for distributed training & multi-GPU support
accelerate==1.2.1

# ============================================================================
# PARAMETER EFFICIENT FINE-TUNING (PEFT/QLoRA)
# ============================================================================

# PEFT (Parameter-Efficient Fine-Tuning) - supports QLoRA
# Must be >= 0.14.0 for 8-bit weight merging
peft==0.14.0

# BitsAndBytes for 4-bit quantization (QLoRA)
# Compatible with PyTorch 2.4; bitsandbytes releases >= 0.42 are considered stable
bitsandbytes==0.44.0

# ============================================================================
# OPTIMAL TRANSPORT & MODEL MERGING
# ============================================================================

# POT (Python Optimal Transport) - for Transport and Merge algorithm
# Used for activation-aligned cross-architecture weight alignment
POT==0.9.6

# SciPy for optimization & linear algebra (OrthoMerge, LARV)
scipy==1.14.1

# NumPy for numerical operations
numpy==1.26.4

# Lark parser for td_lang DSL
lark>=1.1.0

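# Unsloth for fast fine-tuning of 7B-8B-class models
# Includes pre-quantized Qwen3-8B support and vLLM Standby Mode for concurrent training + inference
# NOTE: the requirement below tracks the moving `main` branch, so installs are not reproducible;
# pin a tag or commit to lock in the Unsloth 2026.2.x release listed in the compatibility matrix.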
unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@main

# ============================================================================
# REINFORCEMENT LEARNING (RL TRAINING)
# ============================================================================

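# TRL (Transformer Reinforcement Learning)
# Provides the GRPO (Group Relative Policy Optimization) trainer
# v0.27.2 stable, tested with transformers 4.40+
# NOTE(review): confirm that the minimum transformers version declared by TRL 0.27.2 is satisfied by the transformers==4.51.0 pin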
trl==0.27.2

# ============================================================================
# EVALUATION & BENCHMARKING
# ============================================================================

# LM-Eval (EleutherAI evaluation harness) for benchmarking
# Explicitly install HF backend for transformers support
lm-eval[hf]==0.4.10

# MathEval utilities
math-eval==0.0.3

# ============================================================================
# DATA HANDLING & DATASETS
# ============================================================================

# HuggingFace Datasets library (HF Hub integration)
datasets==4.5.1

# PyArrow for efficient data processing
pyarrow==17.0.0

# Pandas for data manipulation
pandas==2.2.3

# ============================================================================
# OPTIONAL: MERGING & FUSION (if not building Transport & Merge from scratch)
# ============================================================================

# MergeKit - alternative model merging tool (supports TIES/DARE-TIES)
# Note: Limited to same-architecture merges, but useful for fallback strategy
mergekit==0.0.7

# ============================================================================
# WEB & KNOWLEDGE RETRIEVAL (for ALAS - Autonomous Learning Agent System)
# ============================================================================

# Requests for HTTP operations
requests==2.31.0

# Beautiful Soup for web scraping
beautifulsoup4==4.12.3

# ============================================================================
# AGENT ORCHESTRATION & UTILITIES
# ============================================================================

# LangGraph for multi-agent coordination (SYMPHONY)
langgraph==0.2.7

# LangChain for prompt management & chains
langchain==0.3.9

# Pydantic for data validation
pydantic==2.8.2

# ============================================================================
# VISION AGENT (Fara-7B integration)
# ============================================================================

# Pillow for image processing
Pillow==11.2.0

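# OpenCV for computer vision tasks
# NOTE(review): opencv-python versions mirror upstream OpenCV releases (e.g. 4.10.0.x); confirm this exact pin exists on PyPI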
opencv-python==4.10.1.26

# ============================================================================
# INFERENCE & SERVING
# ============================================================================

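# vLLM for fast LLM inference serving
# NOTE(review): each vLLM release pins an exact torch version; confirm vllm 0.6.4 resolves against torch==2.4.1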
vllm==0.6.4

# ============================================================================
# UTILITIES & LOGGING
# ============================================================================

# PyYAML for config files
PyYAML==6.0.2

# Python-dotenv for environment variable management
python-dotenv==1.0.1

# Tqdm for progress bars
tqdm==4.67.1

# Rich for beautiful terminal output
rich==13.8.1

# ============================================================================
# DEVELOPMENT & TESTING (OPTIONAL)
# ============================================================================

# Pytest for testing
pytest==8.3.2

# IPython for interactive development
ipython==8.20.0

# Jupyter for notebooks
jupyter==1.0.0

# ============================================================================
# VERSION NOTES & COMPATIBILITY MATRIX
# ============================================================================
#
# COMPATIBILITY VERIFIED:
# ✓ PyTorch 2.4.1 + CUDA 12.4 + RTX 4090 (full support)
# ✓ Transformers 4.51.0 + Qwen3-8B (minimum version required for Qwen3)
# ✓ Unsloth 2026.2.x + Qwen3 + QLoRA (fast fine-tuning)
# ✓ BitsAndBytes 0.44.0 + PyTorch 2.4 (4-bit quantization)
# ✓ PEFT 0.14.0 + BitsAndBytes (8-bit weight merging)
# ✓ TRL 0.27.2 + GRPO (RL training with group advantage)
# ✓ POT 0.9.6 + SciPy 1.14.1 (optimal transport)
# ✓ LM-Eval 0.4.10[hf] + Transformers 4.51.0 (benchmarking)
#
# KNOWN ISSUES & WORKAROUNDS:
# - Flash-Attention-2: loads and runs with Qwen3 but may produce incorrect outputs
#   → Use attn_implementation="sdpa" (default) instead
#   → DO NOT set attn_implementation="flash_attention_2"
#
# - BitsAndBytes + XFormers: Avoid mixing with older PyTorch versions
#   → Use Unsloth bundled installer which pre-handles this
#
# - Thinking Mode Survival: Qwen3's thinking tokens (151668) may be scrambled
#   → Freeze thinking token embeddings during Transport & Merge
#   → Apply Contrastive Gradient Identification (ReasonAny) to protect reasoning params
#   → Post-merge fine-tune on 500-1000 thinking examples
#
# CUDA 12.4 NOTES:
# - RTX 4090 full support (Ada architecture, compute capability 8.9)
# - All libraries compiled for CUDA 12.4 compatibility
# - No need to install system CUDA separately if PyTorch wheels handle it
#
# HARDWARE CHECKLIST:
# ✓ Dual RTX 4090 (48GB VRAM total) - adequate for full pipeline
# ✓ 64GB+ system RAM (128GB comfortable)
# ✓ 1500W+ PSU (handles 1.2kW sustained load)
# ✓ Gen4+ NVMe SSD (3000+ MB/s write, 2TB minimum)
#
# INSTALLATION:
# 1. Create venv: python3.12 -m venv venv && source venv/bin/activate
# 2. Install PyTorch with CUDA 12.4 (same versions as pinned in this file):
#    pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu124
# 3. Install this requirements file:
#    pip install -r requirements.txt
# 4. Optional - install Unsloth's bundled version (handles all conflicts); quote the spec so the shell passes it to pip as a single argument:
#    pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@main"
#
# ESTIMATED INSTALLATION TIME:
# - PyTorch (download): 5-10 min
# - Other packages: 2-5 min
# - Total: 10-15 minutes
#