#!/usr/bin/env python3
"""
dLNk Agent V.2 - Machine 2 Training Application
Stage 2: Agent SFT (Supervised Fine-Tuning) - 12 hours
Stage 4: GRPO - 12 hours (after Stage 3 completes)
"""
import os
import gc
import json
import torch
from datetime import datetime
from typing import List, Dict
# HuggingFace imports
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TrainingArguments,
Trainer,
DataCollatorForSeq2Seq
)
from huggingface_hub import HfApi, login
from datasets import load_dataset, concatenate_datasets, Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig
# ============================================================
# CONFIGURATION
# ============================================================
# HuggingFace access token, read from the environment so it is never
# committed to source control. May be None if unset; login() will fail then.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Base checkpoint to fine-tune and the name of the resulting SFT model.
BASE_MODEL = "NousResearch/Hermes-3-Llama-3.1-70B"
OUTPUT_MODEL = "dLNk-Agent-V2-SFT-70B"
# SFT Configuration
# Hyperparameters forwarded verbatim into SFTConfig in main().
# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps
# (= 32 sequences per optimizer step per device).
SFT_CONFIG = {
    "max_seq_length": 8192,
    "learning_rate": 2e-5,
    "num_train_epochs": 3,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 16,
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
}
# Dataset Configuration
# Each entry: HF Hub dataset id, split to pull, and a per-dataset sample cap
# applied after shuffling in load_and_prepare_datasets().
# NOTE(review): the "weight" values are never read anywhere in this file
# (datasets are simply concatenated, not weighted-sampled) and they sum to
# 0.75, not 1.0 — presumably a planned mixing ratio; confirm intent.
DATASETS = {
    "function_calling": {
        "name": "glaiveai/glaive-function-calling-v2",
        "split": "train",
        "samples": 100000,
        "weight": 0.25
    },
    "general_instruction": {
        "name": "teknium/OpenHermes-2.5",
        "split": "train",
        "samples": 200000,
        "weight": 0.30
    },
    "coding": {
        "name": "cognitivecomputations/dolphin-coder",
        "split": "train",
        "samples": 50000,
        "weight": 0.20
    }
}
# ============================================================
# DATASET PREPARATION
# ============================================================
def format_chat_template(example: Dict, tokenizer) -> Dict:
    """Convert one raw dataset record into a chat-template-rendered text field.

    Supports the schemas used by the configured datasets:
      * ShareGPT-style: {"conversations": [{"from"/"role": ..., "value"/"content": ...}]}
      * Alpaca-style:   {"instruction": ..., "output": ...}
      * Prompt pairs:   {"prompt": ..., "response": ...}
      * Fallback:       {"input": ..., "output": ...} (missing keys become "")

    Args:
        example: One raw dataset record.
        tokenizer: Tokenizer exposing ``apply_chat_template``.

    Returns:
        Dict with a single "text" key holding the rendered conversation.
    """
    messages = []
    # Handle different dataset formats
    if "conversations" in example:
        # BUGFIX: the old binary user/assistant mapping collapsed "system"
        # turns into the assistant role; preserve system prompts instead.
        role_map = {"human": "user", "user": "user", "system": "system"}
        for conv in example["conversations"]:
            speaker = conv.get("from", conv.get("role"))
            role = role_map.get(speaker, "assistant")
            content = conv.get("value", conv.get("content", ""))
            messages.append({"role": role, "content": content})
    elif "instruction" in example and "output" in example:
        messages = [
            {"role": "user", "content": example["instruction"]},
            {"role": "assistant", "content": example["output"]}
        ]
    elif "prompt" in example and "response" in example:
        messages = [
            {"role": "user", "content": example["prompt"]},
            {"role": "assistant", "content": example["response"]}
        ]
    else:
        # Fallback for unrecognized schemas; str() guards non-string values.
        messages = [
            {"role": "user", "content": str(example.get("input", ""))},
            {"role": "assistant", "content": str(example.get("output", ""))}
        ]
    # Render the whole conversation as one training string (no generation
    # prompt: this is SFT data, not an inference prefix).
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False
    )
    return {"text": text}
def load_and_prepare_datasets(tokenizer) -> Dataset:
    """Load, sample, format, and concatenate all configured training datasets.

    Each dataset in DATASETS is downloaded, capped at its configured sample
    count, and mapped through format_chat_template into a single "text"
    column. Failures on individual datasets are logged and skipped.

    Args:
        tokenizer: Tokenizer passed through to format_chat_template.

    Returns:
        A single shuffled Dataset with one "text" column.

    Raises:
        RuntimeError: If no dataset could be loaded at all.
    """
    print("\nLoading datasets...")
    all_datasets = []
    for name, config in DATASETS.items():
        print(f"  Loading {name}: {config['name']}...")
        try:
            ds = load_dataset(config["name"], split=config["split"])
            # Down-sample oversized datasets (deterministic via fixed seed).
            if len(ds) > config["samples"]:
                ds = ds.shuffle(seed=42).select(range(config["samples"]))
            # Replace all original columns with the rendered "text" column.
            ds = ds.map(
                lambda x: format_chat_template(x, tokenizer),
                remove_columns=ds.column_names,
                num_proc=4
            )
            all_datasets.append(ds)
            print(f"  OK: loaded {len(ds)} samples")
        except Exception as e:
            # Best-effort: one bad dataset should not abort the whole run.
            print(f"  WARNING: failed to load {name}: {e}")
    # BUGFIX: concatenate_datasets([]) raises an opaque error when every
    # dataset failed to load; fail with an actionable message instead.
    if not all_datasets:
        raise RuntimeError("No training datasets could be loaded; aborting.")
    combined = concatenate_datasets(all_datasets)
    combined = combined.shuffle(seed=42)
    print(f"\nTotal training samples: {len(combined)}")
    return combined
# ============================================================
# AGENT-SPECIFIC DATA GENERATION
# ============================================================
def generate_agent_training_data() -> List[Dict]:
    """Return hand-written agent-style training conversations.

    Covers three agent capabilities: tool use (web search), code execution
    (compound-interest calculation), and multi-step planning (trip itinerary).

    Returns:
        A list of {"messages": [...]} dicts, each a single user/assistant turn.
    """

    def single_turn(user_text: str, assistant_text: str) -> Dict:
        # One training example = exactly one user turn and one assistant turn.
        return {
            "messages": [
                {"role": "user", "content": user_text},
                {"role": "assistant", "content": assistant_text},
            ]
        }

    tool_use_reply = """I'll search for the latest AI news for you.
{"name": "web_search", "arguments": {"query": "latest AI developments news 2024"}}
Based on the search results, here are the latest AI developments:
1. **OpenAI GPT-5 Announcement**: OpenAI has announced plans for their next-generation model...
2. **Google Gemini Updates**: Google has released significant updates to their Gemini model...
3. **Open Source AI Progress**: Meta and other companies continue to advance open-source AI...
Would you like me to search for more specific information about any of these topics?"""

    code_exec_reply = """I'll calculate the compound interest for you.
principal = 10000
rate = 0.05
time = 10
n = 12 # Monthly compounding
amount = principal * (1 + rate/n) ** (n * time)
interest = amount - principal
print(f"Principal: ${principal:,.2f}")
print(f"Final Amount: ${amount:,.2f}")
print(f"Interest Earned: ${interest:,.2f}")
**Results:**
- Principal: $10,000.00
- Final Amount: $16,470.09
- Interest Earned: $6,470.09
With monthly compounding at 5% annual interest rate over 10 years, your $10,000 investment would grow to $16,470.09."""

    planning_reply = """I'd be happy to help you plan a 2-week trip to Japan! Let me create a comprehensive plan.
**Phase 1: Pre-Trip Planning**
1. Best time to visit: Spring (March-May) for cherry blossoms or Fall (September-November) for autumn colors
2. Visa requirements: Check if you need a tourist visa based on your nationality
3. Budget estimation: Approximately $150-300/day depending on travel style
**Phase 2: Itinerary Overview**
**Week 1: Tokyo & Surroundings**
- Days 1-3: Tokyo (Shibuya, Shinjuku, Akihabara, Senso-ji Temple)
- Days 4-5: Day trips (Mt. Fuji, Nikko, or Kamakura)
- Days 6-7: Tokyo continued (Harajuku, Ginza, TeamLab)
**Week 2: Kyoto, Osaka & Beyond**
- Days 8-10: Kyoto (Fushimi Inari, Kinkaku-ji, Arashiyama)
- Days 11-12: Osaka (Dotonbori, Osaka Castle, food tour)
- Days 13-14: Nara or Hiroshima day trip, return to Tokyo
**Phase 3: Logistics**
- JR Pass: Highly recommended for this itinerary (~$280 for 14 days)
- Accommodation: Mix of hotels and traditional ryokan
- Pocket WiFi or SIM card for connectivity
Would you like me to elaborate on any specific part of this plan?"""

    return [
        single_turn("Search for the latest news about AI developments", tool_use_reply),
        single_turn("Calculate the compound interest for $10,000 at 5% for 10 years", code_exec_reply),
        single_turn("Help me plan a trip to Japan for 2 weeks", planning_reply),
    ]
# ============================================================
# MAIN TRAINING PIPELINE
# ============================================================
def log_status(message: str, status_file: str = "/app/status.json"):
    """Write a monitoring heartbeat to *status_file* and echo it to stdout.

    Args:
        message: Human-readable progress message.
        status_file: Path of the JSON status file read by the monitor.
    """
    now = datetime.now().isoformat()
    payload = {
        "timestamp": now,
        "message": message,
        "machine": "machine2",
        "stage": "agent_sft",
    }
    # Overwrite (not append): the monitor only cares about the latest state.
    with open(status_file, "w") as fh:
        json.dump(payload, fh, indent=2)
    print(f"[{now}] {message}")
def main():
    """Run the full Stage 2 Agent-SFT pipeline end to end.

    Steps: HF login -> tokenizer -> datasets -> model load -> SFT training ->
    save/push model -> write the completion marker for the orchestrator.

    Raises:
        RuntimeError: If the HF_TOKEN environment variable is not set.
    """
    print("=" * 64)
    print("  dLNk Agent V.2 - Machine 2: Agent SFT Training")
    print("=" * 64)
    # BUGFIX: fail fast with a clear message instead of an opaque
    # huggingface_hub error when the token is missing.
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN environment variable is not set")
    login(token=HF_TOKEN)
    log_status("Logged in to HuggingFace")

    # Tokenizer first: dataset formatting needs its chat template.
    log_status(f"Loading tokenizer: {BASE_MODEL}")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token  # Llama models ship no pad token
    tokenizer.padding_side = "right"

    # Load and prepare datasets
    train_dataset = load_and_prepare_datasets(tokenizer)

    # Load model
    log_status(f"Loading model: {BASE_MODEL}")
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        attn_implementation="flash_attention_2"
    )
    log_status(f"Model loaded: {model.num_parameters() / 1e9:.2f}B parameters")

    # Configure training; hyperparameters come from SFT_CONFIG.
    training_args = SFTConfig(
        output_dir=f"/app/{OUTPUT_MODEL}",
        max_seq_length=SFT_CONFIG["max_seq_length"],
        learning_rate=SFT_CONFIG["learning_rate"],
        num_train_epochs=SFT_CONFIG["num_train_epochs"],
        per_device_train_batch_size=SFT_CONFIG["per_device_train_batch_size"],
        gradient_accumulation_steps=SFT_CONFIG["gradient_accumulation_steps"],
        warmup_ratio=SFT_CONFIG["warmup_ratio"],
        weight_decay=SFT_CONFIG["weight_decay"],
        bf16=True,
        tf32=True,
        gradient_checkpointing=True,
        logging_steps=10,
        save_steps=500,
        save_total_limit=3,
        push_to_hub=True,
        hub_model_id=OUTPUT_MODEL,
        hub_token=HF_TOKEN,
        hub_private_repo=True,
        report_to="tensorboard",
    )

    # Initialize trainer; packing concatenates samples up to max_seq_length.
    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        tokenizer=tokenizer,
        dataset_text_field="text",
        packing=True,
    )

    # Start training
    log_status("Starting SFT training...")
    trainer.train()

    # Save final model and tokenizer side by side.
    log_status("Saving final model...")
    trainer.save_model()
    tokenizer.save_pretrained(f"/app/{OUTPUT_MODEL}")

    # Push to hub
    log_status("Pushing model to HuggingFace Hub...")
    trainer.push_to_hub()

    # Completion marker consumed by the cross-machine orchestrator to know
    # Stage 2 is done and Stage 4 may start once Stage 3 finishes.
    completion_status = {
        "status": "complete",
        "stage": "Stage 2: Agent SFT",
        "model": OUTPUT_MODEL,
        "timestamp": datetime.now().isoformat(),
        "next_stage": "Stage 4: GRPO (waiting for Stage 3)"
    }
    with open("/app/stage2_complete.json", "w") as f:
        json.dump(completion_status, f, indent=2)
    log_status("Stage 2: Agent SFT COMPLETE!")
    print("=" * 64)
    print("  STAGE 2 COMPLETE!")
    print(f"  Model: {OUTPUT_MODEL}")
    print("  Waiting for Stage 3 to complete before starting Stage 4...")
    print("=" * 64)


if __name__ == "__main__":
    main()