#!/usr/bin/env python3
"""
dLNk Agent V.2 - Machine 2 Training Application
Stage 2: Agent SFT (Supervised Fine-Tuning) - 12 hours
Stage 4: GRPO - 12 hours (after Stage 3 completes)
"""

import os
import gc
import json
import torch
from datetime import datetime
from typing import List, Dict

# HuggingFace imports
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForSeq2Seq
)
from huggingface_hub import HfApi, login
from datasets import load_dataset, concatenate_datasets, Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig

# ============================================================
# CONFIGURATION
# ============================================================

HF_TOKEN = os.environ.get("HF_TOKEN")
BASE_MODEL = "NousResearch/Hermes-3-Llama-3.1-70B"
OUTPUT_MODEL = "dLNk-Agent-V2-SFT-70B"

# SFT hyperparameters consumed by SFTConfig in main().
SFT_CONFIG = {
    "max_seq_length": 8192,
    "learning_rate": 2e-5,
    "num_train_epochs": 3,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 16,
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
}

# Source datasets for the SFT mixture.
# NOTE(review): "weight" entries are informational only — the loading code
# samples by "samples" count and never reads "weight" (they also sum to 0.75,
# not 1.0). Confirm whether weighted sampling was intended.
DATASETS = {
    "function_calling": {
        "name": "glaiveai/glaive-function-calling-v2",
        "split": "train",
        "samples": 100000,
        "weight": 0.25
    },
    "general_instruction": {
        "name": "teknium/OpenHermes-2.5",
        "split": "train",
        "samples": 200000,
        "weight": 0.30
    },
    "coding": {
        "name": "cognitivecomputations/dolphin-coder",
        "split": "train",
        "samples": 50000,
        "weight": 0.20
    }
}

# ============================================================
# DATASET PREPARATION
# ============================================================


def format_chat_template(example: Dict, tokenizer) -> Dict:
    """Normalize one raw dataset row into chat-template text.

    Handles the row schemas used by the configured datasets:
      - ShareGPT style: {"conversations": [{"from"/"role", "value"/"content"}]}
      - Alpaca style:   {"instruction", "output"}
      - Prompt style:   {"prompt", "response"}
      - Fallback:       {"input", "output"} (missing keys become "")

    Returns {"text": rendered_string} suitable for SFTTrainer's
    dataset_text_field="text".
    """
    messages = []

    if "conversations" in example:
        for conv in example["conversations"]:
            speaker = conv.get("from", conv.get("role"))
            # FIX: previously every non-user speaker (including "system")
            # was mislabeled as "assistant"; preserve system turns.
            if speaker in ("human", "user"):
                role = "user"
            elif speaker == "system":
                role = "system"
            else:
                role = "assistant"
            content = conv.get("value", conv.get("content", ""))
            messages.append({"role": role, "content": content})
    elif "instruction" in example and "output" in example:
        messages = [
            {"role": "user", "content": example["instruction"]},
            {"role": "assistant", "content": example["output"]}
        ]
    elif "prompt" in example and "response" in example:
        messages = [
            {"role": "user", "content": example["prompt"]},
            {"role": "assistant", "content": example["response"]}
        ]
    else:
        # Fallback for unknown schemas: best-effort user/assistant pair.
        messages = [
            {"role": "user", "content": str(example.get("input", ""))},
            {"role": "assistant", "content": str(example.get("output", ""))}
        ]

    # Render with the model's own chat template; no generation prompt since
    # the assistant turn is already present.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False
    )
    return {"text": text}


def load_and_prepare_datasets(tokenizer) -> Dataset:
    """Load, subsample, format, and concatenate all configured datasets.

    Each dataset that fails to download is skipped with a warning; if ALL
    of them fail, a RuntimeError is raised rather than letting
    concatenate_datasets([]) crash with an obscure error.
    """
    print("\nšŸ“„ Loading datasets...")
    all_datasets = []

    for name, config in DATASETS.items():
        print(f" Loading {name}: {config['name']}...")
        try:
            ds = load_dataset(config["name"], split=config["split"])

            # Cap each dataset at its configured sample budget.
            if len(ds) > config["samples"]:
                ds = ds.shuffle(seed=42).select(range(config["samples"]))

            # Normalize every row to a single "text" column.
            ds = ds.map(
                lambda x: format_chat_template(x, tokenizer),
                remove_columns=ds.column_names,
                num_proc=4
            )
            all_datasets.append(ds)
            print(f" āœ… Loaded {len(ds)} samples")
        except Exception as e:
            # Best-effort: one bad dataset should not abort the whole run.
            print(f" āš ļø Failed to load {name}: {e}")

    # FIX: fail loudly if nothing loaded (network/auth outage) instead of
    # crashing inside concatenate_datasets with an empty list.
    if not all_datasets:
        raise RuntimeError("No training datasets could be loaded; aborting.")

    combined = concatenate_datasets(all_datasets)
    combined = combined.shuffle(seed=42)

    print(f"\nāœ… Total training samples: {len(combined)}")
    return combined


# ============================================================
# AGENT-SPECIFIC DATA GENERATION
# ============================================================


def generate_agent_training_data() -> List[Dict]:
    """Return hand-written agent-style training examples.

    NOTE(review): this function is never called anywhere in this file —
    the examples are NOT mixed into the SFT dataset. Confirm whether they
    should be appended in load_and_prepare_datasets().
    """
    agent_examples = [
        # Tool use example
        {
            "messages": [
                {"role": "user",
                 "content": "Search for the latest news about AI developments"},
                {"role": "assistant",
                 "content": """I'll search for the latest AI news for you.

{"name": "web_search", "arguments": {"query": "latest AI developments news 2024"}}

Based on the search results, here are the latest AI developments:

1. **OpenAI GPT-5 Announcement**: OpenAI has announced plans for their next-generation model...
2. **Google Gemini Updates**: Google has released significant updates to their Gemini model...
3. **Open Source AI Progress**: Meta and other companies continue to advance open-source AI...

Would you like me to search for more specific information about any of these topics?"""}
            ]
        },
        # Code execution example
        {
            "messages": [
                {"role": "user",
                 "content": "Calculate the compound interest for $10,000 at 5% for 10 years"},
                {"role": "assistant",
                 "content": """I'll calculate the compound interest for you.

principal = 10000
rate = 0.05
time = 10
n = 12  # Monthly compounding

amount = principal * (1 + rate/n) ** (n * time)
interest = amount - principal

print(f"Principal: ${principal:,.2f}")
print(f"Final Amount: ${amount:,.2f}")
print(f"Interest Earned: ${interest:,.2f}")

**Results:**
- Principal: $10,000.00
- Final Amount: $16,470.09
- Interest Earned: $6,470.09

With monthly compounding at 5% annual interest rate over 10 years, your $10,000 investment would grow to $16,470.09."""}
            ]
        },
        # Planning example
        {
            "messages": [
                {"role": "user",
                 "content": "Help me plan a trip to Japan for 2 weeks"},
                {"role": "assistant",
                 "content": """I'd be happy to help you plan a 2-week trip to Japan! Let me create a comprehensive plan.

**Phase 1: Pre-Trip Planning**
1. Best time to visit: Spring (March-May) for cherry blossoms or Fall (September-November) for autumn colors
2. Visa requirements: Check if you need a tourist visa based on your nationality
3. Budget estimation: Approximately $150-300/day depending on travel style

**Phase 2: Itinerary Overview**

**Week 1: Tokyo & Surroundings**
- Days 1-3: Tokyo (Shibuya, Shinjuku, Akihabara, Senso-ji Temple)
- Days 4-5: Day trips (Mt. Fuji, Nikko, or Kamakura)
- Days 6-7: Tokyo continued (Harajuku, Ginza, TeamLab)

**Week 2: Kyoto, Osaka & Beyond**
- Days 8-10: Kyoto (Fushimi Inari, Kinkaku-ji, Arashiyama)
- Days 11-12: Osaka (Dotonbori, Osaka Castle, food tour)
- Days 13-14: Nara or Hiroshima day trip, return to Tokyo

**Phase 3: Logistics**
- JR Pass: Highly recommended for this itinerary (~$280 for 14 days)
- Accommodation: Mix of hotels and traditional ryokan
- Pocket WiFi or SIM card for connectivity

Would you like me to elaborate on any specific part of this plan?"""}
            ]
        }
    ]
    return agent_examples


# ============================================================
# MAIN TRAINING PIPELINE
# ============================================================


def log_status(message: str, status_file: str = "/app/status.json"):
    """Write a timestamped status record to *status_file* and echo to stdout.

    The file is overwritten on every call; external monitors poll it for
    the latest state of this machine/stage.
    """
    timestamp = datetime.now().isoformat()
    status = {
        "timestamp": timestamp,
        "message": message,
        "machine": "machine2",
        "stage": "agent_sft"
    }
    with open(status_file, "w") as f:
        json.dump(status, f, indent=2)
    print(f"[{timestamp}] {message}")


def main():
    """Run Stage 2 (Agent SFT): load data and model, train, save, push to Hub."""
    print("""
╔══════════════════════════════════════════════════════════════╗
ā•‘        dLNk Agent V.2 - Machine 2: Agent SFT Training        ā•‘
ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•
    """)

    # FIX: fail with a clear message instead of passing token=None to login().
    if not HF_TOKEN:
        raise EnvironmentError("HF_TOKEN environment variable is not set")

    # Login to HuggingFace
    login(token=HF_TOKEN)
    log_status("Logged in to HuggingFace")

    # Load tokenizer first (needed to format the datasets).
    log_status(f"Loading tokenizer: {BASE_MODEL}")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Load and prepare datasets
    train_dataset = load_and_prepare_datasets(tokenizer)

    # Load model
    log_status(f"Loading model: {BASE_MODEL}")
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        attn_implementation="flash_attention_2"
    )
    log_status(f"Model loaded: {model.num_parameters() / 1e9:.2f}B parameters")

    # Configure training
    training_args = SFTConfig(
        output_dir=f"/app/{OUTPUT_MODEL}",
        max_seq_length=SFT_CONFIG["max_seq_length"],
        learning_rate=SFT_CONFIG["learning_rate"],
        num_train_epochs=SFT_CONFIG["num_train_epochs"],
        per_device_train_batch_size=SFT_CONFIG["per_device_train_batch_size"],
        gradient_accumulation_steps=SFT_CONFIG["gradient_accumulation_steps"],
        warmup_ratio=SFT_CONFIG["warmup_ratio"],
        weight_decay=SFT_CONFIG["weight_decay"],
        bf16=True,
        tf32=True,
        gradient_checkpointing=True,
        logging_steps=10,
        save_steps=500,
        save_total_limit=3,
        push_to_hub=True,
        hub_model_id=OUTPUT_MODEL,
        hub_token=HF_TOKEN,
        hub_private_repo=True,
        report_to="tensorboard",
    )

    # Initialize trainer.
    # NOTE(review): recent trl versions move dataset_text_field/packing into
    # SFTConfig and rename tokenizer= to processing_class=; pin trl or
    # migrate these kwargs if the installed version rejects them.
    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        tokenizer=tokenizer,
        dataset_text_field="text",
        packing=True,
    )

    # Start training
    log_status("Starting SFT training...")
    trainer.train()

    # Save final model
    log_status("Saving final model...")
    trainer.save_model()
    tokenizer.save_pretrained(f"/app/{OUTPUT_MODEL}")

    # Push to hub
    log_status("Pushing model to HuggingFace Hub...")
    trainer.push_to_hub()

    # Mark completion so the orchestrator can start Stage 4.
    completion_status = {
        "status": "complete",
        "stage": "Stage 2: Agent SFT",
        "model": OUTPUT_MODEL,
        "timestamp": datetime.now().isoformat(),
        "next_stage": "Stage 4: GRPO (waiting for Stage 3)"
    }
    with open("/app/stage2_complete.json", "w") as f:
        json.dump(completion_status, f, indent=2)

    log_status("āœ… Stage 2: Agent SFT COMPLETE!")
    print(f"""
╔══════════════════════════════════════════════════════════════╗
ā•‘                    STAGE 2 COMPLETE!                         ā•‘
ā•‘  Model: {OUTPUT_MODEL}
ā•‘  Waiting for Stage 3 to complete before starting Stage 4...  ā•‘
ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•
    """)


if __name__ == "__main__":
    main()