#!/usr/bin/env python3
"""
dLNk Agent V.2 - Machine 2 Training Application
Stage 2: Agent SFT (Supervised Fine-Tuning) - 12 hours
Stage 4: GRPO - 12 hours (after Stage 3 completes)
"""
import os
import gc
import json
import torch
from datetime import datetime
from typing import List, Dict
# HuggingFace imports
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TrainingArguments,
Trainer,
DataCollatorForSeq2Seq
)
from huggingface_hub import HfApi, login
from datasets import load_dataset, concatenate_datasets, Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig
# ============================================================
# CONFIGURATION
# ============================================================
# HuggingFace access token, read from the environment so it is never
# committed to source control. May be None if unset; login() will fail then.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Base checkpoint to fine-tune and the name of the resulting SFT model.
BASE_MODEL = "NousResearch/Hermes-3-Llama-3.1-70B"
OUTPUT_MODEL = "dLNk-Agent-V2-SFT-70B"
# SFT Configuration
# Hyperparameters forwarded verbatim into SFTConfig in main().
# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps
# (= 32 sequences per optimizer step per device).
SFT_CONFIG = {
    "max_seq_length": 8192,
    "learning_rate": 2e-5,
    "num_train_epochs": 3,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 16,
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
}
# Dataset Configuration
# Each entry: HF Hub dataset id, split to pull, and a per-dataset sample cap
# applied after shuffling in load_and_prepare_datasets().
# NOTE(review): the "weight" values are never read anywhere in this file
# (datasets are simply concatenated, not weighted-sampled) and they sum to
# 0.75, not 1.0 — presumably a planned mixing ratio; confirm intent.
DATASETS = {
    "function_calling": {
        "name": "glaiveai/glaive-function-calling-v2",
        "split": "train",
        "samples": 100000,
        "weight": 0.25
    },
    "general_instruction": {
        "name": "teknium/OpenHermes-2.5",
        "split": "train",
        "samples": 200000,
        "weight": 0.30
    },
    "coding": {
        "name": "cognitivecomputations/dolphin-coder",
        "split": "train",
        "samples": 50000,
        "weight": 0.20
    }
}
# ============================================================
# DATASET PREPARATION
# ============================================================
def format_chat_template(example: Dict, tokenizer) -> Dict:
    """Convert one raw dataset record into a chat-template-rendered text field.

    Supports the schemas used by the configured datasets:
      * ShareGPT-style: {"conversations": [{"from"/"role": ..., "value"/"content": ...}]}
      * Alpaca-style:   {"instruction": ..., "output": ...}
      * Prompt pairs:   {"prompt": ..., "response": ...}
      * Fallback:       {"input": ..., "output": ...} (missing keys become "")

    Args:
        example: One raw dataset record.
        tokenizer: Tokenizer exposing ``apply_chat_template``.

    Returns:
        Dict with a single "text" key holding the rendered conversation.
    """
    messages = []
    # Handle different dataset formats
    if "conversations" in example:
        # BUGFIX: the old binary user/assistant mapping collapsed "system"
        # turns into the assistant role; preserve system prompts instead.
        role_map = {"human": "user", "user": "user", "system": "system"}
        for conv in example["conversations"]:
            speaker = conv.get("from", conv.get("role"))
            role = role_map.get(speaker, "assistant")
            content = conv.get("value", conv.get("content", ""))
            messages.append({"role": role, "content": content})
    elif "instruction" in example and "output" in example:
        messages = [
            {"role": "user", "content": example["instruction"]},
            {"role": "assistant", "content": example["output"]}
        ]
    elif "prompt" in example and "response" in example:
        messages = [
            {"role": "user", "content": example["prompt"]},
            {"role": "assistant", "content": example["response"]}
        ]
    else:
        # Fallback for unrecognized schemas; str() guards non-string values.
        messages = [
            {"role": "user", "content": str(example.get("input", ""))},
            {"role": "assistant", "content": str(example.get("output", ""))}
        ]
    # Render the whole conversation as one training string (no generation
    # prompt: this is SFT data, not an inference prefix).
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False
    )
    return {"text": text}
def load_and_prepare_datasets(tokenizer) -> Dataset:
    """Load, sample, format, and concatenate all configured training datasets.

    Each dataset in DATASETS is downloaded, capped at its configured sample
    count, and mapped through format_chat_template into a single "text"
    column. Failures on individual datasets are logged and skipped.

    Args:
        tokenizer: Tokenizer passed through to format_chat_template.

    Returns:
        A single shuffled Dataset with one "text" column.

    Raises:
        RuntimeError: If no dataset could be loaded at all.
    """
    print("\nLoading datasets...")
    all_datasets = []
    for name, config in DATASETS.items():
        print(f"  Loading {name}: {config['name']}...")
        try:
            ds = load_dataset(config["name"], split=config["split"])
            # Down-sample oversized datasets (deterministic via fixed seed).
            if len(ds) > config["samples"]:
                ds = ds.shuffle(seed=42).select(range(config["samples"]))
            # Replace all original columns with the rendered "text" column.
            ds = ds.map(
                lambda x: format_chat_template(x, tokenizer),
                remove_columns=ds.column_names,
                num_proc=4
            )
            all_datasets.append(ds)
            print(f"  OK: loaded {len(ds)} samples")
        except Exception as e:
            # Best-effort: one bad dataset should not abort the whole run.
            print(f"  WARNING: failed to load {name}: {e}")
    # BUGFIX: concatenate_datasets([]) raises an opaque error when every
    # dataset failed to load; fail with an actionable message instead.
    if not all_datasets:
        raise RuntimeError("No training datasets could be loaded; aborting.")
    combined = concatenate_datasets(all_datasets)
    combined = combined.shuffle(seed=42)
    print(f"\nTotal training samples: {len(combined)}")
    return combined
# ============================================================
# AGENT-SPECIFIC DATA GENERATION
# ============================================================
def generate_agent_training_data() -> List[Dict]:
    """Return hand-written agent-style training conversations.

    Covers three agent capabilities: tool use (web search), code execution
    (compound-interest calculation), and multi-step planning (trip itinerary).

    Returns:
        A list of {"messages": [...]} dicts, each a single user/assistant turn.
    """

    def single_turn(user_text: str, assistant_text: str) -> Dict:
        # One training example = exactly one user turn and one assistant turn.
        return {
            "messages": [
                {"role": "user", "content": user_text},
                {"role": "assistant", "content": assistant_text},
            ]
        }

    tool_use_reply = """I'll search for the latest AI news for you.
{"name": "web_search", "arguments": {"query": "latest AI developments news 2024"}}
Based on the search results, here are the latest AI developments:
1. **OpenAI GPT-5 Announcement**: OpenAI has announced plans for their next-generation model...
2. **Google Gemini Updates**: Google has released significant updates to their Gemini model...
3. **Open Source AI Progress**: Meta and other companies continue to advance open-source AI...
Would you like me to search for more specific information about any of these topics?"""

    code_exec_reply = """I'll calculate the compound interest for you.
principal = 10000
rate = 0.05
time = 10
n = 12 # Monthly compounding
amount = principal * (1 + rate/n) ** (n * time)
interest = amount - principal
print(f"Principal: ${principal:,.2f}")
print(f"Final Amount: ${amount:,.2f}")
print(f"Interest Earned: ${interest:,.2f}")
**Results:**
- Principal: $10,000.00
- Final Amount: $16,470.09
- Interest Earned: $6,470.09
With monthly compounding at 5% annual interest rate over 10 years, your $10,000 investment would grow to $16,470.09."""

    planning_reply = """I'd be happy to help you plan a 2-week trip to Japan! Let me create a comprehensive plan.
**Phase 1: Pre-Trip Planning**
1. Best time to visit: Spring (March-May) for cherry blossoms or Fall (September-November) for autumn colors
2. Visa requirements: Check if you need a tourist visa based on your nationality
3. Budget estimation: Approximately $150-300/day depending on travel style
**Phase 2: Itinerary Overview**
**Week 1: Tokyo & Surroundings**
- Days 1-3: Tokyo (Shibuya, Shinjuku, Akihabara, Senso-ji Temple)
- Days 4-5: Day trips (Mt. Fuji, Nikko, or Kamakura)
- Days 6-7: Tokyo continued (Harajuku, Ginza, TeamLab)
**Week 2: Kyoto, Osaka & Beyond**
- Days 8-10: Kyoto (Fushimi Inari, Kinkaku-ji, Arashiyama)
- Days 11-12: Osaka (Dotonbori, Osaka Castle, food tour)
- Days 13-14: Nara or Hiroshima day trip, return to Tokyo
**Phase 3: Logistics**
- JR Pass: Highly recommended for this itinerary (~$280 for 14 days)
- Accommodation: Mix of hotels and traditional ryokan
- Pocket WiFi or SIM card for connectivity
Would you like me to elaborate on any specific part of this plan?"""

    return [
        single_turn("Search for the latest news about AI developments", tool_use_reply),
        single_turn("Calculate the compound interest for $10,000 at 5% for 10 years", code_exec_reply),
        single_turn("Help me plan a trip to Japan for 2 weeks", planning_reply),
    ]
# ============================================================
# MAIN TRAINING PIPELINE
# ============================================================
def log_status(message: str, status_file: str = "/app/status.json"):
    """Write a monitoring heartbeat to *status_file* and echo it to stdout.

    Args:
        message: Human-readable progress message.
        status_file: Path of the JSON status file read by the monitor.
    """
    now = datetime.now().isoformat()
    payload = {
        "timestamp": now,
        "message": message,
        "machine": "machine2",
        "stage": "agent_sft",
    }
    # Overwrite (not append): the monitor only cares about the latest state.
    with open(status_file, "w") as fh:
        json.dump(payload, fh, indent=2)
    print(f"[{now}] {message}")
def main():
    """Run the full Stage 2 Agent-SFT pipeline end to end.

    Steps: HF login -> tokenizer -> datasets -> model load -> SFT training ->
    save/push model -> write the completion marker for the orchestrator.

    Raises:
        RuntimeError: If the HF_TOKEN environment variable is not set.
    """
    print("=" * 64)
    print("  dLNk Agent V.2 - Machine 2: Agent SFT Training")
    print("=" * 64)
    # BUGFIX: fail fast with a clear message instead of an opaque
    # huggingface_hub error when the token is missing.
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN environment variable is not set")
    login(token=HF_TOKEN)
    log_status("Logged in to HuggingFace")

    # Tokenizer first: dataset formatting needs its chat template.
    log_status(f"Loading tokenizer: {BASE_MODEL}")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token  # Llama models ship no pad token
    tokenizer.padding_side = "right"

    # Load and prepare datasets
    train_dataset = load_and_prepare_datasets(tokenizer)

    # Load model
    log_status(f"Loading model: {BASE_MODEL}")
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        attn_implementation="flash_attention_2"
    )
    log_status(f"Model loaded: {model.num_parameters() / 1e9:.2f}B parameters")

    # Configure training; hyperparameters come from SFT_CONFIG.
    training_args = SFTConfig(
        output_dir=f"/app/{OUTPUT_MODEL}",
        max_seq_length=SFT_CONFIG["max_seq_length"],
        learning_rate=SFT_CONFIG["learning_rate"],
        num_train_epochs=SFT_CONFIG["num_train_epochs"],
        per_device_train_batch_size=SFT_CONFIG["per_device_train_batch_size"],
        gradient_accumulation_steps=SFT_CONFIG["gradient_accumulation_steps"],
        warmup_ratio=SFT_CONFIG["warmup_ratio"],
        weight_decay=SFT_CONFIG["weight_decay"],
        bf16=True,
        tf32=True,
        gradient_checkpointing=True,
        logging_steps=10,
        save_steps=500,
        save_total_limit=3,
        push_to_hub=True,
        hub_model_id=OUTPUT_MODEL,
        hub_token=HF_TOKEN,
        hub_private_repo=True,
        report_to="tensorboard",
    )

    # Initialize trainer; packing concatenates samples up to max_seq_length.
    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        tokenizer=tokenizer,
        dataset_text_field="text",
        packing=True,
    )

    # Start training
    log_status("Starting SFT training...")
    trainer.train()

    # Save final model and tokenizer side by side.
    log_status("Saving final model...")
    trainer.save_model()
    tokenizer.save_pretrained(f"/app/{OUTPUT_MODEL}")

    # Push to hub
    log_status("Pushing model to HuggingFace Hub...")
    trainer.push_to_hub()

    # Completion marker consumed by the cross-machine orchestrator to know
    # Stage 2 is done and Stage 4 may start once Stage 3 finishes.
    completion_status = {
        "status": "complete",
        "stage": "Stage 2: Agent SFT",
        "model": OUTPUT_MODEL,
        "timestamp": datetime.now().isoformat(),
        "next_stage": "Stage 4: GRPO (waiting for Stage 3)"
    }
    with open("/app/stage2_complete.json", "w") as f:
        json.dump(completion_status, f, indent=2)
    log_status("Stage 2: Agent SFT COMPLETE!")
    print("=" * 64)
    print("  STAGE 2 COMPLETE!")
    print(f"  Model: {OUTPUT_MODEL}")
    print("  Waiting for Stage 3 to complete before starting Stage 4...")
    print("=" * 64)


if __name__ == "__main__":
    main()