Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
| dLNk Agent V.2 - Machine 2 Training Application | |
| Stage 2: Agent SFT (Supervised Fine-Tuning) - 12 hours | |
| Stage 4: GRPO - 12 hours (after Stage 3 completes) | |
| """ | |
| import os | |
| import gc | |
| import json | |
| import torch | |
| from datetime import datetime | |
| from typing import List, Dict | |
| # HuggingFace imports | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| TrainingArguments, | |
| Trainer, | |
| DataCollatorForSeq2Seq | |
| ) | |
| from huggingface_hub import HfApi, login | |
| from datasets import load_dataset, concatenate_datasets, Dataset | |
| from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training | |
| from trl import SFTTrainer, SFTConfig | |
# ============================================================
# CONFIGURATION
# ============================================================
# HuggingFace access token, read from the environment so the secret never
# lives in source control. May be None when unset — login() will then fail.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Base checkpoint to fine-tune, and the repo/directory name of the output.
BASE_MODEL = "NousResearch/Hermes-3-Llama-3.1-70B"
OUTPUT_MODEL = "dLNk-Agent-V2-SFT-70B"
# SFT Configuration
# Hyperparameters consumed by SFTConfig in main().
SFT_CONFIG = {
    "max_seq_length": 8192,             # context length used for sequence packing
    "learning_rate": 2e-5,
    "num_train_epochs": 3,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 16,  # effective batch = 2 * 16 per device
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
}
# Dataset Configuration
# Each entry: HF dataset id, split to pull, and max samples kept after a
# seeded shuffle. NOTE(review): "weight" is never read by
# load_and_prepare_datasets — mixing is by raw sample count only; confirm
# whether weighted sampling was intended.
DATASETS = {
    "function_calling": {
        "name": "glaiveai/glaive-function-calling-v2",
        "split": "train",
        "samples": 100000,
        "weight": 0.25
    },
    "general_instruction": {
        "name": "teknium/OpenHermes-2.5",
        "split": "train",
        "samples": 200000,
        "weight": 0.30
    },
    "coding": {
        "name": "cognitivecomputations/dolphin-coder",
        "split": "train",
        "samples": 50000,
        "weight": 0.20
    }
}
| # ============================================================ | |
| # DATASET PREPARATION | |
| # ============================================================ | |
def format_chat_template(example: Dict, tokenizer) -> Dict:
    """Render one raw dataset row into a single chat-templated text string.

    Supports the common schemas found in the configured datasets:
    ShareGPT-style ("conversations"), Alpaca-style ("instruction"/"output"),
    prompt/response pairs, and a generic input/output fallback.

    Args:
        example: one dataset row (dict of columns).
        tokenizer: any object exposing ``apply_chat_template(messages,
            tokenize=..., add_generation_prompt=...)``.

    Returns:
        ``{"text": <formatted string>}`` — the only column kept for SFT.
    """
    messages: List[Dict[str, str]] = []
    # Handle different dataset formats
    if "conversations" in example:
        # ShareGPT-style: [{"from"/"role": ..., "value"/"content": ...}, ...]
        for conv in example["conversations"]:
            raw_role = conv.get("from", conv.get("role"))
            if raw_role in ("human", "user"):
                role = "user"
            elif raw_role == "system":
                # BUG FIX: system turns were previously collapsed into
                # "assistant", corrupting the system-prompt training signal.
                role = "system"
            else:
                # gpt / assistant / model and any unknown role
                role = "assistant"
            content = conv.get("value", conv.get("content", ""))
            messages.append({"role": role, "content": content})
    elif "instruction" in example and "output" in example:
        messages = [
            {"role": "user", "content": example["instruction"]},
            {"role": "assistant", "content": example["output"]}
        ]
    elif "prompt" in example and "response" in example:
        messages = [
            {"role": "user", "content": example["prompt"]},
            {"role": "assistant", "content": example["response"]}
        ]
    else:
        # Fallback: best-effort stringification of unknown schemas
        messages = [
            {"role": "user", "content": str(example.get("input", ""))},
            {"role": "assistant", "content": str(example.get("output", ""))}
        ]
    # Apply chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False
    )
    return {"text": text}
def load_and_prepare_datasets(tokenizer) -> Dataset:
    """Download, subsample, and chat-format every dataset in DATASETS.

    Each dataset is loaded best-effort (a single failure is logged and
    skipped), truncated to its configured sample budget with a seeded
    shuffle, mapped to a single "text" column via the chat template, then
    concatenated and reshuffled.

    Args:
        tokenizer: tokenizer passed through to format_chat_template.

    Returns:
        A single shuffled ``Dataset`` with one "text" column.

    Raises:
        RuntimeError: if no dataset could be loaded at all — previously
            this fell through to ``concatenate_datasets([])`` and crashed
            with an opaque library error.
    """
    print("\n📥 Loading datasets...")
    all_datasets = []
    for name, config in DATASETS.items():
        print(f"  Loading {name}: {config['name']}...")
        try:
            ds = load_dataset(config["name"], split=config["split"])
            # Subsample deterministically when the split exceeds the budget
            if len(ds) > config["samples"]:
                ds = ds.shuffle(seed=42).select(range(config["samples"]))
            # Format dataset: drop all original columns, keep only "text"
            ds = ds.map(
                lambda x: format_chat_template(x, tokenizer),
                remove_columns=ds.column_names,
                num_proc=4
            )
            all_datasets.append(ds)
            print(f"    ✅ Loaded {len(ds)} samples")
        except Exception as e:
            # Deliberate best-effort: one bad dataset must not abort the run
            print(f"    ⚠️ Failed to load {name}: {e}")
    # BUG FIX: fail loudly when nothing loaded instead of crashing inside
    # concatenate_datasets with an empty list.
    if not all_datasets:
        raise RuntimeError("No training datasets could be loaded; aborting.")
    # Concatenate all datasets
    combined = concatenate_datasets(all_datasets)
    combined = combined.shuffle(seed=42)
    print(f"\n✅ Total training samples: {len(combined)}")
    return combined
| # ============================================================ | |
| # AGENT-SPECIFIC DATA GENERATION | |
| # ============================================================ | |
def generate_agent_training_data() -> List[Dict]:
    """Return a small hand-written set of agent-behavior training examples.

    Each example is a ``{"messages": [...]}`` dict in chat format, one per
    agent capability: tool calling (<tool_call> tags), code execution
    (<code_execution> tags), and multi-step planning.

    NOTE(review): this helper is not called anywhere in this file — confirm
    whether its output was meant to be merged into the SFT dataset in main().
    """
    agent_examples = [
        # Tool use examples
        {
            "messages": [
                {"role": "user", "content": "Search for the latest news about AI developments"},
                {"role": "assistant", "content": """I'll search for the latest AI news for you.
<tool_call>
{"name": "web_search", "arguments": {"query": "latest AI developments news 2024"}}
</tool_call>
Based on the search results, here are the latest AI developments:
1. **OpenAI GPT-5 Announcement**: OpenAI has announced plans for their next-generation model...
2. **Google Gemini Updates**: Google has released significant updates to their Gemini model...
3. **Open Source AI Progress**: Meta and other companies continue to advance open-source AI...
Would you like me to search for more specific information about any of these topics?"""}
            ]
        },
        # Code execution examples
        {
            "messages": [
                {"role": "user", "content": "Calculate the compound interest for $10,000 at 5% for 10 years"},
                {"role": "assistant", "content": """I'll calculate the compound interest for you.
<code_execution>
principal = 10000
rate = 0.05
time = 10
n = 12  # Monthly compounding
amount = principal * (1 + rate/n) ** (n * time)
interest = amount - principal
print(f"Principal: ${principal:,.2f}")
print(f"Final Amount: ${amount:,.2f}")
print(f"Interest Earned: ${interest:,.2f}")
</code_execution>
**Results:**
- Principal: $10,000.00
- Final Amount: $16,470.09
- Interest Earned: $6,470.09
With monthly compounding at 5% annual interest rate over 10 years, your $10,000 investment would grow to $16,470.09."""}
            ]
        },
        # Planning examples
        {
            "messages": [
                {"role": "user", "content": "Help me plan a trip to Japan for 2 weeks"},
                {"role": "assistant", "content": """I'd be happy to help you plan a 2-week trip to Japan! Let me create a comprehensive plan.
**Phase 1: Pre-Trip Planning**
1. Best time to visit: Spring (March-May) for cherry blossoms or Fall (September-November) for autumn colors
2. Visa requirements: Check if you need a tourist visa based on your nationality
3. Budget estimation: Approximately $150-300/day depending on travel style
**Phase 2: Itinerary Overview**
**Week 1: Tokyo & Surroundings**
- Days 1-3: Tokyo (Shibuya, Shinjuku, Akihabara, Senso-ji Temple)
- Days 4-5: Day trips (Mt. Fuji, Nikko, or Kamakura)
- Days 6-7: Tokyo continued (Harajuku, Ginza, TeamLab)
**Week 2: Kyoto, Osaka & Beyond**
- Days 8-10: Kyoto (Fushimi Inari, Kinkaku-ji, Arashiyama)
- Days 11-12: Osaka (Dotonbori, Osaka Castle, food tour)
- Days 13-14: Nara or Hiroshima day trip, return to Tokyo
**Phase 3: Logistics**
- JR Pass: Highly recommended for this itinerary (~$280 for 14 days)
- Accommodation: Mix of hotels and traditional ryokan
- Pocket WiFi or SIM card for connectivity
Would you like me to elaborate on any specific part of this plan?"""}
            ]
        }
    ]
    return agent_examples
| # ============================================================ | |
| # MAIN TRAINING PIPELINE | |
| # ============================================================ | |
def log_status(message: str, status_file: str = "/app/status.json"):
    """Write a monitoring heartbeat to *status_file* and echo it to stdout.

    The file is overwritten on every call with a JSON payload holding the
    ISO-8601 timestamp, the message, and fixed machine/stage identifiers so
    an external monitor can poll training progress.
    """
    now = datetime.now().isoformat()
    payload = {
        "timestamp": now,
        "message": message,
        "machine": "machine2",
        "stage": "agent_sft",
    }
    with open(status_file, "w") as fh:
        json.dump(payload, fh, indent=2)
    print(f"[{now}] {message}")
def main():
    """Run the Stage 2 Agent-SFT pipeline end to end.

    Steps: authenticate with the HuggingFace Hub, build the training
    dataset, load the base model, fine-tune with TRL's SFTTrainer, save the
    result locally, push it to the Hub, and write a completion marker file
    for the cross-machine orchestrator.

    Raises:
        RuntimeError: if the HF_TOKEN environment variable is not set.
    """
    print("""
╔══════════════════════════════════════════════════════════════╗
║        dLNk Agent V.2 - Machine 2: Agent SFT Training        ║
╚══════════════════════════════════════════════════════════════╝
    """)
    # BUG FIX: fail fast with a clear error when the token is missing,
    # instead of an opaque failure inside login() or push_to_hub() hours in.
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN environment variable is not set")
    # Login to HuggingFace
    login(token=HF_TOKEN)
    log_status("Logged in to HuggingFace")
    # Load tokenizer first — needed to chat-format the datasets
    log_status(f"Loading tokenizer: {BASE_MODEL}")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    # Llama-family tokenizers ship without a pad token; reuse EOS
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
    # Load and prepare datasets
    train_dataset = load_and_prepare_datasets(tokenizer)
    # Load model
    log_status(f"Loading model: {BASE_MODEL}")
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        attn_implementation="flash_attention_2"
    )
    log_status(f"Model loaded: {model.num_parameters() / 1e9:.2f}B parameters")
    # Configure training
    training_args = SFTConfig(
        output_dir=f"/app/{OUTPUT_MODEL}",
        max_seq_length=SFT_CONFIG["max_seq_length"],
        learning_rate=SFT_CONFIG["learning_rate"],
        num_train_epochs=SFT_CONFIG["num_train_epochs"],
        per_device_train_batch_size=SFT_CONFIG["per_device_train_batch_size"],
        gradient_accumulation_steps=SFT_CONFIG["gradient_accumulation_steps"],
        warmup_ratio=SFT_CONFIG["warmup_ratio"],
        weight_decay=SFT_CONFIG["weight_decay"],
        bf16=True,
        tf32=True,
        gradient_checkpointing=True,
        logging_steps=10,
        save_steps=500,
        save_total_limit=3,
        push_to_hub=True,
        hub_model_id=OUTPUT_MODEL,
        hub_token=HF_TOKEN,
        hub_private_repo=True,
        report_to="tensorboard",
    )
    # Initialize trainer
    # NOTE(review): newer trl releases moved tokenizer= to processing_class=
    # and dataset_text_field=/packing= into SFTConfig — pin the trl version
    # this was written against, or migrate these kwargs.
    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        tokenizer=tokenizer,
        dataset_text_field="text",
        packing=True,
    )
    # Start training
    log_status("Starting SFT training...")
    trainer.train()
    # Save final model and tokenizer side by side in the output dir
    log_status("Saving final model...")
    trainer.save_model()
    tokenizer.save_pretrained(f"/app/{OUTPUT_MODEL}")
    # Push to hub
    log_status("Pushing model to HuggingFace Hub...")
    trainer.push_to_hub()
    # Mark completion — the orchestrator polls for this file before Stage 4
    completion_status = {
        "status": "complete",
        "stage": "Stage 2: Agent SFT",
        "model": OUTPUT_MODEL,
        "timestamp": datetime.now().isoformat(),
        "next_stage": "Stage 4: GRPO (waiting for Stage 3)"
    }
    with open("/app/stage2_complete.json", "w") as f:
        json.dump(completion_status, f, indent=2)
    log_status("✅ Stage 2: Agent SFT COMPLETE!")
    print(f"""
╔══════════════════════════════════════════════════════════════╗
║                     STAGE 2 COMPLETE!                        ║
║  Model: {OUTPUT_MODEL}
║  Waiting for Stage 3 to complete before starting Stage 4...  ║
╚══════════════════════════════════════════════════════════════╝
    """)
# Script entry point: run the full training pipeline when executed directly.
if __name__ == "__main__":
    main()