Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
| dLNk Agent V.2 - Machine 2 Training Application | |
| Stage 2: Agent SFT (Supervised Fine-Tuning) - 12 hours | |
| Stage 4: GRPO - 12 hours (after Stage 3 completes) | |
| """ | |
| import os | |
| import gc | |
| import json | |
| import torch | |
| from datetime import datetime | |
| from typing import List, Dict | |
| # HuggingFace imports | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| TrainingArguments, | |
| Trainer, | |
| DataCollatorForSeq2Seq | |
| ) | |
| from huggingface_hub import HfApi, login | |
| from datasets import load_dataset, concatenate_datasets, Dataset | |
| from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training | |
| from trl import SFTTrainer, SFTConfig | |
# ============================================================
# CONFIGURATION
# ============================================================
# HuggingFace access token, read from the environment so the secret never
# lives in source control. May be None when unset — login() will then fail.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Base checkpoint to fine-tune, and the repo/directory name of the output.
BASE_MODEL = "NousResearch/Hermes-3-Llama-3.1-70B"
OUTPUT_MODEL = "dLNk-Agent-V2-SFT-70B"
# SFT Configuration
# Hyperparameters consumed by SFTConfig in main().
SFT_CONFIG = {
    "max_seq_length": 8192,             # context length used for sequence packing
    "learning_rate": 2e-5,
    "num_train_epochs": 3,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 16,  # effective batch = 2 * 16 per device
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
}
# Dataset Configuration
# Each entry: HF dataset id, split to pull, and max samples kept after a
# seeded shuffle. NOTE(review): "weight" is never read by
# load_and_prepare_datasets — mixing is by raw sample count only; confirm
# whether weighted sampling was intended.
DATASETS = {
    "function_calling": {
        "name": "glaiveai/glaive-function-calling-v2",
        "split": "train",
        "samples": 100000,
        "weight": 0.25
    },
    "general_instruction": {
        "name": "teknium/OpenHermes-2.5",
        "split": "train",
        "samples": 200000,
        "weight": 0.30
    },
    "coding": {
        "name": "cognitivecomputations/dolphin-coder",
        "split": "train",
        "samples": 50000,
        "weight": 0.20
    }
}
| # ============================================================ | |
| # DATASET PREPARATION | |
| # ============================================================ | |
def format_chat_template(example: Dict, tokenizer) -> Dict:
    """Render one raw dataset row into a single chat-templated text string.

    Supports the common schemas found in the configured datasets:
    ShareGPT-style ("conversations"), Alpaca-style ("instruction"/"output"),
    prompt/response pairs, and a generic input/output fallback.

    Args:
        example: one dataset row (dict of columns).
        tokenizer: any object exposing ``apply_chat_template(messages,
            tokenize=..., add_generation_prompt=...)``.

    Returns:
        ``{"text": <formatted string>}`` — the only column kept for SFT.
    """
    messages: List[Dict[str, str]] = []
    # Handle different dataset formats
    if "conversations" in example:
        # ShareGPT-style: [{"from"/"role": ..., "value"/"content": ...}, ...]
        for conv in example["conversations"]:
            raw_role = conv.get("from", conv.get("role"))
            if raw_role in ("human", "user"):
                role = "user"
            elif raw_role == "system":
                # BUG FIX: system turns were previously collapsed into
                # "assistant", corrupting the system-prompt training signal.
                role = "system"
            else:
                # gpt / assistant / model and any unknown role
                role = "assistant"
            content = conv.get("value", conv.get("content", ""))
            messages.append({"role": role, "content": content})
    elif "instruction" in example and "output" in example:
        messages = [
            {"role": "user", "content": example["instruction"]},
            {"role": "assistant", "content": example["output"]}
        ]
    elif "prompt" in example and "response" in example:
        messages = [
            {"role": "user", "content": example["prompt"]},
            {"role": "assistant", "content": example["response"]}
        ]
    else:
        # Fallback: best-effort stringification of unknown schemas
        messages = [
            {"role": "user", "content": str(example.get("input", ""))},
            {"role": "assistant", "content": str(example.get("output", ""))}
        ]
    # Apply chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False
    )
    return {"text": text}
def load_and_prepare_datasets(tokenizer) -> Dataset:
    """Download, subsample, and chat-format every dataset in DATASETS.

    Each dataset is loaded best-effort (a single failure is logged and
    skipped), truncated to its configured sample budget with a seeded
    shuffle, mapped to a single "text" column via the chat template, then
    concatenated and reshuffled.

    Args:
        tokenizer: tokenizer passed through to format_chat_template.

    Returns:
        A single shuffled ``Dataset`` with one "text" column.

    Raises:
        RuntimeError: if no dataset could be loaded at all — previously
            this fell through to ``concatenate_datasets([])`` and crashed
            with an opaque library error.
    """
    print("\n📥 Loading datasets...")
    all_datasets = []
    for name, config in DATASETS.items():
        print(f"  Loading {name}: {config['name']}...")
        try:
            ds = load_dataset(config["name"], split=config["split"])
            # Subsample deterministically when the split exceeds the budget
            if len(ds) > config["samples"]:
                ds = ds.shuffle(seed=42).select(range(config["samples"]))
            # Format dataset: drop all original columns, keep only "text"
            ds = ds.map(
                lambda x: format_chat_template(x, tokenizer),
                remove_columns=ds.column_names,
                num_proc=4
            )
            all_datasets.append(ds)
            print(f"    ✅ Loaded {len(ds)} samples")
        except Exception as e:
            # Deliberate best-effort: one bad dataset must not abort the run
            print(f"    ⚠️ Failed to load {name}: {e}")
    # BUG FIX: fail loudly when nothing loaded instead of crashing inside
    # concatenate_datasets with an empty list.
    if not all_datasets:
        raise RuntimeError("No training datasets could be loaded; aborting.")
    # Concatenate all datasets
    combined = concatenate_datasets(all_datasets)
    combined = combined.shuffle(seed=42)
    print(f"\n✅ Total training samples: {len(combined)}")
    return combined
| # ============================================================ | |
| # AGENT-SPECIFIC DATA GENERATION | |
| # ============================================================ | |
def generate_agent_training_data() -> List[Dict]:
    """Return a small hand-written set of agent-behavior training examples.

    Each example is a ``{"messages": [...]}`` dict in chat format, one per
    agent capability: tool calling (<tool_call> tags), code execution
    (<code_execution> tags), and multi-step planning.

    NOTE(review): this helper is not called anywhere in this file — confirm
    whether its output was meant to be merged into the SFT dataset in main().
    """
    agent_examples = [
        # Tool use examples
        {
            "messages": [
                {"role": "user", "content": "Search for the latest news about AI developments"},
                {"role": "assistant", "content": """I'll search for the latest AI news for you.
<tool_call>
{"name": "web_search", "arguments": {"query": "latest AI developments news 2024"}}
</tool_call>
Based on the search results, here are the latest AI developments:
1. **OpenAI GPT-5 Announcement**: OpenAI has announced plans for their next-generation model...
2. **Google Gemini Updates**: Google has released significant updates to their Gemini model...
3. **Open Source AI Progress**: Meta and other companies continue to advance open-source AI...
Would you like me to search for more specific information about any of these topics?"""}
            ]
        },
        # Code execution examples
        {
            "messages": [
                {"role": "user", "content": "Calculate the compound interest for $10,000 at 5% for 10 years"},
                {"role": "assistant", "content": """I'll calculate the compound interest for you.
<code_execution>
principal = 10000
rate = 0.05
time = 10
n = 12  # Monthly compounding
amount = principal * (1 + rate/n) ** (n * time)
interest = amount - principal
print(f"Principal: ${principal:,.2f}")
print(f"Final Amount: ${amount:,.2f}")
print(f"Interest Earned: ${interest:,.2f}")
</code_execution>
**Results:**
- Principal: $10,000.00
- Final Amount: $16,470.09
- Interest Earned: $6,470.09
With monthly compounding at 5% annual interest rate over 10 years, your $10,000 investment would grow to $16,470.09."""}
            ]
        },
        # Planning examples
        {
            "messages": [
                {"role": "user", "content": "Help me plan a trip to Japan for 2 weeks"},
                {"role": "assistant", "content": """I'd be happy to help you plan a 2-week trip to Japan! Let me create a comprehensive plan.
**Phase 1: Pre-Trip Planning**
1. Best time to visit: Spring (March-May) for cherry blossoms or Fall (September-November) for autumn colors
2. Visa requirements: Check if you need a tourist visa based on your nationality
3. Budget estimation: Approximately $150-300/day depending on travel style
**Phase 2: Itinerary Overview**
**Week 1: Tokyo & Surroundings**
- Days 1-3: Tokyo (Shibuya, Shinjuku, Akihabara, Senso-ji Temple)
- Days 4-5: Day trips (Mt. Fuji, Nikko, or Kamakura)
- Days 6-7: Tokyo continued (Harajuku, Ginza, TeamLab)
**Week 2: Kyoto, Osaka & Beyond**
- Days 8-10: Kyoto (Fushimi Inari, Kinkaku-ji, Arashiyama)
- Days 11-12: Osaka (Dotonbori, Osaka Castle, food tour)
- Days 13-14: Nara or Hiroshima day trip, return to Tokyo
**Phase 3: Logistics**
- JR Pass: Highly recommended for this itinerary (~$280 for 14 days)
- Accommodation: Mix of hotels and traditional ryokan
- Pocket WiFi or SIM card for connectivity
Would you like me to elaborate on any specific part of this plan?"""}
            ]
        }
    ]
    return agent_examples
| # ============================================================ | |
| # MAIN TRAINING PIPELINE | |
| # ============================================================ | |
def log_status(message: str, status_file: str = "/app/status.json"):
    """Write a monitoring heartbeat to *status_file* and echo it to stdout.

    The file is overwritten on every call with a JSON payload holding the
    ISO-8601 timestamp, the message, and fixed machine/stage identifiers so
    an external monitor can poll training progress.
    """
    now = datetime.now().isoformat()
    payload = {
        "timestamp": now,
        "message": message,
        "machine": "machine2",
        "stage": "agent_sft",
    }
    with open(status_file, "w") as fh:
        json.dump(payload, fh, indent=2)
    print(f"[{now}] {message}")
def main():
    """Run the Stage 2 Agent-SFT pipeline end to end.

    Steps: authenticate with the HuggingFace Hub, build the training
    dataset, load the base model, fine-tune with TRL's SFTTrainer, save the
    result locally, push it to the Hub, and write a completion marker file
    for the cross-machine orchestrator.

    Raises:
        RuntimeError: if the HF_TOKEN environment variable is not set.
    """
    print("""
╔══════════════════════════════════════════════════════════════╗
║        dLNk Agent V.2 - Machine 2: Agent SFT Training        ║
╚══════════════════════════════════════════════════════════════╝
    """)
    # BUG FIX: fail fast with a clear error when the token is missing,
    # instead of an opaque failure inside login() or push_to_hub() hours in.
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN environment variable is not set")
    # Login to HuggingFace
    login(token=HF_TOKEN)
    log_status("Logged in to HuggingFace")
    # Load tokenizer first — needed to chat-format the datasets
    log_status(f"Loading tokenizer: {BASE_MODEL}")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    # Llama-family tokenizers ship without a pad token; reuse EOS
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
    # Load and prepare datasets
    train_dataset = load_and_prepare_datasets(tokenizer)
    # Load model
    log_status(f"Loading model: {BASE_MODEL}")
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        attn_implementation="flash_attention_2"
    )
    log_status(f"Model loaded: {model.num_parameters() / 1e9:.2f}B parameters")
    # Configure training
    training_args = SFTConfig(
        output_dir=f"/app/{OUTPUT_MODEL}",
        max_seq_length=SFT_CONFIG["max_seq_length"],
        learning_rate=SFT_CONFIG["learning_rate"],
        num_train_epochs=SFT_CONFIG["num_train_epochs"],
        per_device_train_batch_size=SFT_CONFIG["per_device_train_batch_size"],
        gradient_accumulation_steps=SFT_CONFIG["gradient_accumulation_steps"],
        warmup_ratio=SFT_CONFIG["warmup_ratio"],
        weight_decay=SFT_CONFIG["weight_decay"],
        bf16=True,
        tf32=True,
        gradient_checkpointing=True,
        logging_steps=10,
        save_steps=500,
        save_total_limit=3,
        push_to_hub=True,
        hub_model_id=OUTPUT_MODEL,
        hub_token=HF_TOKEN,
        hub_private_repo=True,
        report_to="tensorboard",
    )
    # Initialize trainer
    # NOTE(review): newer trl releases moved tokenizer= to processing_class=
    # and dataset_text_field=/packing= into SFTConfig — pin the trl version
    # this was written against, or migrate these kwargs.
    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        tokenizer=tokenizer,
        dataset_text_field="text",
        packing=True,
    )
    # Start training
    log_status("Starting SFT training...")
    trainer.train()
    # Save final model and tokenizer side by side in the output dir
    log_status("Saving final model...")
    trainer.save_model()
    tokenizer.save_pretrained(f"/app/{OUTPUT_MODEL}")
    # Push to hub
    log_status("Pushing model to HuggingFace Hub...")
    trainer.push_to_hub()
    # Mark completion — the orchestrator polls for this file before Stage 4
    completion_status = {
        "status": "complete",
        "stage": "Stage 2: Agent SFT",
        "model": OUTPUT_MODEL,
        "timestamp": datetime.now().isoformat(),
        "next_stage": "Stage 4: GRPO (waiting for Stage 3)"
    }
    with open("/app/stage2_complete.json", "w") as f:
        json.dump(completion_status, f, indent=2)
    log_status("✅ Stage 2: Agent SFT COMPLETE!")
    print(f"""
╔══════════════════════════════════════════════════════════════╗
║                     STAGE 2 COMPLETE!                        ║
║  Model: {OUTPUT_MODEL}
║  Waiting for Stage 3 to complete before starting Stage 4...  ║
╚══════════════════════════════════════════════════════════════╝
    """)
# Script entry point: run the full training pipeline when executed directly.
if __name__ == "__main__":
    main()