stmasson committed on
Commit
285df1b
·
verified ·
1 Parent(s): e81fa0f

Upload scripts/train_alizee_v2_stage3_merge.py with huggingface_hub

Browse files
scripts/train_alizee_v2_stage3_merge.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # dependencies = [
4
+ # "peft>=0.14.0",
5
+ # "transformers>=4.48.0",
6
+ # "accelerate>=0.35.0",
7
+ # "torch>=2.2.0",
8
+ # "huggingface_hub>=0.25.0",
9
+ # ]
10
+ # ///
11
+
12
+ """
13
+ Stage 3: Adapter Merging and Final Model Publication
14
+
15
+ Merges LoRA adapters from Stage 1 (and optionally Stage 2) into the base model
16
+ and pushes the final merged model as alizee-coder-devstral-2-small.
17
+
18
+ Options:
19
+ 1. Merge Stage 1 only (if skipping DPO)
20
+ 2. Merge Stage 1 + Stage 2 (if DPO was applied)
21
+ """
22
+
23
+ import os
24
+ import torch
25
+ from peft import PeftModel, AutoPeftModelForCausalLM
26
+ from transformers import AutoTokenizer, AutoModelForCausalLM
27
+ from huggingface_hub import HfApi, create_repo
28
+
29
+ # Configuration
30
+ BASE_MODEL = "mistralai/Devstral-Small-2505"
31
+ STAGE1_MODEL = "stmasson/alizee-coder-devstral-2-small-stage1"
32
+ STAGE2_MODEL = "stmasson/alizee-coder-devstral-2-small-stage2" # Optional
33
+ FINAL_REPO = "stmasson/alizee-coder-devstral-2-small"
34
+
35
+ # Set this based on whether you ran Stage 2
36
+ USE_STAGE2 = os.environ.get("USE_STAGE2", "false").lower() == "true"
37
+
38
+ print("=" * 60)
39
+ print("Stage 3: Adapter Merging and Final Model Publication")
40
+ print("=" * 60)
41
+ print(f"Base model: {BASE_MODEL}")
42
+ print(f"Stage 1 adapter: {STAGE1_MODEL}")
43
+ print(f"Stage 2 adapter: {STAGE2_MODEL if USE_STAGE2 else 'SKIPPED'}")
44
+ print(f"Final output: {FINAL_REPO}")
45
+ print("=" * 60)
46
+
# Determine which checkpoint carries the adapters to merge: Stage 2 if DPO
# was run, otherwise the Stage 1 SFT adapters.
if USE_STAGE2:
    source_model = STAGE2_MODEL
else:
    source_model = STAGE1_MODEL
print(f"\n๐Ÿ”„ Loading model from: {source_model}")

# Tokenizer comes from the same checkpoint as the adapters.
print("\n๐Ÿ“ Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(source_model, trust_remote_code=True)

print("\n๐Ÿ”— Loading and merging adapters...")
print(" This may take several minutes for a 24B model...")

# Loading options shared by both loading strategies below.
_load_kwargs = dict(
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

# Method 1: the checkpoint was saved as a PEFT model (adapter_config.json
# present), so AutoPeftModel can resolve base + adapters in one call.
try:
    model = AutoPeftModelForCausalLM.from_pretrained(source_model, **_load_kwargs)
    print(" Merging LoRA weights into base model...")
    model = model.merge_and_unload()
    print(" โœ“ Adapters merged successfully")
except Exception as e:
    # Best-effort fallback: report the failure and retry with the
    # explicit two-step load rather than aborting the merge.
    print(f" AutoPeftModel failed: {e}")
    print(" Trying alternative loading method...")

    # Method 2: load the base model first, then attach the adapters.
    base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **_load_kwargs)

    model = PeftModel.from_pretrained(
        base_model,
        source_model,
        torch_dtype=torch.bfloat16,
    )
    print(" Merging LoRA weights into base model...")
    model = model.merge_and_unload()
    print(" โœ“ Adapters merged successfully")
# Make sure the destination repository exists before pushing.
print(f"\n๐Ÿ“ Creating repository: {FINAL_REPO}")
api = HfApi()
try:
    create_repo(FINAL_REPO, repo_type="model", exist_ok=True)
except Exception as e:
    # exist_ok already tolerates an existing repo; this catches other
    # Hub-side errors without aborting the (long) merge run.
    print(f" Repository exists or error: {e}")

# Upload the merged weights and the tokenizer to the Hub.
print("\n๐Ÿ’พ Pushing merged model to Hub...")
print(" This will take a while for a 24B model...")

_model_commit = "Alizee-Coder-Devstral-2-Small: Reasoning-enhanced coding model"
model.push_to_hub(
    FINAL_REPO,
    commit_message=_model_commit,
    safe_serialization=True,
)

tokenizer.push_to_hub(FINAL_REPO, commit_message="Add tokenizer")
# Create and upload the model card (README.md with YAML frontmatter).
print("\n๐Ÿ“„ Creating model card...")

# Build the datasets list conditionally. The previous inline conditional
# (`- {"..." if USE_STAGE2 else ""}`) emitted a dangling empty "- " YAML
# list item when USE_STAGE2 was false, which produces invalid frontmatter
# metadata on the Hub.
_dataset_lines = [
    "- nvidia/OpenCodeReasoning",
    "- bigcode/starcoderdata",
]
if USE_STAGE2:
    _dataset_lines.append("- RLHFlow/CodeUltraFeedback-standard")
datasets_block = "\n".join(_dataset_lines)

# Optional Stage 2 section: emit a complete markdown section when DPO was
# run, and nothing (no stray blank lines) when it was skipped.
stage2_section = (
    "### Stage 2: Light DPO Refresh\n"
    "- **Dataset**: RLHFlow/CodeUltraFeedback-standard\n"
    "- **Method**: Conservative DPO (beta=0.1, lr=5e-6)\n"
    "- **Purpose**: Restore alignment after reasoning SFT\n"
    if USE_STAGE2
    else ""
)

model_card = f"""---
license: apache-2.0
base_model: {BASE_MODEL}
tags:
- code
- reasoning
- devstral
- fine-tuned
- qlora
datasets:
{datasets_block}
pipeline_tag: text-generation
---

# Alizee-Coder-Devstral-2-Small

A reasoning-enhanced coding model fine-tuned from [stmasson/alizee-coder-devstral-1-small](https://huggingface.co/stmasson/alizee-coder-devstral-1-small).

## Training Pipeline

This model was trained using a {"three" if USE_STAGE2 else "two"}-stage approach:

### Stage 1: Reasoning Distillation via SFT
- **Dataset**: nvidia/OpenCodeReasoning (736K samples) + bigcode/starcoderdata (15% mix)
- **Method**: QLoRA (r=64, alpha=128)
- **Config**: lr=5e-5, batch_size=256, epochs=2, warmup=5%, cosine scheduler
- **Context**: 32K tokens

{stage2_section}
### Stage 3: Adapter Merging
- Merged LoRA adapters into base model
- Full precision model for inference

## Model Details

| Parameter | Value |
|-----------|-------|
| **Base Model** | mistralai/Devstral-Small-2505 |
| **Parameters** | ~24B |
| **Architecture** | Mistral Small |
| **Context Length** | 32K tokens |
| **Training Data** | ~860K samples (736K reasoning + 124K coding) |

## Usage

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "stmasson/alizee-coder-devstral-2-small",
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("stmasson/alizee-coder-devstral-2-small")

messages = [
    {{"role": "user", "content": "Solve this problem step by step: Find the longest palindromic substring."}}
]

inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
outputs = model.generate(inputs, max_new_tokens=2048)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

## Training Methodology

Based on NVIDIA's OpenCodeReasoning research findings:
- Performance improves linearly from 25K to 736K samples
- Execution filtering is crucial - only solutions that pass tests
- Data mixing (85% reasoning, 15% coding) preserves capabilities
- QLoRA enables efficient training of 24B models

## Citation

```bibtex
@misc{{alizee-coder-v2,
  author = {{stmasson}},
  title = {{Alizee-Coder-Devstral-2-Small}},
  year = {{2025}},
  publisher = {{Hugging Face}},
  howpublished = {{\\url{{https://huggingface.co/stmasson/alizee-coder-devstral-2-small}}}}
}}
```
"""

# Upload the rendered card as README.md in the final repo.
api.upload_file(
    path_or_fileobj=model_card.encode(),
    path_in_repo="README.md",
    repo_id=FINAL_REPO,
    repo_type="model",
)
# Closing banner: summary of the published model plus suggested follow-ups.
_closing = [
    "\n" + "=" * 60,
    "โœ… Stage 3 Complete!",
    f" Final model: https://huggingface.co/{FINAL_REPO}",
    "=" * 60,
    "\n๐ŸŽฏ Recommended next steps:",
    " 1. Evaluate on LiveCodeBench, HumanEval, SWE-Bench",
    " 2. Compare with v1 baseline",
    " 3. Test reasoning quality on sample problems",
    "=" * 60,
]
print("\n".join(_closing))