Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

WELCOME +15 -0
autorun.sh +43 -0
config.yaml +63 -0
config_template.yaml +29 -0
configure.py +126 -0
requirements.txt +1 -0

WELCOME ADDED Viewed

	@@ -0,0 +1,15 @@

+────────────────────────────────────
+🚀 WELCOME TO RUNPOD FINE-TUNING! 🤖
+────────────────────────────────────
+You've successfully configured your training environment! 🎉
+💡 Next Steps: /workspace/fine-tuning/
+1️⃣  Familiarize yourself with the examples/ and outputs/ directories.
+2️⃣  Carefully review your config.yaml settings, verifying both format and values. As a best practice, ensure that all hyperparameters are tuned according to your specific use case to prevent potential errors.
+3️⃣  Start fine-tuning when you're ready with `axolotl train config.yaml`
+────────────────────────────────────
+✨ POWERED BY AXOLOTL 🦎
+────────────────────────────────────
+📄 Documentation: https://axolotl-ai-cloud.github.io/axolotl/docs/config.html

autorun.sh ADDED Viewed

	@@ -0,0 +1,43 @@

+#!/bin/bash
+set -e  # Exit script on first error
+sleep 5 # Wait for the pod to fully start
+if [ -n "$RUNPOD_POD_ID" ]; then
+    if [ ! -L "examples" ]; then
+        echo "📦 Linking examples folder..."
+        ln -s /workspace/axolotl/examples .
+    fi
+    if [ -n "$HF_TOKEN" ]; then
+        echo "🔑 Logging in to Hugging Face..."
+        huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential
+    else
+        echo "⚠️ Warning: HF_TOKEN is not set. Skipping Hugging Face login."
+    fi
+    if [ ! -L "outputs" ]; then
+        echo "📦 Linking outputs folder..."
+        ln -s /workspace/data/axolotl-artifacts .
+        mv axolotl-artifacts outputs
+    fi
+else
+    if [ ! -d "outputs" ]; then
+        echo "📦 Creating outputs folder..."
+        mkdir outputs
+    fi
+fi
+# check if any env var starting with "AXOLOTL_" is set
+if [ -n "$(env | grep '^AXOLOTL_')" ]; then
+    echo "⌛ Preparing..."
+    if ! python3 configure.py --template config_template.yaml --output config.yaml; then
+        echo "❌ Configuration failed!"
+    fi
+fi
+# show message of the day at the Pod logs
+cat /etc/motd
+# Keeps the container running
+sleep infinity

config.yaml ADDED Viewed

	@@ -0,0 +1,63 @@

+adapter: lora
+base_model: meta-llama/Llama-3.1-8B-Instruct
+bf16: auto
+dataset_processes: 32
+datasets:
+- message_property_mappings:
+    content: content
+    role: role
+  path: Jammies-io/livestockllama
+  trust_remote_code: false
+gradient_accumulation_steps: 1
+gradient_checkpointing: false
+learning_rate: 0.0002
+lisa_layers_attribute: model.layers
+load_best_model_at_end: false
+load_in_4bit: false
+load_in_8bit: true
+lora_alpha: 16
+lora_dropout: 0.05
+lora_r: 8
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+- o_proj
+- gate_proj
+- down_proj
+- up_proj
+loraplus_lr_embedding: 1.0e-06
+lr_scheduler: cosine
+max_prompt_len: 512
+mean_resizing_embeddings: false
+micro_batch_size: 16
+num_epochs: 1.0
+optimizer: adamw_bnb_8bit
+output_dir: ./outputs/mymodel
+pretrain_multipack_attn: true
+pretrain_multipack_buffer_size: 10000
+qlora_sharded_model_loading: false
+ray_num_workers: 1
+resources_per_worker:
+  GPU: 1
+sample_packing_bin_size: 200
+sample_packing_group_size: 100000
+save_only_model: false
+save_safetensors: true
+sequence_len: 4096
+shuffle_merged_datasets: true
+skip_prepare_dataset: false
+strict: false
+train_on_inputs: false
+trl:
+  log_completions: false
+  ref_model_mixup_alpha: 0.9
+  ref_model_sync_steps: 64
+  sync_ref_model: false
+  use_vllm: false
+  vllm_device: auto
+  vllm_dtype: auto
+  vllm_gpu_memory_utilization: 0.9
+use_ray: false
+val_set_size: 0.0
+weight_decay: 0.0

config_template.yaml ADDED Viewed

	@@ -0,0 +1,29 @@

+# Template read by configure.py. Any key that is a field of AxolotlInputConfig
+# can be overridden through an environment variable named AXOLOTL_<KEY> (key
+# upper-cased); values are parsed as JSON when possible. autorun.sh writes the
+# merged result to config.yaml.
+base_model: TinyLlama/TinyLlama_v1.1
+datasets:
+  - path: mhenrichsen/alpaca_2k_test
+    type: alpaca
+output_dir: ./outputs/mymodel
+sequence_len: 4096
+# LoRA adapter settings
+adapter: lora
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+  - q_proj
+  - v_proj
+  - k_proj
+  - o_proj
+  - gate_proj
+  - down_proj
+  - up_proj
+# Batch, epoch, optimizer and precision settings
+gradient_accumulation_steps: 1
+micro_batch_size: 16
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+learning_rate: 0.0002
+load_in_8bit: true
+train_on_inputs: false
+bf16: auto

configure.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import argparse
+from typing import Any, Optional
+import os
+import json
+import yaml
+from axolotl.utils.config.models.input.v0_4_1 import AxolotlInputConfig
+"""
+Example:
+[ENV VARS]
+AXOLOTL_BASE_MODEL=TinyLlama/TinyLlama_v1.1
+AXOLOTL_DATASETS='[{"path":"mhenrichsen/alpaca_2k_test","type":"alpaca"}]'
+AXOLOTL_OUTPUT_DIR=./outputs/my_training
+[Usage]
+config = load_config_with_overrides("config_template.yaml")
+save_config(config, "config.yaml")
+"""
+# Prefix of the environment variables that override config fields: the field
+# name is upper-cased and appended, e.g. base_model -> AXOLOTL_BASE_MODEL.
+DEFAULT_PREFIX = "AXOLOTL_"
+def parse_env_value(value: str) -> Any:
+    """Parse a string value that could be JSON into appropriate Python type."""
+    try:
+        return json.loads(value)
+    except json.JSONDecodeError:
+        return value
+def get_env_override(key: str, prefix: str = "") -> Optional[Any]:
+    """
+    Get environment variable override for a config key.
+    Handles JSON structures for nested configs.
+    """
+    env_key = f"{prefix}{key.upper()}"
+    value = os.environ.get(env_key)
+    if value is None:
+        return None
+    return parse_env_value(value)
+def load_config_with_overrides(
+    config_path: str, env_prefix: str = DEFAULT_PREFIX
+) -> AxolotlInputConfig:
+    """
+    Load and parse the YAML config file, applying any environment variable overrides.
+    Uses the Pydantic AxolotlInputConfig for validation and parsing.
+    Args:
+        config_path: Path to the YAML config file
+        env_prefix: Prefix for environment variables to override config values
+    Returns:
+        AxolotlInputConfig object with merged configuration
+    """
+    # Load base config from YAML
+    if not config_path.startswith("/"):
+        # absolute path
+        config_path = os.path.join(os.path.dirname(__file__), config_path)
+    with open(config_path, "r") as f:
+        print(f"🛠️ Generating from template: {config_path}")
+        config_dict = yaml.safe_load(f)
+    # Get all fields from the Pydantic model
+    model_fields = AxolotlInputConfig.model_fields
+    # Apply environment overrides
+    for field_name in model_fields:
+        if env_value := get_env_override(field_name, env_prefix):
+            config_dict[field_name] = env_value
+    # Create and validate the config
+    return AxolotlInputConfig.model_validate(config_dict)
+def save_config(config: AxolotlInputConfig, output_path: str) -> None:
+    """
+    Save the configuration to a YAML file.
+    """
+    # Convert to dict and remove null values
+    config_dict = config.model_dump(mode="json", exclude_none=True)
+    if not output_path.startswith("/"):
+        # absolute path
+        output_path = os.path.join(os.path.dirname(__file__), output_path)
+    # Ensure output directory exists
+    if output_dir := os.path.dirname(output_path):
+        os.makedirs(output_dir, exist_ok=True)
+    # Save to YAML
+    with open(output_path, "w") as f:
+        yaml.safe_dump(config_dict, f, sort_keys=True, default_flow_style=False)
+    print(f"💾 Saved configuration to: {output_path}")
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Generate an Axolotl training configuration based on the template and environment variables."
+    )
+    parser.add_argument(
+        "--template", type=str, required=True, help="Path to the template YAML file."
+    )
+    parser.add_argument(
+        "--output", type=str, required=True, help="Path to save the output YAML file."
+    )
+    if len(os.sys.argv) == 1:
+        parser.print_help()
+        os.sys.exit(1)
+    args = parser.parse_args()
+    try:
+        config = load_config_with_overrides(args.template)
+        save_config(config, args.output)
+    except Exception as e:
+        print(f"❌ Error processing configuration: {str(e)}")
+        raise

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ runpod~=1.7.0