Jammies-io committed on
Commit
8086ace
Β·
verified Β·
1 Parent(s): 9a92c67

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. WELCOME +15 -0
  2. autorun.sh +43 -0
  3. config.yaml +63 -0
  4. config_template.yaml +29 -0
  5. configure.py +126 -0
  6. requirements.txt +1 -0
WELCOME ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ────────────────────────────────────
2
+ πŸš€ WELCOME TO RUNPOD FINE-TUNING! πŸ€–
3
+ ────────────────────────────────────
4
+
5
+ You've successfully configured your training environment! πŸŽ‰
6
+
7
+ πŸ’‘ Next Steps: /workspace/fine-tuning/
8
+ 1️⃣ Familiarize yourself with the examples/ and outputs/ directories.
9
+ 2️⃣ Carefully review your config.yaml settings, verifying both format and values. As a best practice, ensure that all hyperparameters are tuned according to your specific use case to prevent potential errors.
10
+ 3️⃣ Start fine-tuning when you're ready with `axolotl train config.yaml`
11
+
12
+ ────────────────────────────────────
13
+ ✨ POWERED BY AXOLOTL 🦎
14
+ ────────────────────────────────────
15
+ πŸ“„ Documentation: https://axolotl-ai-cloud.github.io/axolotl/docs/config.html
autorun.sh ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -e # Exit script on first error
3
+ sleep 5 # Wait for the pod to fully start
4
+
5
+ if [ -n "$RUNPOD_POD_ID" ]; then
6
+ if [ ! -L "examples" ]; then
7
+ echo "πŸ“¦ Linking examples folder..."
8
+ ln -s /workspace/axolotl/examples .
9
+ fi
10
+
11
+ if [ -n "$HF_TOKEN" ]; then
12
+ echo "πŸ”‘ Logging in to Hugging Face..."
13
+ huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential
14
+ else
15
+ echo "⚠️ Warning: HF_TOKEN is not set. Skipping Hugging Face login."
16
+ fi
17
+
18
+ if [ ! -L "outputs" ]; then
19
+ echo "πŸ“¦ Linking outputs folder..."
20
+ ln -s /workspace/data/axolotl-artifacts .
21
+ mv axolotl-artifacts outputs
22
+ fi
23
+ else
24
+ if [ ! -d "outputs" ]; then
25
+ echo "πŸ“¦ Creating outputs folder..."
26
+ mkdir outputs
27
+ fi
28
+ fi
29
+
30
+ # check if any env var starting with "AXOLOTL_" is set
31
+ if [ -n "$(env | grep '^AXOLOTL_')" ]; then
32
+ echo "βŒ› Preparing..."
33
+
34
+ if ! python3 configure.py --template config_template.yaml --output config.yaml; then
35
+ echo "❌ Configuration failed!"
36
+ fi
37
+ fi
38
+
39
+ # show message of the day at the Pod logs
40
+ cat /etc/motd
41
+
42
+ # Keeps the container running
43
+ sleep infinity
config.yaml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adapter: lora
2
+ base_model: meta-llama/Llama-3.1-8B-Instruct
3
+ bf16: auto
4
+ dataset_processes: 32
5
+ datasets:
6
+ - message_property_mappings:
7
+ content: content
8
+ role: role
9
+ path: Jammies-io/livestockllama
10
+ trust_remote_code: false
11
+ gradient_accumulation_steps: 1
12
+ gradient_checkpointing: false
13
+ learning_rate: 0.0002
14
+ lisa_layers_attribute: model.layers
15
+ load_best_model_at_end: false
16
+ load_in_4bit: false
17
+ load_in_8bit: true
18
+ lora_alpha: 16
19
+ lora_dropout: 0.05
20
+ lora_r: 8
21
+ lora_target_modules:
22
+ - q_proj
23
+ - v_proj
24
+ - k_proj
25
+ - o_proj
26
+ - gate_proj
27
+ - down_proj
28
+ - up_proj
29
+ loraplus_lr_embedding: 1.0e-06
30
+ lr_scheduler: cosine
31
+ max_prompt_len: 512
32
+ mean_resizing_embeddings: false
33
+ micro_batch_size: 16
34
+ num_epochs: 1.0
35
+ optimizer: adamw_bnb_8bit
36
+ output_dir: ./outputs/mymodel
37
+ pretrain_multipack_attn: true
38
+ pretrain_multipack_buffer_size: 10000
39
+ qlora_sharded_model_loading: false
40
+ ray_num_workers: 1
41
+ resources_per_worker:
42
+ GPU: 1
43
+ sample_packing_bin_size: 200
44
+ sample_packing_group_size: 100000
45
+ save_only_model: false
46
+ save_safetensors: true
47
+ sequence_len: 4096
48
+ shuffle_merged_datasets: true
49
+ skip_prepare_dataset: false
50
+ strict: false
51
+ train_on_inputs: false
52
+ trl:
53
+ log_completions: false
54
+ ref_model_mixup_alpha: 0.9
55
+ ref_model_sync_steps: 64
56
+ sync_ref_model: false
57
+ use_vllm: false
58
+ vllm_device: auto
59
+ vllm_dtype: auto
60
+ vllm_gpu_memory_utilization: 0.9
61
+ use_ray: false
62
+ val_set_size: 0.0
63
+ weight_decay: 0.0
config_template.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: TinyLlama/TinyLlama_v1.1
2
+ datasets:
3
+ - path: mhenrichsen/alpaca_2k_test
4
+ type: alpaca
5
+ output_dir: ./outputs/mymodel
6
+
7
+ sequence_len: 4096
8
+ adapter: lora
9
+
10
+ lora_r: 8
11
+ lora_alpha: 16
12
+ lora_dropout: 0.05
13
+ lora_target_modules:
14
+ - q_proj
15
+ - v_proj
16
+ - k_proj
17
+ - o_proj
18
+ - gate_proj
19
+ - down_proj
20
+ - up_proj
21
+
22
+ gradient_accumulation_steps: 1
23
+ micro_batch_size: 16
24
+ num_epochs: 1
25
+ optimizer: adamw_bnb_8bit
26
+ learning_rate: 0.0002
27
+ load_in_8bit: true
28
+ train_on_inputs: false
29
+ bf16: auto
configure.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from typing import Any, Optional
3
+ import os
4
+ import json
5
+ import yaml
6
+ from axolotl.utils.config.models.input.v0_4_1 import AxolotlInputConfig
7
+
8
+ """
9
+ Example:
10
+
11
+ [ENV VARS]
12
+ AXOLOTL_BASE_MODEL=TinyLlama/TinyLlama_v1.1
13
+ AXOLOTL_DATASETS='[{"path":"mhenrichsen/alpaca_2k_test","type":"alpaca"}]'
14
+ AXOLOTL_OUTPUT_DIR=./outputs/my_training
15
+
16
+ [Usage]
17
+ config = load_config_with_overrides("config_template.yml")
18
+ save_config(config, "config.yml")
19
+ """
20
+
21
+ DEFAULT_PREFIX = "AXOLOTL_"
22
+
23
+
24
+ def parse_env_value(value: str) -> Any:
25
+ """Parse a string value that could be JSON into appropriate Python type."""
26
+ try:
27
+ return json.loads(value)
28
+ except json.JSONDecodeError:
29
+ return value
30
+
31
+
32
+ def get_env_override(key: str, prefix: str = "") -> Optional[Any]:
33
+ """
34
+ Get environment variable override for a config key.
35
+ Handles JSON structures for nested configs.
36
+ """
37
+ env_key = f"{prefix}{key.upper()}"
38
+ value = os.environ.get(env_key)
39
+
40
+ if value is None:
41
+ return None
42
+
43
+ return parse_env_value(value)
44
+
45
+
46
+ def load_config_with_overrides(
47
+ config_path: str, env_prefix: str = DEFAULT_PREFIX
48
+ ) -> AxolotlInputConfig:
49
+ """
50
+ Load and parse the YAML config file, applying any environment variable overrides.
51
+ Uses the Pydantic AxolotlInputConfig for validation and parsing.
52
+
53
+ Args:
54
+ config_path: Path to the YAML config file
55
+ env_prefix: Prefix for environment variables to override config values
56
+
57
+ Returns:
58
+ AxolotlInputConfig object with merged configuration
59
+ """
60
+ # Load base config from YAML
61
+ if not config_path.startswith("/"):
62
+ # absolute path
63
+ config_path = os.path.join(os.path.dirname(__file__), config_path)
64
+
65
+ with open(config_path, "r") as f:
66
+ print(f"πŸ› οΈ Generating from template: {config_path}")
67
+ config_dict = yaml.safe_load(f)
68
+
69
+ # Get all fields from the Pydantic model
70
+ model_fields = AxolotlInputConfig.model_fields
71
+
72
+ # Apply environment overrides
73
+ for field_name in model_fields:
74
+ if env_value := get_env_override(field_name, env_prefix):
75
+ config_dict[field_name] = env_value
76
+
77
+ # Create and validate the config
78
+ return AxolotlInputConfig.model_validate(config_dict)
79
+
80
+
81
+ def save_config(config: AxolotlInputConfig, output_path: str) -> None:
82
+ """
83
+ Save the configuration to a YAML file.
84
+ """
85
+ # Convert to dict and remove null values
86
+ config_dict = config.model_dump(mode="json", exclude_none=True)
87
+
88
+ if not output_path.startswith("/"):
89
+ # absolute path
90
+ output_path = os.path.join(os.path.dirname(__file__), output_path)
91
+
92
+ # Ensure output directory exists
93
+ if output_dir := os.path.dirname(output_path):
94
+ os.makedirs(output_dir, exist_ok=True)
95
+
96
+ # Save to YAML
97
+ with open(output_path, "w") as f:
98
+ yaml.safe_dump(config_dict, f, sort_keys=True, default_flow_style=False)
99
+
100
+ print(f"πŸ’Ύ Saved configuration to: {output_path}")
101
+
102
+
103
+ if __name__ == "__main__":
104
+ parser = argparse.ArgumentParser(
105
+ description="Generate an Axolotl training configuration based on the template and environment variables."
106
+ )
107
+ parser.add_argument(
108
+ "--template", type=str, required=True, help="Path to the template YAML file."
109
+ )
110
+ parser.add_argument(
111
+ "--output", type=str, required=True, help="Path to save the output YAML file."
112
+ )
113
+
114
+ if len(os.sys.argv) == 1:
115
+ parser.print_help()
116
+ os.sys.exit(1)
117
+
118
+ args = parser.parse_args()
119
+
120
+ try:
121
+ config = load_config_with_overrides(args.template)
122
+ save_config(config, args.output)
123
+
124
+ except Exception as e:
125
+ print(f"❌ Error processing configuration: {str(e)}")
126
+ raise
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ runpod~=1.7.0