"""AutoTrain_LLM.ipynb

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/github/huggingface/autotrain-advanced/blob/main/colabs/AutoTrain_LLM.ipynb
"""
import os

# Install AutoTrain Advanced and run the Colab setup step; output is
# redirected to log files to keep the notebook output clean.
!pip install -U autotrain-advanced > install_logs.txt
!autotrain setup --colab > setup_logs.txt
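# Hedged sanity check (not part of the original notebook): show the tail of
# the redirected logs and confirm the autotrain CLI resolved onto the PATH.
!tail -n 2 install_logs.txt setup_logs.txt
!which autotrain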
# Project config: name of the output directory and the base model to fine-tune.
project_name = 'my-autotrain-llm'
model_name = 'abhishek/llama-2-7b-hf-small-shards'
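# The training command at the bottom reads CSV data from data/ (see the
# --data-path and --text-column flags). A minimal placeholder sketch, assuming
# the conventional data/train.csv layout with a single "text" column; the two
# hypothetical rows below stand in for your real instruction-tuning data.
import pandas as pd

os.makedirs("data", exist_ok=True)
pd.DataFrame(
    {
        "text": [
            "### Instruction: Say hello.\n### Response: Hello!",
            "### Instruction: Add 2 and 3.\n### Response: 5",
        ]
    }
).to_csv("data/train.csv", index=False)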
# Hub config: set push_to_hub = True to upload the trained model to the
# Hugging Face Hub. hf_token and hf_username below are placeholders; replace
# them with a write-scoped token and your Hub username.
push_to_hub = False
hf_token = "hf_XXX"
hf_username = "abc"
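# Optional hedged check (assumes a real token has replaced the hf_XXX
# placeholder): whoami() raises on an invalid token, so a push failure
# surfaces before training rather than after.
if push_to_hub:
    from huggingface_hub import HfApi
    print(HfApi(token=hf_token).whoami()["name"])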
# Training hyperparameters: supervised fine-tuning (SFT) with LoRA adapters
# (PEFT) and int4 quantization, intended to fit a 7B model on a single
# Colab GPU.
learning_rate = 2e-4
num_epochs = 1
batch_size = 1
block_size = 1024
trainer = "sft"
warmup_ratio = 0.1
weight_decay = 0.01
gradient_accumulation = 4
mixed_precision = "fp16"
peft = True
quantization = "int4"
lora_r = 16
lora_alpha = 32
lora_dropout = 0.05
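# With batch_size = 1 and gradient_accumulation = 4, gradients accumulate over
# 4 forward passes, so the effective batch size per optimizer step is 4.
effective_batch_size = batch_size * gradient_accumulation  # 1 * 4 = 4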
# Export everything as environment variables so the shell command below can
# read them via ${...} substitution.
os.environ["PROJECT_NAME"] = project_name
os.environ["MODEL_NAME"] = model_name
os.environ["PUSH_TO_HUB"] = str(push_to_hub)
os.environ["HF_TOKEN"] = hf_token
os.environ["LEARNING_RATE"] = str(learning_rate)
os.environ["NUM_EPOCHS"] = str(num_epochs)
os.environ["BATCH_SIZE"] = str(batch_size)
os.environ["BLOCK_SIZE"] = str(block_size)
os.environ["WARMUP_RATIO"] = str(warmup_ratio)
os.environ["WEIGHT_DECAY"] = str(weight_decay)
os.environ["GRADIENT_ACCUMULATION"] = str(gradient_accumulation)
os.environ["MIXED_PRECISION"] = str(mixed_precision)
os.environ["PEFT"] = str(peft)
os.environ["QUANTIZATION"] = str(quantization)
os.environ["LORA_R"] = str(lora_r)
os.environ["LORA_ALPHA"] = str(lora_alpha)
os.environ["LORA_DROPOUT"] = str(lora_dropout)
os.environ["HF_USERNAME"] = hf_username
os.environ["TRAINER"] = trainer
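# Quick sanity check (a small addition, not in the original notebook): confirm
# a few of the exports are visible before handing off to the shell cell.
for key in ("PROJECT_NAME", "MODEL_NAME", "TRAINER", "QUANTIZATION"):
    print(key, "=", os.environ[key])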
# Launch training. The --peft and --push-to-hub flags are only appended when
# the corresponding Python variables above are set to True.
!autotrain llm \
--train \
--model ${MODEL_NAME} \
--project-name ${PROJECT_NAME} \
--data-path data/ \
--text-column text \
--lr ${LEARNING_RATE} \
--batch-size ${BATCH_SIZE} \
--epochs ${NUM_EPOCHS} \
--block-size ${BLOCK_SIZE} \
--warmup-ratio ${WARMUP_RATIO} \
--lora-r ${LORA_R} \
--lora-alpha ${LORA_ALPHA} \
--lora-dropout ${LORA_DROPOUT} \
--weight-decay ${WEIGHT_DECAY} \
--gradient-accumulation ${GRADIENT_ACCUMULATION} \
--quantization ${QUANTIZATION} \
--mixed-precision ${MIXED_PRECISION} \
--username ${HF_USERNAME} \
--trainer ${TRAINER} \
$( [[ "$PEFT" == "True" ]] && echo "--peft" ) \
$( [[ "$PUSH_TO_HUB" == "True" ]] && echo "--push-to-hub --token ${HF_TOKEN}" )