Elinnos
/

codellama-fine-tuning

Model card Files Files and versions

xet

Community

Prithvik-1 committed on Nov 25, 2025

Commit

c6dbcac

verified ·

1 Parent(s): 170941e

Upload start_training_chat_format.sh with huggingface_hub

Browse files

Files changed (1) hide show

start_training_chat_format.sh +60 -0

start_training_chat_format.sh ADDED Viewed

	@@ -0,0 +1,60 @@

+#!/bin/bash
+# Start CodeLlama fine-tuning with chat format dataset
+set -e
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+# Activate virtual environment
+source /venv/main/bin/activate
+echo "======================================================================"
+echo "🚀 Starting CodeLlama Fine-tuning with Chat Format Dataset"
+echo "======================================================================"
+# Configuration
+BASE_MODEL="models/base-models/CodeLlama-7B-Instruct"
+TRAIN_DATASET="datasets/processed/split_chat_format/train.jsonl"
+VAL_DATASET="datasets/processed/split_chat_format/val.jsonl"
+OUTPUT_DIR="training-outputs/codellama-fifo-v2-chat"
+# Check if datasets exist
+if [ ! -f "$TRAIN_DATASET" ]; then
+    echo "❌ Error: Training dataset not found: $TRAIN_DATASET"
+    exit 1
+fi
+if [ ! -f "$VAL_DATASET" ]; then
+    echo "❌ Error: Validation dataset not found: $VAL_DATASET"
+    exit 1
+fi
+echo "📊 Configuration:"
+echo "   Base Model: $BASE_MODEL"
+echo "   Train Dataset: $TRAIN_DATASET"
+echo "   Val Dataset: $VAL_DATASET"
+echo "   Output Directory: $OUTPUT_DIR"
+echo ""
+# Start training
+# Note: val-dataset is auto-detected if val.jsonl exists in same directory as train.jsonl
+python3 scripts/training/finetune_codellama.py \
+    --base-model "$BASE_MODEL" \
+    --dataset "$TRAIN_DATASET" \
+    --output-dir "$OUTPUT_DIR" \
+    --max-length 1536 \
+    --num-epochs 5 \
+    --learning-rate 2e-5 \
+    --batch-size 4 \
+    --gradient-accumulation 4 \
+    --lora-r 48 \
+    --lora-alpha 96 \
+    --lora-dropout 0.15 \
+    --resume-from-checkpoint auto
+echo ""
+echo "======================================================================"
+echo "✅ Training started!"
+echo "======================================================================"