File size: 1,806 Bytes
c6dbcac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
# Start CodeLlama fine-tuning with the chat-format dataset.
#
# Steps performed, in order:
#   1. cd into the directory that contains this script, so every relative
#      path below resolves from there regardless of the caller's cwd.
#   2. Activate the Python virtual environment.
#   3. Verify that the training script and the train/val datasets exist.
#   4. Run the fine-tuning script in the foreground and wait for it to exit.
#
# Exit status: 1 when a required file is missing; otherwise the status of the
# first failing command (including the training run), because of `set -e`.

set -eo pipefail

# Print an error message to stderr and abort with status 1.
die() {
    echo "❌ Error: $*" >&2
    exit 1
}

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Activate virtual environment
VENV_ACTIVATE="/venv/main/bin/activate"
[[ -f "$VENV_ACTIVATE" ]] || die "Virtual environment not found: $VENV_ACTIVATE"
# shellcheck disable=SC1090
source "$VENV_ACTIVATE"

# nounset is enabled only after activation: the activate script is not under
# our control and may read variables that are unset.
set -u

echo "======================================================================"
echo "🚀 Starting CodeLlama Fine-tuning with Chat Format Dataset"
echo "======================================================================"

# Configuration (all paths are relative to SCRIPT_DIR)
readonly TRAIN_SCRIPT="scripts/training/finetune_codellama.py"
readonly BASE_MODEL="models/base-models/CodeLlama-7B-Instruct"
readonly TRAIN_DATASET="datasets/processed/split_chat_format/train.jsonl"
readonly VAL_DATASET="datasets/processed/split_chat_format/val.jsonl"
readonly OUTPUT_DIR="training-outputs/codellama-fifo-v2-chat"

# Fail fast, before any expensive work, if a required input is missing
[[ -f "$TRAIN_SCRIPT" ]] || die "Training script not found: $TRAIN_SCRIPT"
[[ -f "$TRAIN_DATASET" ]] || die "Training dataset not found: $TRAIN_DATASET"
[[ -f "$VAL_DATASET" ]] || die "Validation dataset not found: $VAL_DATASET"

echo "📊 Configuration:"
echo "   Base Model: $BASE_MODEL"
echo "   Train Dataset: $TRAIN_DATASET"
echo "   Val Dataset: $VAL_DATASET"
echo "   Output Directory: $OUTPUT_DIR"
echo ""

# Start training
# Note: val-dataset is auto-detected if val.jsonl exists in same directory as train.jsonl,
# which is why VAL_DATASET is checked above but not passed on the command line.
python3 "$TRAIN_SCRIPT" \
    --base-model "$BASE_MODEL" \
    --dataset "$TRAIN_DATASET" \
    --output-dir "$OUTPUT_DIR" \
    --max-length 1536 \
    --num-epochs 5 \
    --learning-rate 2e-5 \
    --batch-size 4 \
    --gradient-accumulation 4 \
    --lora-r 48 \
    --lora-alpha 96 \
    --lora-dropout 0.15 \
    --resume-from-checkpoint auto

# python3 runs in the foreground and `set -e` aborts on a non-zero exit, so
# this point is reached only after the training command has exited with 0.
echo ""
echo "======================================================================"
echo "✅ Training completed!"
echo "======================================================================"