|
|
# okto_version: "1.2"
|
|
|
|
|
|
# Test 4: Flan-T5 Complete - All Blocks
|
|
|
# Model: google/flan-t5-base
|
|
|
# Goal: Exercise all the advanced blocks together
|
|
|
|
|
|
# Project header: the project identifier followed by its free-text description.
# (Stray `|` separator lines between the two statements were removed; they are
# not OktoScript statements and look like extraction residue.)
PROJECT "test_flan_t5_complete"
DESCRIPTION "Teste completo Flan-T5 com todos os blocos v1.2"
|
|
|
|
|
|
# Execution environment requested for this run.
# Stray `|` lines that were interleaved with the entries have been removed;
# only the five key/value settings below belong to the block.
ENV {
    accelerator: "gpu"
    min_memory: "8GB"
    precision: "fp16"
    backend: "oktoseek"
    # NOTE(review): presumably lets the backend install missing dependencies
    # automatically; confirm against the OktoScript ENV documentation.
    install_missing: true
}
|
|
|
|
|
|
# Dataset locations: one JSONL file for training and one for validation.
# Paths are written relative; NOTE(review): confirm what directory they are
# resolved against. Stray `|` lines inside the block were removed.
DATASET {
    train: "dataset/train.jsonl"
    validation: "dataset/val.jsonl"
}
|
|
|
|
|
|
# Base model for this test: google/flan-t5-base, with device selection left
# on "auto". Stray `|` lines inside the block were removed.
MODEL {
    base: "google/flan-t5-base"
    device: "auto"
}
|
|
|
|
|
|
# Training hyperparameters: 5 epochs, batch size 16, learning rate 1e-4.
# Stray `|` lines inside the block were removed.
TRAIN {
    epochs: 5
    batch_size: 16
    learning_rate: 0.0001
    # Same "auto" value as MODEL.device.
    device: "auto"
}
|
|
|
|
|
|
# Monitoring configuration: which metrics are tracked, the thresholds listed
# under notify_if, and the log file destination.
# Stray `|` lines that interrupted the list and the nested block were removed.
MONITOR {
    # Metrics collected during the run (model quality and resource usage).
    metrics: [
        "loss",
        "val_loss",
        "accuracy",
        "perplexity",
        "gpu_usage",
        "ram_usage",
        "throughput",
        "latency",
        "confidence"
    ]

    # Threshold conditions, one per line.
    # NOTE(review): presumably a notification fires when any single condition
    # holds; confirm the exact semantics in the OktoScript MONITOR docs.
    notify_if {
        loss > 2.0
        val_loss > 2.5
        gpu_usage > 90%
        ram_usage > 80%
    }

    log_to: "logs/training_complete.log"
}
|
|
|
|
|
|
# CONTROL block: event hooks (on_step_end, on_epoch_end), a validation
# interval, and conditional / periodic rules (IF, WHEN, EVERY).
# NOTE(review): this block appears damaged. The bare `|` lines and the
# statements with no operands (`SET`, `LOG`, `SAVE`, `IF`, `WHEN`, `EVERY`)
# look like extraction residue; the lost conditions, values, messages and
# opening braces cannot be recovered from this file and must be restored
# from the original source before it will parse.
CONTROL {
|
|
|
# Hook containing a single action: log the loss value.
on_step_end {
|
|
|
LOG loss
|
|
|
}
|
|
|
|
|
|
# Hook that saves the model, logs a fixed message, then applies two
# conditional rules.
on_epoch_end {
|
|
|
SAVE model
|
|
|
LOG "Epoch completed"
|
|
|
|
|
|
# Rule guarded by loss > 1.5.
# NOTE(review): `SET LR` has no value and `LOG` has no message; both
# statements look truncated.
IF loss > 1.5 {
|
|
|
SET LR
|
|
|
LOG
|
|
|
}
|
|
|
|
|
|
# NOTE(review): condition and opening `{` are missing after IF; the SAVE
# target and LOG message are missing too.
IF
|
|
|
SAVE
|
|
|
LOG
|
|
|
}
|
|
|
}
|
|
|
|
|
|
# NOTE(review): unit of 200 is not stated here (presumably steps); confirm.
validate_every: 200
|
|
|
|
|
|
# NOTE(review): truncated rule; IF condition and `{`, SET target/value and
# LOG message are all missing.
IF
|
|
|
SET
|
|
|
LOG
|
|
|
}
|
|
|
|
|
|
# Rule that ends with STOP_TRAINING.
# NOTE(review): IF condition and `{` and the LOG message are missing.
IF
|
|
|
STOP_TRAINING
|
|
|
LOG
|
|
|
}
|
|
|
|
|
|
# NOTE(review): truncated rule; WHEN condition and `{`, SET target/value and
# LOG message are all missing.
WHEN
|
|
|
SET
|
|
|
LOG
|
|
|
}
|
|
|
|
|
|
# NOTE(review): truncated rule; EVERY interval and `{`, SAVE target and LOG
# message are all missing.
EVERY
|
|
|
SAVE
|
|
|
LOG
|
|
|
}
|
|
|
}
|
|
|
|
|
|
# Stability safeguards for the run.
# The opening `{` was missing from the header even though the block is closed
# with `}`; it is restored here to match every other block in the file.
# Stray `|` lines inside the block were removed.
STABILITY {
    stop_if_nan: true
    stop_if_diverges: true
    # NOTE(review): presumably the minimum metric improvement that still counts
    # as progress; confirm which metric and window this applies to.
    min_improvement: 0.001
}
|
|
|
|
|
|
# Export settings: output formats and the destination directory.
# The opening `{` was missing from the header even though the block is closed
# with `}`; it is restored here to match every other block in the file.
# Stray `|` lines inside the block were removed.
EXPORT {
    format: ["okm", "safetensors"]
    path: "export/"
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|