Spaces:
Build error
Build error
# Runs the NCF benchmark script five times (runs 0-4) against the ml-20m
# dataset, writing a full log per run and a filtered summary log.
#
# Arguments:
#   $1      when equal to "keras", ncf_keras_main.py is run (batch size 99000,
#           one GPU) instead of ncf_estimator_main.py (batch size 98340).
# Environment:
#   BUCKET  optional; when non-empty it is the parent of the MLPerf_NCF root
#           used for data and model directories, while log files still go to
#           /tmp/MLPerf_NCF. When empty, everything lives in /tmp/MLPerf_NCF.
#   TPU     optional; when non-empty it is passed as --tpu and GPUs are
#           disabled, otherwise --num_gpus -1 is passed.
# Exports:
#   PYTHONPATH, COMPLIANCE_FILE, STITCHED_COMPLIANCE_FILE
set -e

# Run a trivial command through sudo when not already root; the message below
# gives the script's stated reason.
if [[ "$(id -u)" != 0 ]]; then
  echo "Calling sudo to gain root for this shell. (Needed to clear caches.)"
  sudo echo "Success"
fi

SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}")
export PYTHONPATH="${SCRIPT_DIR}/../../"
MAIN_SCRIPT="ncf_estimator_main.py"

DATASET="ml-20m"

BUCKET=${BUCKET:-""}
ROOT_DIR="${BUCKET:-/tmp}/MLPerf_NCF"
echo "Root directory: ${ROOT_DIR}"

# Logs are always written under a local directory; with BUCKET set that is a
# separate /tmp location from ROOT_DIR.
if [[ -z "${BUCKET}" ]]; then
  LOCAL_ROOT=${ROOT_DIR}
else
  LOCAL_ROOT="/tmp/MLPerf_NCF"
  mkdir -p "${LOCAL_ROOT}"
  echo "Local root (for files which cannot use GCS): ${LOCAL_ROOT}"
fi

# One timestamped directory per invocation of this script.
DATE=$(date '+%Y-%m-%d_%H:%M:%S')
TEST_DIR="${ROOT_DIR}/${DATE}"
LOCAL_TEST_DIR="${LOCAL_ROOT}/${DATE}"
mkdir -p "${LOCAL_TEST_DIR}"

# Device flags are kept in an array so each flag/value stays a separate word
# without relying on unquoted word-splitting.
TPU=${TPU:-""}
if [[ -z "${TPU}" ]]; then
  DEVICE_FLAG=(--num_gpus -1) # --use_xla_for_gpu
else
  DEVICE_FLAG=(--tpu "${TPU}" --num_gpus 0)
fi

DATA_DIR="${ROOT_DIR}/movielens_data"
python "${SCRIPT_DIR}/movielens.py" --data_dir "${DATA_DIR}" --dataset "${DATASET}"

# The "keras" mode overrides the entry point, batch size and device flags
# (including any TPU setting chosen above).
if [[ "$1" == "keras" ]]; then
  MAIN_SCRIPT="ncf_keras_main.py"
  BATCH_SIZE=99000
  DEVICE_FLAG=(--num_gpus 1)
else
  BATCH_SIZE=98340
fi

{

  for i in {0..4}; do
    START_TIME=$(date +%s)
    MODEL_DIR="${TEST_DIR}/model_dir_${i}"

    RUN_LOG="${LOCAL_TEST_DIR}/run_${i}.log"
    export COMPLIANCE_FILE="${LOCAL_TEST_DIR}/run_${i}_compliance_raw.log"
    export STITCHED_COMPLIANCE_FILE="${LOCAL_TEST_DIR}/run_${i}_compliance_submission.log"
    echo ""
    echo "Beginning run ${i}"
    echo " Complete output logs are in ${RUN_LOG}"
    echo " Compliance logs: (submission log is created after run.)"
    echo " ${COMPLIANCE_FILE}"
    echo " ${STITCHED_COMPLIANCE_FILE}"

    # To reduce variation set the seed flag:
    # --seed ${i}

    # Full stdout+stderr goes to RUN_LOG; only the lines matching the grep
    # pattern are passed on to the summary.
    # NOTE(review): pipefail is not enabled, so this pipeline's status is
    # grep's. Under `set -e` a run with no matching lines stops the loop,
    # whereas a python failure alone does not. Confirm this is intended.
    python -u "${SCRIPT_DIR}/${MAIN_SCRIPT}" \
      --model_dir "${MODEL_DIR}" \
      --data_dir "${DATA_DIR}" \
      --dataset "${DATASET}" --hooks "" \
      "${DEVICE_FLAG[@]}" \
      --clean \
      --train_epochs 14 \
      --batch_size "${BATCH_SIZE}" \
      --eval_batch_size 160000 \
      --learning_rate 0.00382059 \
      --beta1 0.783529 \
      --beta2 0.909003 \
      --epsilon 1.45439e-07 \
      --layers 256,256,128,64 --num_factors 64 \
      --hr_threshold 0.635 \
      --ml_perf \
      |& tee "${RUN_LOG}" \
      | grep --line-buffered -E --regexp="(Iteration [0-9]+: HR = [0-9\.]+, NDCG = [0-9\.]+, Loss = [0-9\.]+)|(pipeline_hash)|(MLPerf time:)"

    END_TIME=$(date +%s)
    echo "Run ${i} complete: $(( END_TIME - START_TIME )) seconds."

    # Don't fill up the local hard drive.
    if [[ -z "${BUCKET}" ]]; then
      echo "Removing model directory to save space."
      # `--` and `:?` guard the recursive delete against option-like or
      # empty paths.
      rm -r -- "${MODEL_DIR:?}"
    fi

  done

} |& tee "${LOCAL_TEST_DIR}/summary.log"