#!/bin/bash
#SBATCH --job-name=openwebtext-evaluation
#SBATCH --partition=kempner_requeue
#SBATCH --account=kempner_albergo_lab
#SBATCH --nodes=1
#SBATCH --gpus-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=1
#SBATCH --mem=200GB
#SBATCH --time=03:00:00
#SBATCH -o slurm_logs/openwebtext/job-%j.out
#SBATCH -e slurm_logs/openwebtext/job-%j.err
#SBATCH --constraint h100
#SBATCH --mail-type=END,FAIL
#SBATCH --mail-user=brianlee.lck@gmail.com
#SBATCH --signal=SIGUSR1@90

# Activate the project's virtual environment. Abort the job if activation fails:
# continuing would run every evaluation with whatever `python` is first on PATH.
if ! source /n/netscratch/albergo_lab/Lab/brianlck/interpretable-flow/.venv/bin/activate; then
    echo "ERROR: failed to activate virtualenv at /n/netscratch/albergo_lab/Lab/brianlck/interpretable-flow/.venv" >&2
    exit 1
fi

# Hugging Face settings exported to the child processes started below:
# HF_HOME points the cache at a shared scratch location, and
# HF_HUB_ENABLE_HF_TRANSFER=1 opts in to the hf_transfer download backend
# (NOTE(review): requires the hf_transfer package in the venv — confirm).
export HF_HOME=/n/netscratch/albergo_lab/Everyone/hf_cache
export HF_HUB_ENABLE_HF_TRANSFER=1

# Root directory that holds the generated-sample JSON files.
SEARCH_DIR="/n/netscratch/albergo_lab/Lab/brianlck/interpretable-flow"

# Directories scanned (non-recursively) by the evaluation loop: the root itself,
# followed by the euler and tau-leaping subdirectories of tmp/owt.
# Brace expansion runs before parameter expansion, so this yields three entries.
DIRS_TO_SEARCH=("${SEARCH_DIR}" "${SEARCH_DIR}"/tmp/owt/{euler,tau-leaping})

#######################################
# Evaluate every "{method}_generated_samples_{number}.json" file found directly
# inside one directory (no recursion) by launching evaluate_samples.py via srun.
# For each input, the plot and result files are written next to it:
#   gpt_chunk_length_plot_{method}_generated_samples_{number}.png
#   gpt_chunk_{method}_eval_result_{number}.json
# evaluate_samples.py is given as a relative path, so it is resolved against the
# job's working directory.
# Arguments: $1 - directory to scan
# Outputs:   progress messages on stdout; warnings and errors on stderr
# Returns:   0 if every matching file was evaluated successfully, 1 if any
#            srun invocation exited non-zero
#######################################
evaluate_dir() {
    local dir=$1
    local json_file filename method number output_plot output_result
    local rc=0

    # The directory is quoted so that paths containing whitespace or glob
    # characters are not word-split; only the file-name part is a pattern.
    for json_file in "${dir}"/*_generated_samples_*.json; do
        # An unmatched glob is left as the literal pattern; -f filters that out
        # together with anything that is not a regular file.
        [[ -f "$json_file" ]] || continue
        echo "Start processing: $json_file"

        # Strip the directory and the .json extension.
        filename=${json_file##*/}
        filename=${filename%.json}

        # The glob is looser than the naming scheme (the suffix must be numeric),
        # so report files that are skipped instead of dropping them silently.
        if [[ ! $filename =~ ^(.+)_generated_samples_([0-9]+)$ ]]; then
            echo "Skipping (name does not match {method}_generated_samples_{number}): $json_file" >&2
            continue
        fi
        method="${BASH_REMATCH[1]}"
        number="${BASH_REMATCH[2]}"

        # Output names are derived from the input name so they are unique per input.
        output_plot="${dir}/gpt_chunk_length_plot_${filename}.png"
        output_result="${dir}/gpt_chunk_${method}_eval_result_${number}.json"

        if srun python evaluate_samples.py \
            --input-json "$json_file" \
            --batch-size 32 \
            --length-plot-output "$output_plot" \
            --results-output "$output_result" \
            --eval-mode "chunk" \
            --model-type "gpt2-xl"; then
            echo "Finished processing: $json_file"
        else
            # $? here is still the exit status of the srun command above.
            echo "Evaluation failed (exit $?): $json_file" >&2
            rc=1
        fi
    done

    return "$rc"
}

# Scan each configured directory. A failure does not stop the remaining files
# from being evaluated, but it is remembered for the job's exit status.
failed=0
for dir in "${DIRS_TO_SEARCH[@]}"; do
    echo "Searching directory: $dir"
    evaluate_dir "$dir" || failed=1
done

# Exit non-zero when any evaluation failed so the scheduler records the job as
# failed rather than completed.
if (( failed )); then
    echo "ERROR: one or more evaluations failed" >&2
    exit 1
fi