File size: 1,436 Bytes
#!/bin/bash
# Evaluate GUI-Shift model on GUI benchmarks.
#
# Usage:
#   bash scripts/evaluate.sh --model_path <path> --dataset <path> \
#       [--benchmark <name>] [--output <file>] [--device <device>]
#
# Strict mode:
#   -e           abort as soon as a command fails
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any stage in it fails
set -euo pipefail

# Default values. MODEL_PATH and DATASET start empty because they have no
# sensible default; the script refuses to run until both are supplied.
MODEL_PATH=""
DATASET=""
BENCHMARK="androidcontrol_low"
OUTPUT="evaluation_results.json"
DEVICE="cuda"
# Abort with a diagnostic when an option was given without its value.
#   $1 - the option name being processed
#   $2 - number of command-line words still unparsed (including the option)
require_value() {
  if (( $2 < 2 )); then
    echo "Error: option $1 requires a value" >&2
    exit 1
  fi
}

# Parse the command line into the script's configuration variables.
# Globals written: MODEL_PATH, DATASET, BENCHMARK, OUTPUT, DEVICE
#                  (only those whose option is present are touched)
# Arguments:       the script's arguments, passed as "$@"
# Exits:           status 1 with a message on stderr for an unknown option
#                  or for an option that is missing its value
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model_path)
        # Check the count before touching $2: a bare trailing option would
        # otherwise make `shift 2` fail with no explanation.
        require_value "$1" "$#"
        MODEL_PATH="$2"
        shift 2
        ;;
      --dataset)
        require_value "$1" "$#"
        DATASET="$2"
        shift 2
        ;;
      --benchmark)
        require_value "$1" "$#"
        BENCHMARK="$2"
        shift 2
        ;;
      --output)
        require_value "$1" "$#"
        OUTPUT="$2"
        shift 2
        ;;
      --device)
        require_value "$1" "$#"
        DEVICE="$2"
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Refuse to continue unless both a model path and a dataset were supplied.
# The error and usage text go to stderr so they never mix with regular
# output that a caller may be capturing or piping.
if [[ -z "$MODEL_PATH" || -z "$DATASET" ]]; then
  {
    echo "Error: --model_path and --dataset are required"
    echo "Usage: bash scripts/evaluate.sh --model_path ./checkpoints/gui-shift --dataset ./data/eval/androidcontrol_test.jsonl --benchmark androidcontrol_low"
  } >&2
  exit 1
fi
# Show the effective configuration, one setting per line, followed by a
# blank separator line.
printf '%s\n' \
  "=== GUI-Shift Evaluation ===" \
  "Model: $MODEL_PATH" \
  "Dataset: $DATASET" \
  "Benchmark: $BENCHMARK" \
  "Output: $OUTPUT" \
  "Device: $DEVICE" \
  ""

# Gather the evaluator's flags in an array so every value is passed as
# exactly one word, whatever characters it contains.
eval_args=(
  --model_path "$MODEL_PATH"
  --dataset "$DATASET"
  --benchmark "$BENCHMARK"
  --output "$OUTPUT"
  --device "$DEVICE"
)
python src/evaluation/eval_gui.py "${eval_args[@]}"

# Blank line, then the closing status message.
printf '\n%s\n' "Evaluation complete! Results saved to: $OUTPUT"
|