luanns committed on
Commit
274c457
·
verified ·
1 Parent(s): 08be4b2

Upload scripts/filter_data.sh

Browse files
Files changed (1) hide show
  1. scripts/filter_data.sh +80 -0
scripts/filter_data.sh ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Data Filtering Pipeline for GUI-Shift
#
# Thin wrapper that collects command-line options and forwards them to
# src/filtering/filter_data.py.

# Strict mode:
#   -e           abort on the first failing command
#   -u           treat expansion of an unset variable as an error, so a
#                mistyped name or a missing positional value is reported
#                instead of being silently treated as empty
#   -o pipefail  a pipeline fails if any stage fails, not only the last one
set -euo pipefail

# Default values. Each one can be overridden by the command-line flag of the
# same (lower-case) name. INPUT_FILE and OUTPUT_FILE have no usable default
# and must be supplied by the caller.
INPUT_FILE=""
OUTPUT_FILE=""
MODEL_PATH="Qwen/Qwen2.5-VL-7B-Instruct"
NUM_GENERATIONS=8
TEMPERATURE=0.9
DEVICE="cuda"
SEED=42
14
+
# Parse arguments
#
# Every supported option takes exactly one value: `--name value`.
# Globals written: INPUT_FILE, OUTPUT_FILE, MODEL_PATH, NUM_GENERATIONS,
#                  TEMPERATURE, DEVICE, SEED
# Arguments:       the script's command-line arguments
# Exits with status 1 (message on stderr) on an unknown option or when an
# option is given without a value.
parse_args() {
  local opt target
  while [[ $# -gt 0 ]]; do
    opt="$1"
    # Map the flag to the name of the global variable that receives its value.
    case "$opt" in
      --input_file)      target=INPUT_FILE ;;
      --output_file)     target=OUTPUT_FILE ;;
      --model_path)      target=MODEL_PATH ;;
      --num_generations) target=NUM_GENERATIONS ;;
      --temperature)     target=TEMPERATURE ;;
      --device)          target=DEVICE ;;
      --seed)            target=SEED ;;
      *)
        echo "Unknown option: $opt" >&2
        exit 1
        ;;
    esac

    # Without this check a trailing flag with no value would make `shift 2`
    # fail: a silent exit under `set -e`, an endless loop without it.
    if [[ $# -lt 2 ]]; then
      echo "Error: option $opt requires a value" >&2
      exit 1
    fi

    # printf -v assigns to the variable named by $target (global here, since
    # it is not declared local) without resorting to eval.
    printf -v "$target" '%s' "$2"
    shift 2
  done
}

parse_args "$@"
52
+
# --input_file and --output_file have empty defaults, so the run is refused
# unless both were supplied. The message and usage example go to stderr so
# they are not mixed into captured stdout.
# ${VAR:-} keeps the test safe when `set -u` is active and a variable was
# never assigned.
if [[ -z "${INPUT_FILE:-}" || -z "${OUTPUT_FILE:-}" ]]; then
  {
    echo "Error: --input_file and --output_file are required"
    echo "Usage: bash scripts/filter_data.sh --input_file ./data/k1_transition.jsonl --output_file ./data/filtered/k1_transition_filtered.jsonl --model_path Qwen/Qwen2.5-VL-7B-Instruct"
  } >&2
  exit 1
fi
58
+
# Print the effective configuration before starting. Every value forwarded
# to the Python script is listed, including the seed.
echo "=== GUI-Shift Data Filtering ==="
echo "Input: $INPUT_FILE"
echo "Output: $OUTPUT_FILE"
echo "Model: $MODEL_PATH"
echo "N generations: $NUM_GENERATIONS"
echo "Temperature: $TEMPERATURE"
echo "Device: $DEVICE"
echo "Seed: $SEED"
echo ""

# Create the output file's parent directory if needed. `--` stops option
# parsing so a path beginning with '-' is not mistaken for a flag.
mkdir -p -- "$(dirname -- "$OUTPUT_FILE")"

# The script path is relative, so it is resolved against the current working
# directory; the usage example invokes this wrapper as
# `bash scripts/filter_data.sh`, i.e. from the directory that contains src/.
python src/filtering/filter_data.py \
  --input_file "$INPUT_FILE" \
  --output_file "$OUTPUT_FILE" \
  --model_path "$MODEL_PATH" \
  --num_generations "$NUM_GENERATIONS" \
  --temperature "$TEMPERATURE" \
  --device "$DEVICE" \
  --seed "$SEED"

echo ""
echo "Filtering complete! Output: $OUTPUT_FILE"