File size: 2,076 Bytes
e74a796
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env bash
# Strict mode: stop on the first failing command, treat expansion of an unset
# variable as an error, and let a failure anywhere in a pipeline fail the
# pipeline as a whole.
set -o errexit -o nounset -o pipefail

# Export both spellings of the catch-all proxy variable as empty strings so
# every child process started below inherits them blank.
export ALL_PROXY="" all_proxy=""

# Settings. A non-empty value already present in the environment is kept;
# otherwise the default shown here is assigned.
: "${MODEL_NAME:=Qwen/Qwen3.5-9B}"          # passed as --model-name to training and evaluation
: "${OUTPUT_DIR:=outputs}"                  # passed as --output-dir to evaluation; root of the visualisation inputs
: "${ADAPTER_DIR:=outputs/qwen35_9b_lora}"  # training --output-dir, later reused as --adapter-dir
: "${EVAL_MAX_SAMPLES:=}"                   # empty means no --max-samples flag is passed to evaluation

# Step 1: run the data-preparation script on the raw train/val JSONL files,
# with data/processed as its output directory. The later steps read their
# inputs from that directory (presumably produced here; confirm in
# scripts/prepare_data.py).
prepare_args=(
  --train train.jsonl
  --val val.jsonl
  --out-dir data/processed
)
python3 scripts/prepare_data.py "${prepare_args[@]}"

# Optional evaluation cap: LIMIT_ARGS stays empty unless EVAL_MAX_SAMPLES is
# non-empty, in which case it holds `--max-samples <value>` for the
# evaluation commands to splice into their argument lists.
LIMIT_ARGS=()
[[ -z "${EVAL_MAX_SAMPLES}" ]] || LIMIT_ARGS+=(--max-samples "${EVAL_MAX_SAMPLES}")

# Baseline evaluation (no adapter) on the "struct" validation file, recorded
# under the run name "base".
#
# The `${LIMIT_ARGS[@]+...}` guard expands to nothing when LIMIT_ARGS is empty.
# A bare "${LIMIT_ARGS[@]}" would abort with "unbound variable" under `set -u`
# on Bash older than 4.4 (for example the 3.2 shipped with macOS), and an
# empty LIMIT_ARGS is the default because EVAL_MAX_SAMPLES defaults to empty.
python3 scripts/evaluate.py \
  --model-name "${MODEL_NAME}" \
  --input-file data/processed/val_struct.jsonl \
  --task-type struct \
  --output-dir "${OUTPUT_DIR}" \
  --run-name base \
  ${LIMIT_ARGS[@]+"${LIMIT_ARGS[@]}"}

# Baseline evaluation (no adapter) on the "qa" validation file, recorded under
# the run name "base", with --max-new-tokens set to 512.
#
# The `${LIMIT_ARGS[@]+...}` guard expands to nothing when LIMIT_ARGS is empty.
# A bare "${LIMIT_ARGS[@]}" would abort with "unbound variable" under `set -u`
# on Bash older than 4.4 (for example the 3.2 shipped with macOS), and an
# empty LIMIT_ARGS is the default because EVAL_MAX_SAMPLES defaults to empty.
python3 scripts/evaluate.py \
  --model-name "${MODEL_NAME}" \
  --input-file data/processed/val_qa.jsonl \
  --task-type qa \
  --output-dir "${OUTPUT_DIR}" \
  --run-name base \
  ${LIMIT_ARGS[@]+"${LIMIT_ARGS[@]}"} \
  --max-new-tokens 512

# Training step: run the QLoRA training script on the mixed train/val files
# and direct its output to ADAPTER_DIR, which the later evaluation runs pass
# as --adapter-dir.
train_args=(
  --model-name "${MODEL_NAME}"
  --train-file data/processed/train_mixed.jsonl
  --val-file data/processed/val_mixed.jsonl
  --output-dir "${ADAPTER_DIR}"
)
python3 scripts/train_qlora.py "${train_args[@]}"

# Evaluation with the adapter from ADAPTER_DIR on the "struct" validation
# file, recorded under the run name "finetuned".
#
# The `${LIMIT_ARGS[@]+...}` guard expands to nothing when LIMIT_ARGS is empty.
# A bare "${LIMIT_ARGS[@]}" would abort with "unbound variable" under `set -u`
# on Bash older than 4.4 (for example the 3.2 shipped with macOS), and an
# empty LIMIT_ARGS is the default because EVAL_MAX_SAMPLES defaults to empty.
python3 scripts/evaluate.py \
  --model-name "${MODEL_NAME}" \
  --adapter-dir "${ADAPTER_DIR}" \
  --input-file data/processed/val_struct.jsonl \
  --task-type struct \
  --output-dir "${OUTPUT_DIR}" \
  --run-name finetuned \
  ${LIMIT_ARGS[@]+"${LIMIT_ARGS[@]}"}

# Evaluation with the adapter from ADAPTER_DIR on the "qa" validation file,
# recorded under the run name "finetuned", with --max-new-tokens set to 512.
#
# The `${LIMIT_ARGS[@]+...}` guard expands to nothing when LIMIT_ARGS is empty.
# A bare "${LIMIT_ARGS[@]}" would abort with "unbound variable" under `set -u`
# on Bash older than 4.4 (for example the 3.2 shipped with macOS), and an
# empty LIMIT_ARGS is the default because EVAL_MAX_SAMPLES defaults to empty.
python3 scripts/evaluate.py \
  --model-name "${MODEL_NAME}" \
  --adapter-dir "${ADAPTER_DIR}" \
  --input-file data/processed/val_qa.jsonl \
  --task-type qa \
  --output-dir "${OUTPUT_DIR}" \
  --run-name finetuned \
  ${LIMIT_ARGS[@]+"${LIMIT_ARGS[@]}"} \
  --max-new-tokens 512

# Final step: hand the four metrics files (base/finetuned x struct/qa) and the
# two "struct" prediction files to the visualisation script, with
# ${OUTPUT_DIR}/figures as its output directory.
metrics_dir="${OUTPUT_DIR}/metrics"
predictions_dir="${OUTPUT_DIR}/predictions"
viz_args=(
  --metrics
  "${metrics_dir}/base_struct_metrics.json"
  "${metrics_dir}/base_qa_metrics.json"
  "${metrics_dir}/finetuned_struct_metrics.json"
  "${metrics_dir}/finetuned_qa_metrics.json"
  --predictions
  "${predictions_dir}/base_struct_predictions.jsonl"
  "${predictions_dir}/finetuned_struct_predictions.jsonl"
  --out-dir "${OUTPUT_DIR}/figures"
)
python3 scripts/visualize_results.py "${viz_args[@]}"