File size: 1,366 Bytes
40a04d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env bash
set -euo pipefail

# json2jsonl.sh
#
# Usage:   ./json2jsonl.sh input.json
# Creates: input.jsonl (must not already exist)
#
# Preflight exit codes: 2 for a wrong argument count, 1 when the input is
# unreadable or jq is not installed.

# Exactly one positional argument is accepted.
(( $# == 1 )) || {
  echo "Usage: $0 INPUT.json" >&2
  exit 2
}

in="$1"

# The input path must be readable by the current user.
[[ -r "$in" ]] || {
  echo "Error: cannot read '$in'" >&2
  exit 1
}

# jq performs the actual conversion, so bail out early when it is missing.
command -v jq >/dev/null 2>&1 || {
  echo "Error: 'jq' not found. Install jq and retry." >&2
  exit 1
}

# Choose the destination path: a trailing ".json" is swapped for ".jsonl";
# any other name simply gets ".jsonl" appended.
case "$in" in
  *.json) out="${in%.json}.jsonl" ;;
  *)      out="${in}.jsonl" ;;
esac

# Never overwrite: stop if anything already exists at the destination.
[[ ! -e "$out" ]] || {
  echo "Error: output file already exists: $out" >&2
  exit 1
}

# Transform each input document into JSONL records:
# - Carry over systemInstruction only when the input actually has one
# - Walk .contents two entries at a time (indices 0, 2, 4, ...)
# - Emit one record per (user, model) pair; any other pairing is skipped
#
# The temp file is created next to the destination so the final mv is a
# same-directory rename instead of a possible cross-filesystem copy.
tmp="$(mktemp -- "${out}.XXXXXX")"
trap 'rm -f -- "$tmp"' EXIT

# Notes on the jq program:
# - ".systemInstruction // null" (not "// empty") is deliberate: binding
#   empty to a variable produces no results, so an input without a
#   systemInstruction would otherwise emit nothing at all.
# - Indexing past the end of $c yields null, so a trailing unpaired turn
#   just fails the role check.
# - The input is fed on stdin so a path beginning with "-" is never parsed
#   as a jq option.
jq -c '
  (.systemInstruction // null) as $sys
  | [ .contents[] | {role, parts} ] as $c
  | range(0; ($c | length); 2)
  | select($c[.].role == "user" and $c[. + 1].role == "model")
  | (if $sys == null then {} else {systemInstruction: $sys} end)
    + {contents: [ $c[.], $c[. + 1] ]}
' <"$in" >"$tmp"

# Sanity check: did we emit anything?
if [[ ! -s "$tmp" ]]; then
  echo "Error: produced empty JSONL. Check that '.contents' has even user/model turns." >&2
  exit 1
fi

# Publish the result; the EXIT trap's rm -f is then a harmless no-op.
mv -- "$tmp" "$out"
echo "Wrote: $out"