# Site / src /json2jsonl.sh
# GalaxyTab's picture
# Added Frozone Stuff
# 40a04d4
#!/usr/bin/env bash
set -euo pipefail

# json2jsonl.sh
# Usage: ./json2jsonl.sh input.json
# Creates: input.jsonl (must not already exist)
#
# Reads a JSON document whose `.contents` is an array of {role, parts} turns
# and writes one compact JSON object per line, one per (user, model) pair:
#   {"systemInstruction": <copied when present>, "contents": [<user>, <model>]}
#
# Exit status: 0 on success, 2 on bad usage, 1 on any error detected by this
# script. A jq failure aborts the script via `set -e` with jq's own status.

# Print "Error: <message>" to stderr and exit with status 1.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

if [[ $# -ne 1 ]]; then
  printf 'Usage: %s INPUT.json\n' "$0" >&2
  exit 2
fi

in="$1"

[[ -r "$in" ]] || die "cannot read '$in'"
command -v jq >/dev/null 2>&1 || die "'jq' not found. Install jq and retry."

# Derive output name: replace a final .json with .jsonl, or just append
# .jsonl when the name does not end in .json (the % expansion is then a no-op).
out="${in%.json}.jsonl"

[[ ! -e "$out" ]] || die "output file already exists: $out"

# Stage the result next to the destination so the final mv stays on one
# filesystem; the EXIT trap removes the staging file on every failure path.
tmp="$(mktemp -- "${out}.XXXXXX")"
trap 'rm -f -- "$tmp"' EXIT

# Transform:
# - Carry over systemInstruction only when it is present (non-null). Binding
#   the raw value instead of `// empty` matters: an empty binding would
#   suppress ALL output for inputs that have no systemInstruction.
# - Treat a missing .contents as zero turns so the check below reports it.
# - Walk even indices only and keep a pair when turn i is "user" and turn
#   i+1 is "model"; stopping at length-1 skips a trailing unpaired turn.
# Input is fed on stdin so a path starting with '-' is never parsed as a
# jq option.
jq -c '
  .systemInstruction as $sys
  | [ (.contents // [])[] | {role, parts} ] as $c
  | range(0; ($c | length) - 1; 2)
  | select($c[.].role == "user" and $c[. + 1].role == "model")
  | (if $sys == null then {} else {systemInstruction: $sys} end)
    + {contents: [ $c[.], $c[. + 1] ]}
' < "$in" > "$tmp"

# Sanity check: did we emit anything?
[[ -s "$tmp" ]] \
  || die "produced empty JSONL. Check that '.contents' has even user/model turns."

mv -- "$tmp" "$out"
printf 'Wrote: %s\n' "$out"