| #!/bin/bash
|
|
|
|
|
|
|
|
|
|
|
| set -euo pipefail
|
|
|
| root=$(dirname $0)
|
|
|
| lang_map_path=$root/utils.map_token_lang.tsv
|
|
|
| usage () {
|
| echo "usage: $0 lang" >&2
|
| exit 1
|
| }
|
|
|
| [ $# -eq 1 ] || usage
|
|
|
| lang=$1
|
|
|
| declare -A lang_map
|
|
|
| while read line; do
|
| key=$(cut -f1 <<< "$line")
|
| val=$(cut -f2 <<< "$line")
|
| lang_map[$key]=$val
|
| done < $lang_map_path
|
|
|
| if [ -v "lang_map[$lang]" ]; then
|
| lang=${lang_map[$lang]}
|
| elif [ -v "lang_map[${lang:0:3}]" ]; then
|
| lang=${lang_map[${lang:0:3}]}
|
| else
|
| echo "undefined mapping: ${lang}, falling back to: en" >&2
|
| lang=en
|
| fi
|
|
|
| perl $root/normalize-punctuation.perl $lang
|
|
|