'''
Utility script for converting the label mapping (label -> integer) stored in
the file 'label_mapping.pkl' to JSON format.
Reason: some platforms, such as Hugging Face, flag pickle files as dangerous,
so converting the mapping to JSON format is safer and more portable.
'''
from pathlib import Path
import pickle
import json
import argparse
from src.data_utils import nfc_normalize
# Dataset directory, located relative to this script: two levels up from the
# file itself, then into "data/dataset".
ROOT = Path(__file__).parents[1].joinpath("data/dataset")

# Default input (pickled mapping) and output (JSON mapping) locations used
# when the corresponding command-line options are omitted.
DEFAULT_PKL = ROOT.joinpath("label_mapping.pkl")
DEFAULT_JSON = ROOT.joinpath("label_mapping.json")
def parse_args():
    """Parse the command-line options of the conversion script.

    Returns:
        argparse.Namespace: namespace with two attributes:

        * ``pkl_path`` -- pickle file to read (defaults to ``DEFAULT_PKL``).
        * ``json_path`` -- JSON file to write (defaults to ``DEFAULT_JSON``).

        Both attributes are ``pathlib.Path`` objects, whether they come from
        the defaults or from the command line.
    """
    parser = argparse.ArgumentParser(description="Convert label_mapping.pkl to .json")
    # argparse only applies ``type`` to command-line strings and to string
    # defaults. The defaults below are already Path objects, so ``type=Path``
    # (instead of ``str``) keeps the attribute type the same in both cases.
    parser.add_argument(
        "--pkl_path",
        type=Path,
        default=DEFAULT_PKL,
        help="Path to the original label_mapping.pkl",
    )
    parser.add_argument(
        "--json_path",
        type=Path,
        default=DEFAULT_JSON,
        help="Path to output JSON file",
    )
    return parser.parse_args()
def convert_pkl_to_json(pkl_path, json_path):
    """Load a pickled mapping, normalize its keys and save it as JSON.

    Every key of the loaded mapping is passed through ``nfc_normalize``
    (presumably Unicode NFC normalization, judging by the name -- confirm in
    ``src.data_utils``); values are written unchanged.

    Args:
        pkl_path: Path of the pickle file to read.
        json_path: Path of the JSON file to write (UTF-8, 4-space indent,
            non-ASCII characters kept as-is).

    Raises:
        ValueError: If two different keys normalize to the same key but carry
            different values, since writing only one of them would silently
            corrupt the mapping.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only run this script on pickle files from a trusted source.
    with open(pkl_path, "rb") as f:
        data = pickle.load(f)

    normalized = {}
    for key, value in data.items():
        norm_key = nfc_normalize(key)
        # Identical values are harmless duplicates; differing values mean an
        # entry would be lost, so fail loudly instead of overwriting.
        if norm_key in normalized and normalized[norm_key] != value:
            raise ValueError(
                f"Keys collide after normalization: {norm_key!r} maps to both "
                f"{normalized[norm_key]!r} and {value!r}"
            )
        normalized[norm_key] = value

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(normalized, f, ensure_ascii=False, indent=4)
    print(f"Saved json to {json_path}")
def main():
    """Script entry point: read the CLI options and run the conversion."""
    cli = parse_args()
    source, target = cli.pkl_path, cli.json_path
    convert_pkl_to_json(source, target)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()