"""Extract a FEN-like position string from each record of the puzzle shards.

Reads the 16 input shards named
``chess_puzzle-data.jsonl-000NN-of-00016`` (NN = 00..15) from the current
working directory and writes one ``puzzleNN.jsonl`` file per shard, holding
the string extracted from every record's ``"text"`` field.
"""
import re

import jsonlines

# Number of input shards; matches the "-of-00016" suffix of the shard names.
NUM_SHARDS = 16

# Compiled once instead of on every call.  The pattern is: the literal
# "is " immediately before the match (look-behind, not part of the result),
# seven '/'-terminated alphanumeric groups, then the shortest run of
# characters that ends at the first ',' or '.'.
_FEN_PATTERN = re.compile(
    r"(?<=is )" + r"[0-9A-Za-z]+/" * 7 + r".+?[,.]"
)


def extract_fen(puzzle_data):
    """Return the FEN-like string embedded in ``puzzle_data["text"]``.

    The match starts right after the first ``"is "`` that is followed by
    seven ``/``-separated alphanumeric groups and runs up to (but not
    including) the next ``,`` or ``.``.

    Args:
        puzzle_data: Mapping with a ``"text"`` key holding a string.
            NOTE(review): presumably the text contains a sentence such as
            "... position is <FEN>, ..." -- confirm against the dataset.

    Returns:
        The matched substring without its terminating ``,`` or ``.``.

    Raises:
        KeyError: If ``puzzle_data`` has no ``"text"`` key.
        ValueError: If the text contains no matching string.  (Previously
            this surfaced as an opaque ``AttributeError`` on ``None``.)
    """
    text = puzzle_data["text"]
    match = _FEN_PATTERN.search(text)
    if match is None:
        raise ValueError(f"no FEN string found in puzzle text: {text!r}")
    # Drop the trailing ',' or '.' that the pattern uses as a terminator.
    return match.group(0)[:-1]


def _convert_shard(index):
    """Write the extracted string of every record in shard ``index``."""
    read_file = f"chess_puzzle-data.jsonl-{index:05d}-of-{NUM_SHARDS:05d}"
    write_file = f"puzzle{index:02d}.jsonl"
    # The reader is opened first so that a missing input shard does not
    # leave an empty output file behind.
    with jsonlines.open(read_file) as reader:
        with jsonlines.open(write_file, mode="w") as writer:
            for puzzle in reader:
                writer.write(extract_fen(puzzle))


def main():
    """Convert all shards, in order."""
    for index in range(NUM_SHARDS):
        _convert_shard(index)


if __name__ == "__main__":
    main()