msblob / chess /process_jsonlines.py
IshiKura-a
chessdata
d91fa39
import jsonlines
import re
def extract_fen(puzzle_data):
    """Return the position string embedded in a puzzle record's text.

    The value under ``puzzle_data["text"]`` is searched for the first span
    that directly follows ``"is "`` and consists of seven alphanumeric runs,
    each terminated by ``/``, followed by at least one more character up to
    the nearest ``,`` or ``.``. That closing punctuation mark is dropped
    from the returned string. Judging by the function name, the span is
    presumably a chess FEN.

    Args:
        puzzle_data: Mapping that provides a ``"text"`` entry.

    Returns:
        The matched span without its trailing ``,`` or ``.``.

    Raises:
        KeyError: If ``puzzle_data`` has no ``"text"`` entry.
        AttributeError: If the text contains no matching span (``re.search``
            returns ``None`` in that case).
    """
    fen_pattern = '(?<=is )[0-9A-Za-z]+/[0-9A-Za-z]+/[0-9A-Za-z]+/[0-9A-Za-z]+/[0-9A-Za-z]+/[0-9A-Za-z]+/[0-9A-Za-z]+/.+?[,.]'
    found = re.search(fen_pattern, puzzle_data["text"])
    with_terminator = found.group()
    # The pattern always ends on the ',' or '.' delimiter; strip it.
    return with_terminator[:-1]
# Convert each of the 16 input shards into its own output file: every record
# read from a shard is passed through extract_fen and the result is written
# as one entry of the corresponding 'puzzleNN.jsonl' file.
for shard in range(16):
    source_path = f'chess_puzzle-data.jsonl-000{shard:02d}-of-00016'
    target_path = f'puzzle{shard:02d}.jsonl'
    # The reader is opened first, so a missing input shard fails before the
    # output file is created.
    with jsonlines.open(source_path) as reader, jsonlines.open(target_path, mode='w') as writer:
        for record in reader:
            writer.write(extract_fen(record))