|
|
import jsonlines
|
|
|
import re
|
|
|
|
|
|
def extract_fen(puzzle_data):
    """Return the FEN-like position string embedded in a puzzle record.

    The record's ``"text"`` value is searched for the first span that:

    * is immediately preceded by the literal ``"is "``,
    * starts with seven ``/``-terminated runs of ASCII letters/digits, and
    * continues (lazily, without crossing a newline) up to the first
      comma or period.

    That closing comma/period is part of the regex match but is stripped
    from the returned value.

    Args:
        puzzle_data: Mapping-like object indexed with the key ``"text"``
            (presumably a decoded JSON object whose ``"text"`` is a str;
            confirm against the input data).

    Returns:
        The matched text without its final delimiter character.

    Raises:
        KeyError: If ``puzzle_data`` is a dict without a ``"text"`` key.
        AttributeError: If the text contains no matching span, because
            ``re.search`` then returns ``None``.
    """
    found = re.search('(?<=is )[0-9A-Za-z]+/[0-9A-Za-z]+/[0-9A-Za-z]+/[0-9A-Za-z]+/[0-9A-Za-z]+/[0-9A-Za-z]+/[0-9A-Za-z]+/.+?[,.]', puzzle_data["text"])

    # The pattern has to consume the terminating ',' or '.' in order to
    # know where to stop, so the full match is one character too long.
    matched_with_delimiter = found.group(0)
    return matched_with_delimiter[:-1]
|
|
|
|
|
|
|
|
|
# Walk the 16 input shards; for each one, write a JSON Lines file that
# holds the string extract_fen() returns for every record, one per line.
for i in range(16):
    # Input and output names both carry the shard index zero-padded to two
    # digits, e.g. 'chess_puzzle-data.jsonl-00003-of-00016' -> 'puzzle03.jsonl'.
    read_file = 'chess_puzzle-data.jsonl-000{:02d}-of-00016'.format(i)
    write_file = 'puzzle{:02d}.jsonl'.format(i)

    # The reader is opened first, so a missing input shard fails before
    # the corresponding output file is created.
    with jsonlines.open(read_file) as reader, \
            jsonlines.open(write_file, mode='w') as writer:
        for puzzle in reader:
            writer.write(extract_fen(puzzle))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|