File size: 914 Bytes
d91fa39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import jsonlines
import re

# Looks behind for "is ", then captures seven slash-terminated alphanumeric
# fields followed by the shortest run of characters that ends just before the
# first "," or "." (the terminator itself is matched but not captured).
_FEN_PATTERN = re.compile(r'(?<=is )((?:[0-9A-Za-z]+/){7}.+?)[,.]')


def extract_fen(puzzle_data):
    """Return the position string embedded in a puzzle record's text.

    The record's ``"text"`` value is searched for the first substring that
    directly follows ``"is "`` and consists of seven ``/``-terminated
    alphanumeric fields plus whatever trails them up to (not including) the
    next comma or period. Presumably this is a FEN string, as the function
    name suggests; the pattern itself does not validate FEN syntax.

    Args:
        puzzle_data: Mapping with a ``"text"`` key holding the string to
            search.

    Returns:
        The matched position string without the terminating ``,`` or ``.``.

    Raises:
        KeyError: If ``puzzle_data`` has no ``"text"`` key.
        ValueError: If the text contains no matching position. Previously
            this surfaced as an opaque ``AttributeError`` on ``None``.
    """
    text = puzzle_data["text"]
    match = _FEN_PATTERN.search(text)
    if match is None:
        # Fail with the offending text so a bad record can be identified.
        raise ValueError(f"no FEN-like position found in puzzle text: {text!r}")
    return match.group(1)


# Walk the 16 numbered input shards; for each one, write an output file whose
# records are the values extract_fen() returns for the shard's records, in
# the same order.
for shard in range(16):
    shard_id = str(shard).zfill(2)
    source_path = f'chess_puzzle-data.jsonl-000{shard_id}-of-00016'
    target_path = f'puzzle{shard_id}.jsonl'
    # The input is opened before the output, so a missing shard does not
    # leave an empty output file behind.
    with jsonlines.open(source_path) as reader, jsonlines.open(target_path, mode='w') as writer:
        for record in reader:
            writer.write(extract_fen(record))