"""
A function that parses lm_eval text output into JSON format.
"""
import ast
import json
import os
|
|
|
|
def _parse_header(header_line):
    """Parse the run summary line (the text following ``hf (``) into a dict."""
    config = {}
    # Everything after 'batch_size: ' is kept verbatim as a string.
    header, found, batch_size = header_line.partition('batch_size: ')
    if found:
        config['batch_size'] = batch_size
    for entry in header.split(','):
        entry = entry.strip()
        if not entry:
            continue
        # Split on the first separator only, so values that themselves
        # contain '=' or ': ' no longer break the unpacking.
        if '=' in entry:
            key, _, value = entry.partition('=')
        elif ': ' in entry:
            key, _, value = entry.partition(': ')
        else:
            raise ValueError(f'cannot parse header entry: {entry!r}')
        # Drop the parentheses left over from '(model_args)' / '(None)'.
        if value.endswith(')'):
            value = value[:-1]
        if value.startswith('('):
            value = value[1:]
        if key != 'dtype':
            # NOTE(security): this used to be eval(), which executes arbitrary
            # expressions read from the file and resolves in-scope names.
            # literal_eval only accepts Python literals (numbers, None,
            # True/False, quoted strings, ...); anything else stays a string.
            try:
                value = ast.literal_eval(value)
            except (ValueError, TypeError, SyntaxError,
                    MemoryError, RecursionError):
                pass
        if key == 'pretrained':
            # Keep only the model name, without the organisation prefix.
            value = str(value).split('/')[-1]
        config[key] = value
    return config


def _parse_table(table_lines):
    """Parse the markdown result table rows into a ``{task: metrics}`` dict."""
    results = {}
    task = ''
    alias = ''
    for line in table_lines:
        if not line.startswith('|'):
            continue
        columns = line.split('|')
        if len(columns) == 11:
            # Tasks|Version|Filter|n-shot|Metric|direction|Value|±|Stderr
            (_, name, version, filter_, nshot, metric,
             _, value, _, stderr, _) = columns
        elif len(columns) == 10:
            # Older layout without the direction column.
            (_, name, version, filter_, nshot, metric,
             value, _, stderr, _) = columns
        else:
            raise ValueError(f'unexpected number of table columns: {line!r}')

        name = name.strip()
        # Skip the table header rows and the '---' separator row.
        if name in ('Tasks', 'Groups') or '--' in name:
            continue

        # Sub-tasks are listed as '- name'; rows with an empty name carry a
        # further metric of the most recently named task.
        short_name = name.split('- ')[-1]
        if short_name:
            task = short_name
            alias = name
            results[task] = {}
        if task not in results:
            raise ValueError(f'metric row before any task row: {line!r}')
        row = results[task]

        version = version.strip()
        if version:
            try:
                row['version'] = float(version)
            except ValueError as err:
                print(err)
        nshot = nshot.strip()
        if nshot:
            try:
                row['nshot'] = int(nshot)
            except ValueError as err:
                print(err)

        metric = metric.strip()
        filter_ = filter_.strip()
        row['alias'] = alias
        row[f'{metric},{filter_}'] = float(value.strip())

        stderr = stderr.strip()
        if stderr:
            try:
                stderr_value = float(stderr)
            except ValueError:
                # Non-numeric marker such as 'N/A' is stored as-is.
                stderr_value = stderr
            # One key per metric, so several metrics of the same task no
            # longer overwrite each other's standard error.
            row[f'{metric}_stderr,{filter_}'] = stderr_value
            # Legacy key kept for existing consumers (last metric wins).
            row['stderr'] = stderr_value
    return results


def _output_stem(model_name):
    """Upper-case the first and the last character of the model name."""
    if len(model_name) < 2:
        return model_name.upper()
    return model_name[0].upper() + model_name[1:-1] + model_name[-1].upper()


def txt2json(file):
    """Convert lm_eval text file to json format.

    The file is split on ``'hf ('``; every resulting section is treated as one
    evaluation consisting of a summary line followed by a markdown table.
    For each evaluation a file ``<Model>_<lang>.json`` is written to the
    current working directory, where ``<Model>`` is the ``pretrained`` name
    (organisation prefix removed, first and last character upper-cased) and
    ``<lang>`` is the part of the input file name after the last ``'_'``,
    up to ``'.txt'``. Evaluations of the same model overwrite each other.

    Args:
        file: Path of the lm_eval text output, e.g. ``results_en.txt``.

    Raises:
        ValueError: If a header entry or table row cannot be parsed, or an
            evaluation header has no ``pretrained`` entry.
    """
    # lm_eval tables contain non-ASCII characters such as '±', so the
    # encoding is fixed instead of depending on the platform default.
    with open(file, encoding='utf-8') as fh:
        text = fh.read()
    # Use the base name so underscores in directory names do not leak into
    # the language tag (and thus into the output path).
    lang = os.path.basename(file).split('_')[-1].split('.txt')[0]
    data = text.split('hf (')[1:]
    print(len(data))
    for evaluation in data:
        lines = evaluation.split('\n')
        metadata = _parse_header(lines[0])
        print(metadata)
        results = _parse_table(lines[1:])
        output = {'config': metadata, "results": results}
        print(output)
        pretrained = metadata.get('pretrained')
        if pretrained is None:
            raise ValueError("evaluation header has no 'pretrained' entry")
        out_name = f'{_output_stem(pretrained)}_{lang}.json'
        with open(out_name, 'w', encoding='utf-8') as f:
            json.dump(output, f, ensure_ascii=False, indent=2)