"""Convert a line-oriented text corpus into a JSON Lines file.

Each input line becomes one JSON object of the form ``{"text": ...}``.
Tabs are replaced by spaces and curly double quotes by straight ones
before serialization.
"""
import json

INPUT_PATH = 'en-ja_full.txt'
OUTPUT_PATH = 'en-ja100.jsonl'
PREVIEW_CHARS = 10000

# One-pass character normalization: tab -> space, curly double quotes -> '"'.
# Straight quotes need no handling here; json.dumps escapes them itself.
_NORMALIZE_TABLE = str.maketrans({'\t': ' ', '”': '"', '“': '"'})


def normalize_line(line):
    """Return *line* cut at its first newline, with tabs and curly quotes normalized."""
    return line.split('\n')[0].translate(_NORMALIZE_TABLE)


def to_jsonl(lines):
    """Serialize an iterable of raw lines as newline-joined JSON objects.

    Non-ASCII characters are written as-is (``ensure_ascii=False``). The
    result has no trailing newline; an empty iterable yields ``''``.
    """
    return '\n'.join(
        json.dumps({"text": normalize_line(line)}, ensure_ascii=False)
        for line in lines
    )


def main():
    """Read INPUT_PATH, print a preview of the converted data, write OUTPUT_PATH."""
    # Explicit UTF-8: without it open() falls back to the locale encoding,
    # which breaks on non-ASCII text on platforms whose default is not UTF-8.
    # NOTE(review): assumes the input corpus is UTF-8 encoded -- confirm.
    with open(INPUT_PATH, 'rt', encoding='utf-8') as f:
        data = to_jsonl(f)

    print(data[:PREVIEW_CHARS])

    with open(OUTPUT_PATH, 'wt', encoding='utf-8') as f:
        f.write(data)


if __name__ == '__main__':
    main()