# koichi12's picture
# Add files using upload-large-folder tool
# dc55567 verified
import json
# Per-character substitutions applied to every input line: a tab becomes a
# single space and typographic double quotes become a plain ASCII double quote.
# No manual escaping of '"' is done here because json.dumps() escapes it.
_CHAR_MAP = str.maketrans({'\t': ' ', '”': '"', '“': '"'})


def normalize_line(line):
    """Return *line* truncated at its first newline, with tabs and curly quotes normalized.

    Args:
        line: One raw line of text, with or without a trailing newline.

    Returns:
        The text before the first ``'\\n'``, where each tab is replaced by a
        space and each of ``”`` / ``“`` is replaced by ``"``.
    """
    return line.partition('\n')[0].translate(_CHAR_MAP)


def to_jsonl(lines):
    """Serialize lines as ``{"text": ...}`` JSON objects joined by newlines.

    Args:
        lines: Iterable of raw text lines.

    Returns:
        A single string with one JSON object per row, rows separated by
        ``'\\n'`` and no trailing newline. An empty iterable yields ``''``.
        Non-ASCII characters are emitted as-is (``ensure_ascii=False``).
    """
    return '\n'.join(
        json.dumps({"text": normalize_line(line)}, ensure_ascii=False)
        for line in lines
    )


def main():
    """Convert ``en-ja_full.txt`` to ``en-ja100.jsonl`` and print a preview."""
    # The encoding is pinned to UTF-8 on both sides: the output keeps
    # non-ASCII characters unescaped, so relying on the locale's default
    # encoding can fail or corrupt text on systems where it is not UTF-8.
    with open('en-ja_full.txt', 'rt', encoding='utf-8') as f:
        data = to_jsonl(f)
    # Show the first 10000 characters of the result as a sanity check.
    print(data[:10000])
    with open('en-ja100.jsonl', 'wt', encoding='utf-8') as f:
        f.write(data)


if __name__ == '__main__':
    main()