import random


def _read_stripped_lines(path):
    """Return every line of the file at *path*, stripped of surrounding whitespace."""
    # The context manager closes the handle even if reading raises.
    with open(path) as fp:
        return [line.strip() for line in fp]


def main(train_path="train.csv", held_out_path="train_20.csv",
         out_path="train_60.csv"):
    """Write the training lines that are absent from the held-out file, shuffled.

    Reads *train_path* and *held_out_path* line by line (each line stripped of
    leading/trailing whitespace), drops every training line that also occurs
    in the held-out file, prints the line counts before and after filtering,
    shuffles the survivors with ``random.shuffle`` and writes them to
    *out_path*, one per line.

    Args:
        train_path: File holding the full list of training lines.
        held_out_path: File whose lines must be excluded from the output.
        out_path: Destination file; overwritten if it already exists.

    Raises:
        OSError: If an input file cannot be opened or the output cannot be
            written.
    """
    exs = _read_stripped_lines(train_path)

    # Optional step, disabled in the original script (it was wrapped in a
    # bare string literal): keep only lines that also appear in an older
    # copy of the training file.
    #
    #     old_exs = set(_read_stripped_lines("old/train.csv"))
    #     exs = [ex for ex in exs if ex in old_exs]

    # A set makes each membership test constant-time; the original list made
    # the filter below quadratic in the file sizes.
    exs_20 = set(_read_stripped_lines(held_out_path))

    print("Train: ", str(len(exs)))
    exs = [ex for ex in exs if ex not in exs_20]
    print("Remaining: ", str(len(exs)))

    # Unseeded shuffle: the output order differs from run to run.
    random.shuffle(exs)

    with open(out_path, "w") as fp:
        fp.writelines(ex + "\n" for ex in exs)


if __name__ == "__main__":
    main()