lynn-twinkl
committed on
Commit
·
bd08591
1
Parent(s):
0944554
Added CLI args
Browse files
ner-training/remove_non_context_labels.py
CHANGED
|
@@ -2,16 +2,18 @@ import json
|
|
| 2 |
import sys
|
| 3 |
|
| 4 |
file_to_filter = sys.argv[1]
|
|
|
|
|
|
|
| 5 |
|
| 6 |
with open(file_to_filter, 'r') as input_file:
|
| 7 |
dataset = json.load(input_file)
|
| 8 |
|
| 9 |
def filter_context_labels(dataset):
|
| 10 |
for item in dataset:
|
| 11 |
-
item['label'] = [l for l in item['label'] if
|
| 12 |
return dataset
|
| 13 |
|
| 14 |
filtered_data = filter_context_labels(dataset)
|
| 15 |
|
| 16 |
-
with open(
|
| 17 |
json.dump(filtered_data, output_file, indent=2)
|
|
|
|
| 2 |
import sys
|
| 3 |
|
| 4 |
file_to_filter = sys.argv[1]
|
| 5 |
+
tag_to_keep = sys.argv[2]
|
| 6 |
+
outpath = sys.argv[3]
|
| 7 |
|
| 8 |
with open(file_to_filter, 'r') as input_file:
|
| 9 |
dataset = json.load(input_file)
|
| 10 |
|
| 11 |
def filter_context_labels(dataset):
|
| 12 |
for item in dataset:
|
| 13 |
+
item['label'] = [l for l in item['label'] if tag_to_keep in l['labels']]
|
| 14 |
return dataset
|
| 15 |
|
| 16 |
filtered_data = filter_context_labels(dataset)
|
| 17 |
|
| 18 |
+
with open(outpath, 'w') as output_file:
|
| 19 |
json.dump(filtered_data, output_file, indent=2)
|