lynn-twinkl committed on
Commit
bd08591
·
1 Parent(s): 0944554

Added cli args

Browse files
ner-training/remove_non_context_labels.py CHANGED
@@ -2,16 +2,18 @@ import json
2
  import sys
3
 
# Command-line usage: <script> <file_to_filter>
if len(sys.argv) < 2:
    # Exit with a readable usage message instead of a bare IndexError.
    sys.exit("usage: " + sys.argv[0] + " <file_to_filter>")

# Path of the JSON file whose labels will be filtered.
file_to_filter = sys.argv[1]

# JSON text is UTF-8 by specification; pin the encoding so the read does not
# depend on the platform's locale default.
with open(file_to_filter, 'r', encoding='utf-8') as input_file:
    dataset = json.load(input_file)
8
 
def filter_context_labels(dataset, tag='Context'):
    """Keep only the label entries tagged with ``tag`` on every dataset item.

    Each item's ``'label'`` list is replaced by the sub-list of entries whose
    ``'labels'`` value contains ``tag``.  Items are modified in place and the
    same ``dataset`` object is returned.

    Args:
        dataset: Iterable of dict items; every entry under an item's
            ``'label'`` key must provide a ``'labels'`` container.
        tag: Label name to keep.  Defaults to ``'Context'``, the value this
            function previously hard-coded.

    Returns:
        The ``dataset`` object that was passed in.
    """
    for item in dataset:
        # Items without a 'label' key are left untouched instead of raising
        # KeyError.
        # NOTE(review): assumes a missing key means "no annotations" --
        # confirm against the export format.
        if 'label' not in item:
            continue
        item['label'] = [
            entry for entry in item['label'] if tag in entry['labels']
        ]
    return dataset
13
 
# Apply the label filter to the dataset loaded above.
filtered_data = filter_context_labels(dataset)

# Serialize the filtered dataset as JSON with a two-space indent.
with open('context-only-labels.json', mode='w') as output_file:
    json.dump(filtered_data, fp=output_file, indent=2)
 
2
  import sys
3
 
# Command-line usage: <script> <file_to_filter> <tag_to_keep> <outpath>
if len(sys.argv) < 4:
    # Exit with a readable usage message instead of a bare IndexError.
    sys.exit(
        "usage: " + sys.argv[0] + " <file_to_filter> <tag_to_keep> <outpath>"
    )

file_to_filter = sys.argv[1]  # input JSON file to filter
tag_to_keep = sys.argv[2]     # label name whose entries are kept
outpath = sys.argv[3]         # destination path for the filtered JSON

# JSON text is UTF-8 by specification; pin the encoding so the read does not
# depend on the platform's locale default.
with open(file_to_filter, 'r', encoding='utf-8') as input_file:
    dataset = json.load(input_file)
10
 
def filter_context_labels(dataset, tag=None):
    """Keep only the label entries tagged with ``tag`` on every dataset item.

    Each item's ``'label'`` list is replaced by the sub-list of entries whose
    ``'labels'`` value contains ``tag``.  Items are modified in place and the
    same ``dataset`` object is returned.

    Args:
        dataset: Iterable of dict items; every entry under an item's
            ``'label'`` key must provide a ``'labels'`` container.
        tag: Label name to keep.  When omitted, the module-level
            ``tag_to_keep`` value is used, which matches the behavior of
            calling the function with ``dataset`` alone.

    Returns:
        The ``dataset`` object that was passed in.
    """
    if tag is None:
        # Backward-compatible default: fall back to the module-level tag.
        tag = tag_to_keep
    for item in dataset:
        # Items without a 'label' key are left untouched instead of raising
        # KeyError.
        # NOTE(review): assumes a missing key means "no annotations" --
        # confirm against the export format.
        if 'label' not in item:
            continue
        item['label'] = [
            entry for entry in item['label'] if tag in entry['labels']
        ]
    return dataset
15
 
# Apply the label filter to the dataset loaded above.
filtered_data = filter_context_labels(dataset)

# Serialize the filtered dataset to the requested output path as JSON with a
# two-space indent.
with open(outpath, mode='w') as output_file:
    json.dump(filtered_data, fp=output_file, indent=2)