Spaces:

KarishmaShirsath
/

PIIMasking

Runtime error

App Files Files Community

PIIMasking / conlleval.py

KarishmaShirsath

Upload 5 files

8850a9d verified over 1 year ago

raw

history blame contribute delete

7.5 kB

	"""
	This script applies to IOB2 or IOBES tagging scheme.
	If you are using a different scheme, please convert to IOB2 or IOBES.

	IOB2:
	- B = begin,
	- I = inside but not the first,
	- O = outside

	e.g.
	John lives in New York City .
	B-PER O O B-LOC I-LOC I-LOC O

	IOBES:
	- B = begin,
	- E = end,
	- S = singleton,
	- I = inside but not the first or the last,
	- O = outside

	e.g.
	John lives in New York City .
	S-PER O O B-LOC I-LOC E-LOC O

	prefix: one of the letters B, I, O, E, S
	chunk_type: the label after the hyphen, e.g. PER, LOC, etc.
	"""
	from __future__ import division, print_function, unicode_literals

	import sys
	from collections import defaultdict

	def split_tag(chunk_tag):
	    """
	    Split a chunk tag into its IOBES prefix and its chunk type.

	    e.g.
	    B-PER -> ('B', 'PER')
	    O -> ('O', None)

	    Only the first hyphen separates the prefix from the type, so a chunk
	    type may itself contain hyphens: I-MISC-X -> ('I', 'MISC-X').

	    return: always a tuple. A tag other than 'O' that contains no hyphen
	    yields a 1-tuple, so code that unpacks two values from it raises
	    ValueError.
	    """
	    if chunk_tag == 'O':
	        return ('O', None)
	    # maxsplit is passed positionally because Python 2's str.split rejects
	    # keyword arguments; tuple() keeps the return type the same as the
	    # 'O' branch above.
	    return tuple(chunk_tag.split('-', 1))

	def is_chunk_end(prev_tag, tag):
	    """
	    Tell whether the chunk covering the previous word is closed before
	    the current word.

	    e.g.
	    (B-PER, I-PER) -> False
	    (B-LOC, O) -> True

	    Note: contradicting tags such as (B-PER, I-LOC) are handled as if
	    they were (B-PER, B-LOC), i.e. a change of chunk type closes the
	    previous chunk.
	    """
	    prev_prefix, prev_type = split_tag(prev_tag)
	    cur_prefix, cur_type = split_tag(tag)

	    # Nothing was open on the previous word, so nothing can end.
	    if prev_prefix == 'O':
	        return False

	    # An open chunk followed by 'O' or by another chunk type ends here.
	    if cur_prefix == 'O' or prev_type != cur_type:
	        return True

	    # Same type on both words: the chunk ends only if the previous word
	    # closed it (E/S) or the current word opens a new one (B/S).
	    return prev_prefix in ('E', 'S') or cur_prefix in ('B', 'S')

	def is_chunk_start(prev_tag, tag):
	    """
	    Tell whether a new chunk begins on the current word.

	    prev_tag: tag of the previous word
	    tag: tag of the current word

	    A change of chunk type between the two words always counts as a
	    start, whatever the prefix of the current tag is.
	    """
	    prev_prefix, prev_type = split_tag(prev_tag)
	    cur_prefix, cur_type = split_tag(tag)

	    # An 'O' word never starts a chunk.
	    if cur_prefix == 'O':
	        return False

	    # A chunk word after 'O', or after a chunk of another type, starts one.
	    if prev_prefix == 'O' or prev_type != cur_type:
	        return True

	    # Same type on both words: a chunk starts only if the current word
	    # opens one (B/S) or the previous word closed one (E/S).
	    return cur_prefix in ('B', 'S') or prev_prefix in ('E', 'S')


	def calc_metrics(tp, p, t, percent=True):
	    """
	    Compute precision, recall and FB1 from three counts.

	    tp: number of correct items
	    p: number of predicted items (denominator of precision)
	    t: number of true items (denominator of recall)
	    percent: if True, return 100 * the original decimal values

	    return: (precision, recall, fb1), always as floats. A metric whose
	    denominator is zero is reported as 0.0.
	    """
	    # float() makes the division exact regardless of whether true division
	    # is enabled, and the 0.0 defaults keep every returned value a float.
	    precision = float(tp) / p if p else 0.0
	    recall = float(tp) / t if t else 0.0
	    total = precision + recall
	    fb1 = 2 * precision * recall / total if total else 0.0
	    if percent:
	        return 100 * precision, 100 * recall, 100 * fb1
	    return precision, recall, fb1


	def count_chunks(true_seqs, pred_seqs):
	    """
	    Walk two tag sequences in parallel and collect chunk and tag counts.

	    true_seqs: a list of true tags
	    pred_seqs: a list of predicted tags

	    The sequences are paired with zip, so iteration stops at the end of
	    the shorter one.

	    return: a 6-tuple of dicts (counters)
	    correct_chunks: chunk type -> number of correctly identified chunks
	    true_chunks: chunk type -> number of true chunks
	    pred_chunks: chunk type -> number of identified chunks
	    correct_counts, true_counts, pred_counts: the same, keyed by tag
	    """
	    correct_chunks = defaultdict(int)
	    true_chunks = defaultdict(int)
	    pred_chunks = defaultdict(int)

	    correct_counts = defaultdict(int)
	    true_counts = defaultdict(int)
	    pred_counts = defaultdict(int)

	    # The position before the first word is treated as 'O' on both sides.
	    last_gold, last_guess = 'O', 'O'
	    # Type of a chunk that started at the same word with the same type in
	    # both sequences and has not diverged yet; None when there is none.
	    open_type = None

	    for gold, guess in zip(true_seqs, pred_seqs):
	        # tag-level statistics
	        true_counts[gold] += 1
	        pred_counts[guess] += 1
	        if gold == guess:
	            correct_counts[gold] += 1

	        _, gold_type = split_tag(gold)
	        _, guess_type = split_tag(guess)

	        if open_type is not None:
	            gold_closed = is_chunk_end(last_gold, gold)
	            guess_closed = is_chunk_end(last_guess, guess)

	            # The candidate is correct only if both sequences close it at
	            # the same word.
	            if gold_closed and guess_closed:
	                correct_chunks[open_type] += 1
	            # It stops being a candidate as soon as either side closes it
	            # or the two sides disagree on the type.
	            if gold_closed or guess_closed or gold_type != guess_type:
	                open_type = None

	        gold_opens = is_chunk_start(last_gold, gold)
	        guess_opens = is_chunk_start(last_guess, guess)

	        if gold_opens:
	            true_chunks[gold_type] += 1
	        if guess_opens:
	            pred_chunks[guess_type] += 1
	        if gold_opens and guess_opens and gold_type == guess_type:
	            open_type = gold_type

	        last_gold, last_guess = gold, guess

	    # A candidate still open when the input ends counts as correct.
	    if open_type is not None:
	        correct_chunks[open_type] += 1

	    return (correct_chunks, true_chunks, pred_chunks,
	            correct_counts, true_counts, pred_counts)

	def get_result(correct_chunks, true_chunks, pred_chunks,
	               correct_counts, true_counts, pred_counts, verbose=True):
	    """
	    Compute overall precision, recall and FB1 from chunk and tag counts.

	    If verbose, print overall performance as well as performance per
	    chunk type; otherwise simply return the overall scores.

	    correct_chunks, true_chunks, pred_chunks: dicts mapping a chunk type
	        to the number of correct / true / predicted chunks
	    correct_counts, true_counts, pred_counts: dicts mapping a tag to the
	        number of correct / true / predicted tags (pred_counts is part of
	        the signature but is not read here)

	    return: (prec, rec, f1) over all chunk types, as percentages

	    Both accuracy figures are printed as 0.00 when their denominator is
	    zero (no tokens at all, or no non-O true tags).
	    """
	    # sum counts
	    sum_correct_chunks = sum(correct_chunks.values())
	    sum_true_chunks = sum(true_chunks.values())
	    sum_pred_chunks = sum(pred_chunks.values())

	    # compute overall precision, recall and FB1 (default values are 0.0)
	    prec, rec, f1 = calc_metrics(sum_correct_chunks, sum_pred_chunks, sum_true_chunks)
	    res = (prec, rec, f1)
	    if not verbose:
	        return res

	    sum_correct_counts = sum(correct_counts.values())
	    sum_true_counts = sum(true_counts.values())

	    nonO_correct_counts = sum(v for k, v in correct_counts.items() if k != 'O')
	    nonO_true_counts = sum(v for k, v in true_counts.items() if k != 'O')

	    # Guard both divisions: empty input, or input whose true tags are all
	    # 'O', would otherwise raise ZeroDivisionError.
	    if nonO_true_counts:
	        nonO_accuracy = 100.0 * nonO_correct_counts / nonO_true_counts
	    else:
	        nonO_accuracy = 0.0
	    if sum_true_counts:
	        accuracy = 100.0 * sum_correct_counts / sum_true_counts
	    else:
	        accuracy = 0.0

	    # print overall performance, and performance per chunk type

	    print("processed %i tokens with %i phrases; " % (sum_true_counts, sum_true_chunks), end='')
	    print("found: %i phrases; correct: %i.\n" % (sum_pred_chunks, sum_correct_chunks), end='')

	    print("accuracy: %6.2f%%; (non-O)" % nonO_accuracy)
	    print("accuracy: %6.2f%%; " % accuracy, end='')
	    print("precision: %6.2f%%; recall: %6.2f%%; FB1: %6.2f" % (prec, rec, f1))

	    # for each chunk type, compute precision, recall and FB1 (default values are 0.0)
	    for chunk_type in sorted(set(true_chunks) | set(pred_chunks)):
	        # .get() reads a missing type as 0 without inserting keys into the
	        # caller's dicts, and also works when plain dicts are passed in.
	        n_pred = pred_chunks.get(chunk_type, 0)
	        type_prec, type_rec, type_f1 = calc_metrics(
	            correct_chunks.get(chunk_type, 0), n_pred, true_chunks.get(chunk_type, 0))
	        print("%17s: " % chunk_type, end='')
	        print("precision: %6.2f%%; recall: %6.2f%%; FB1: %6.2f" %
	              (type_prec, type_rec, type_f1), end='')
	        print(" %d" % n_pred)

	    return res
	# you can generate LaTeX output for tables like in
	# http://cnts.uia.ac.be/conll2003/ner/example.tex
	# but I'm not implementing this

	def evaluate(true_seqs, pred_seqs, verbose=True):
	    """
	    Score a sequence of predicted tags against a sequence of true tags.

	    true_seqs: a list of true tags
	    pred_seqs: a list of predicted tags
	    verbose: forwarded to get_result

	    return: whatever get_result returns for the counts of the two sequences
	    """
	    counts = count_chunks(true_seqs, pred_seqs)
	    return get_result(*counts, verbose=verbose)

	def evaluate_conll_file(fileIterator):
	    """
	    Evaluate tags read from an iterable of whitespace-separated lines.

	    fileIterator: iterable of text lines; in each non-empty line the last
	        two columns are taken as the true tag and the predicted tag

	    return: the result of evaluate() on the collected tag sequences

	    raises: IOError if a non-empty line has fewer than 3 columns
	    """
	    true_seqs = []
	    pred_seqs = []

	    for line in fileIterator:
	        cols = line.strip().split()
	        # each non-empty line must contain >= 3 columns
	        if cols and len(cols) < 3:
	            raise IOError("conlleval: too few columns in line %s\n" % line)

	        # an empty line contributes an 'O'/'O' pair; any other line
	        # contributes the tags from its last 2 columns
	        true_tag, pred_tag = (cols[-2], cols[-1]) if cols else ('O', 'O')
	        true_seqs.append(true_tag)
	        pred_seqs.append(pred_tag)

	    return evaluate(true_seqs, pred_seqs)

	if __name__ == '__main__':
	    # usage: conlleval < file
	    # Reads the tagged lines from standard input and prints the report.
	    evaluate_conll_file(sys.stdin)