# NLP-intelligence/eval/evaluate.py — entity-level NER evaluation on a CoNLL test file.
# NOTE: the lines below were repository-page residue (author "Nomio4640",
# commit e1c327f, "NER finetune"), preserved here as a comment so the file parses.
import os
import sys
import logging
# Add the project root to the python path so we can import nlp_core
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from nlp_core.ner_engine import NEREngine
from nlp_core.preprocessing import Preprocessor
def extract_entities_from_conll(lines):
    """
    Parse one sentence's worth of CoNLL-formatted lines.

    Each valid row has at least 4 whitespace-separated columns; the first is
    the token and the last is its IOB tag (``B-TYPE`` / ``I-TYPE`` / other).
    Rows with fewer than 4 columns (blank or malformed) are skipped.

    Returns a tuple ``(text, entities)`` where ``text`` is the tokens joined
    by single spaces and ``entities`` is a list of ``(type, string)`` spans.
    """
    tokens = []
    spans = []
    open_type = None       # type of the span currently being built, or None
    open_tokens = []       # tokens accumulated for that span

    for raw in lines:
        cols = raw.strip().split()
        if len(cols) < 4:
            continue  # blank or malformed row — not a token line
        token = cols[0]
        label = cols[-1]
        tokens.append(token)

        if label.startswith("B-"):
            # Explicit span start: close any open span, open a fresh one.
            if open_type:
                spans.append((open_type, " ".join(open_tokens)))
            open_type = label[2:]
            open_tokens = [token]
        elif label.startswith("I-"):
            if open_type == label[2:]:
                # Continuation of the current span.
                open_tokens.append(token)
            else:
                # I- with a different (or no) open type: treat as a new span.
                if open_type:
                    spans.append((open_type, " ".join(open_tokens)))
                open_type = label[2:]
                open_tokens = [token]
        else:
            # O (or anything else): close the open span, if any.
            if open_type:
                spans.append((open_type, " ".join(open_tokens)))
            open_type = None
            open_tokens = []

    # Flush a span that ran to the end of the sentence.
    if open_type:
        spans.append((open_type, " ".join(open_tokens)))

    return " ".join(tokens), spans
def evaluate_ner(test_file_path, limit=None):
    """
    Run an entity-level exact-match NER evaluation over a CoNLL test file.

    Sentences are separated by blank lines in the file. Each sentence is
    preprocessed, run through the NER engine, and its predicted entities
    are matched against the gold entities (type + normalized surface form).
    Prints precision / recall / F1 to stdout.

    :param test_file_path: path to the CoNLL-formatted test file
    :param limit: if truthy, evaluate only the first ``limit`` sentences
    """
    print(f"Loading test data from {test_file_path}...")
    with open(test_file_path, "r", encoding="utf-8") as f:
        raw_blocks = f.read().split("\n\n")

    sentences = []
    for raw in raw_blocks:
        if not raw.strip():
            continue
        sentence_text, gold = extract_entities_from_conll(raw.split("\n"))
        if sentence_text:
            sentences.append((sentence_text, gold))
    if limit:
        sentences = sentences[:limit]
    print(f"Loaded {len(sentences)} test sentences.")

    preprocessor = Preprocessor()
    engine = NEREngine()

    def _norm(surface):
        # Drop spaces and dots before lowercasing so variants such as
        # "Д.Гантулга" and "Д. Гантулга" normalize to the same key.
        return surface.replace(" ", "").replace(".", "").lower()

    tp = 0
    fp = 0
    fn = 0
    print("Running NER evaluation (this may take a while)...")
    for idx, (sentence_text, gold) in enumerate(sentences):
        if idx > 0 and idx % 50 == 0:
            print(f"Processed {idx}/{len(sentences)} sentences...")
        # Clean the text specifically for NER before predicting.
        cleaned = preprocessor.preprocess_nlp(sentence_text)
        predictions = [
            (result.entity_group, _norm(result.word))
            for result in engine.recognize(cleaned)
        ]
        # MISC was removed from the fine-tuning label set, so it is
        # excluded from the gold entities for a fair comparison.
        gold_normalized = [(t, _norm(w)) for t, w in gold if t != "MISC"]

        # Greedy exact matching: each gold entity consumes at most one
        # prediction; leftovers on either side are errors.
        for gold_entity in gold_normalized:
            if gold_entity in predictions:
                tp += 1
                predictions.remove(gold_entity)
            else:
                fn += 1
        fp += len(predictions)

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print("\n" + "=" * 40)
    print("NER EVALUATION RESULTS (Entity-Level Exact Match)")
    print("=" * 40)
    print(f"Sentences Evaluated: {len(sentences)}")
    print(f"True Positives: {tp}")
    print(f"False Positives: {fp}")
    print(f"False Negatives: {fn}")
    print("-" * 40)
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("=" * 40)
if __name__ == "__main__":
    # Default test set lives at <project root>/data/test.txt, one level up
    # from this script's directory.
    default_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "test.txt")
    if os.path.exists(default_path):
        # Run on the first 500 sentences to get a quick estimate.
        # Change limit=None to run on the entire test set.
        evaluate_ner(default_path, limit=500)
    else:
        print(f"Error: Could not find CoNLL test file at {default_path}")