|
|
import spacy
|
|
|
import json
|
|
|
from spacy.training.example import Example
|
|
|
from spacy.scorer import Scorer
|
|
|
|
|
|
|
|
|
|
|
|
# Load the trained NER pipeline from disk; abort with a non-zero exit
# status if the model directory is missing (e.g. training was never run).
model_path = "financial_ner_model"

print(f"Loading model from: {model_path}...")

try:
    nlp = spacy.load(model_path)
except OSError:
    print(f"Error: Could not find model at '{model_path}'. Did you run the training script?")
    # Bare exit() is the interactive site.py helper and exits with status 0;
    # SystemExit(1) lets shells / CI detect the failure.
    raise SystemExit(1)
|
|
|
|
|
|
|
|
|
|
|
|
# Load the held-out validation set: a JSON list of (text, annotations) pairs.
data_path = "dev_financial_ner.json"

print(f"Loading validation data from: {data_path}...")

try:
    # Explicit UTF-8 so parsing does not depend on the platform's default
    # encoding (financial text may contain non-ASCII currency symbols).
    with open(data_path, "r", encoding="utf-8") as f:
        dev_data = json.load(f)
except FileNotFoundError:
    print(f"Error: Could not find '{data_path}'. Did you run the dataset script?")
    raise SystemExit(1)  # non-zero status so callers can detect the failure
|
|
|
|
|
|
|
|
|
# Turn every (text, annotations) pair into a spaCy Example: the reference
# side carries the gold entities, while the predicted side starts as a bare
# tokenized doc that nlp.evaluate() will run the pipeline over.
print(f"Evaluating on {len(dev_data)} examples...")

examples = [
    Example.from_dict(nlp.make_doc(text), annotations)
    for text, annotations in dev_data
]
|
|
|
|
|
|
|
|
|
|
|
|
# Run the full pipeline over every example and score the predictions
# against the gold annotations.
scores = nlp.evaluate(examples)

# Overall entity-level metrics, printed as a small fixed-width table.
rule = "=" * 40
print("\n" + rule)
print(f"{'METRIC':<15} | {'SCORE':<10}")
print(rule)

precision = scores['ents_p']
recall = scores['ents_r']
f1 = scores['ents_f']
print(f"{'Precision':<15} | {precision:.2%}")
print(f"{'Recall':<15} | {recall:.2%}")
print(f"{'F1-Score':<15} | {f1:.2%}")
print(rule)
|
|
|
|
|
|
|
|
|
# Per-entity-type breakdown. The value columns are padded to the same
# widths as the header row so the table actually lines up — the original
# bare ':.2%' formats produced ragged columns.
print("\nBreakdown by Entity Type:")
print(f"{'ENTITY':<12} | {'PRECISION':<10} | {'RECALL':<10} | {'F1':<10}")
print("-" * 48)

# .get(...) guards against a score dict without 'ents_per_type' (e.g. a
# pipeline that produced no entity predictions).
for entity_type, metrics in sorted(scores.get('ents_per_type', {}).items()):
    p = metrics['p']
    r = metrics['r']
    f = metrics['f']
    print(f"{entity_type:<12} | {p:<10.2%} | {r:<10.2%} | {f:<10.2%}")