import spacy import json from spacy.training.example import Example from spacy.scorer import Scorer # 1. LOAD THE TRAINED MODEL # Make sure this points to the folder where you saved the model model_path = "financial_ner_model" print(f"Loading model from: {model_path}...") try: nlp = spacy.load(model_path) except OSError: print(f"Error: Could not find model at '{model_path}'. Did you run the training script?") exit() # 2. LOAD THE DEV DATA # This is the 20% of data we held back specifically for this moment data_path = "dev_financial_ner.json" print(f"Loading validation data from: {data_path}...") try: with open(data_path, "r") as f: dev_data = json.load(f) except FileNotFoundError: print(f"Error: Could not find '{data_path}'. Did you run the dataset script?") exit() # 3. PREPARE FOR EVALUATION examples = [] print(f"Evaluating on {len(dev_data)} examples...") for text, annotations in dev_data: # Create a Doc from the text (Prediction) doc_pred = nlp.make_doc(text) # Create an Example object which holds Prediction + Truth # This is required by SpaCy v3 example = Example.from_dict(doc_pred, annotations) examples.append(example) # 4. RUN EVALUATION # nlp.evaluate() runs the model on all examples and calculates scores scores = nlp.evaluate(examples) # 5. DISPLAY RESULTS print("\n" + "="*40) print(f"{'METRIC':<15} | {'SCORE':<10}") print("="*40) print(f"{'Precision':<15} | {scores['ents_p']:.2%}") print(f"{'Recall':<15} | {scores['ents_r']:.2%}") print(f"{'F1-Score':<15} | {scores['ents_f']:.2%}") print("="*40) # 6. BREAKDOWN BY ENTITY TYPE print("\nBreakdown by Entity Type:") print(f"{'ENTITY':<12} | {'PRECISION':<10} | {'RECALL':<10} | {'F1':<10}") print("-" * 48) for entity_type, metrics in sorted(scores['ents_per_type'].items()): p = metrics['p'] r = metrics['r'] f = metrics['f'] print(f"{entity_type:<12} | {p:.2%} | {r:.2%} | {f:.2%}")