Spaces:
Running
Running
| import os | |
| import sys | |
| import logging | |
| # Add the project root to the python path so we can import nlp_core | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from nlp_core.ner_engine import NEREngine | |
| from nlp_core.preprocessing import Preprocessor | |
def extract_entities_from_conll(lines):
    """Parse one sentence's CoNLL-formatted lines into text and entities.

    Each valid line carries at least four whitespace-separated columns;
    the first column is the token and the last is its BIO tag.  Lines
    with fewer columns are ignored.

    Returns a tuple of the space-joined sentence text and a list of
    (entity_type, entity_string) pairs in order of appearance.
    """
    tokens = []
    found = []
    span_type = None
    span_tokens = []

    def flush():
        # Close the currently open entity span, if any.
        nonlocal span_type, span_tokens
        if span_type:
            found.append((span_type, " ".join(span_tokens)))
        span_type = None
        span_tokens = []

    for raw in lines:
        cols = raw.strip().split()
        if len(cols) < 4:
            continue
        token, tag = cols[0], cols[-1]
        tokens.append(token)
        if tag.startswith("B-"):
            # Explicit beginning of a new entity.
            flush()
            span_type = tag[2:]
            span_tokens = [token]
        elif tag.startswith("I-"):
            if span_type == tag[2:]:
                span_tokens.append(token)
            else:
                # I- without a matching open span: treat as a new entity.
                flush()
                span_type = tag[2:]
                span_tokens = [token]
        else:
            # O tag (or anything non-BIO) terminates the open span.
            flush()

    flush()  # don't drop an entity that runs to the end of the sentence
    return " ".join(tokens), found
def evaluate_ner(test_file_path, limit=None):
    """Run an entity-level exact-match evaluation of the NER engine.

    Args:
        test_file_path: Path to a CoNLL-formatted test file in which
            blank lines separate sentences.
        limit: Optional maximum number of sentences to evaluate.
            ``None`` (the default) evaluates the whole file.

    Prints true/false positive counts and precision / recall / F1 to
    stdout; returns nothing.
    """
    print(f"Loading test data from {test_file_path}...")
    with open(test_file_path, "r", encoding="utf-8") as f:
        blocks = f.read().split("\n\n")

    sentences = []
    for block in blocks:
        if not block.strip():
            continue
        text, true_ents = extract_entities_from_conll(block.split("\n"))
        if text:
            sentences.append((text, true_ents))

    # BUG FIX: the old `if limit:` check treated an explicit limit=0 as
    # "no limit"; compare against None so every given limit is honored.
    if limit is not None:
        sentences = sentences[:limit]
    print(f"Loaded {len(sentences)} test sentences.")

    preprocessor = Preprocessor()
    ner = NEREngine()

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    print("Running NER evaluation (this may take a while)...")
    for i, (text, true_ents) in enumerate(sentences):
        if i > 0 and i % 50 == 0:
            print(f"Processed {i}/{len(sentences)} sentences...")
        # Clean text specifically for NER
        clean_text = preprocessor.preprocess_nlp(text)
        predicted_results = ner.recognize(clean_text)
        # Normalize predictions to (type, string): strip spaces and dots
        # and lowercase so e.g. "Д.Гантулга" and "Д. Гантулга" both
        # normalize to the same key for a fair comparison.
        pred_ents = [
            (res.entity_group, res.word.replace(" ", "").replace(".", "").lower())
            for res in predicted_results
        ]
        # Normalize gold entities the same way — skip MISC since the
        # fine-tuned model does not produce MISC labels (removed from
        # the training set).
        true_ents_formatted = [
            (t, w.replace(" ", "").replace(".", "").lower())
            for t, w in true_ents
            if t != "MISC"
        ]
        # Greedy exact matching: each gold entity consumes at most one
        # identical prediction (remove() prevents double-counting).
        for true_e in true_ents_formatted:
            if true_e in pred_ents:
                true_positives += 1
                pred_ents.remove(true_e)
            else:
                false_negatives += 1
        # Whatever is left in pred_ents are false positives
        false_positives += len(pred_ents)

    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print("\n" + "=" * 40)
    print("NER EVALUATION RESULTS (Entity-Level Exact Match)")
    print("=" * 40)
    print(f"Sentences Evaluated: {len(sentences)}")
    print(f"True Positives: {true_positives}")
    print(f"False Positives: {false_positives}")
    print(f"False Negatives: {false_negatives}")
    print("-" * 40)
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("=" * 40)
if __name__ == "__main__":
    # Resolve the test file relative to the project root.  Use abspath()
    # — matching the sys.path bootstrap at the top of this file — so the
    # script also works when launched as `python <script>` from another
    # directory, where a bare __file__ has an empty dirname and the
    # double-dirname would otherwise collapse to a cwd-relative path.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    test_path = os.path.join(project_root, "data", "test.txt")
    if not os.path.exists(test_path):
        print(f"Error: Could not find CoNLL test file at {test_path}")
    else:
        # Run on the first 500 sentences to get a quick estimate.
        # Change limit=None to run on the entire test set.
        evaluate_ner(test_path, limit=500)