import os
import sys
import logging

# Add the project root to the python path so we can import nlp_core
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from nlp_core.ner_engine import NEREngine
from nlp_core.preprocessing import Preprocessor

def extract_entities_from_conll(lines):
    """
    Parse the CoNLL-formatted lines of a single sentence.

    Lines with fewer than four whitespace-separated columns are ignored.
    The first column is taken as the token and the last column as its tag.
    Returns a tuple of the space-joined tokens and a list of
    (type, string) entity pairs in order of appearance.
    """
    tokens = []
    spans = []
    open_label = None
    open_tokens = []

    for raw_line in lines:
        columns = raw_line.strip().split()
        if len(columns) >= 4:
            token, tag = columns[0], columns[-1]
            tokens.append(token)
            prefix, label = tag[:2], tag[2:]

            # An I- tag carrying the label of the open span simply extends it.
            if prefix == "I-" and open_label == label:
                open_tokens.append(token)
                continue

            # Any other tag ends whatever span is currently open.
            had_open_span = bool(open_label)
            if had_open_span:
                spans.append((open_label, " ".join(open_tokens)))

            if prefix in ("B-", "I-"):
                # B- always opens a span; a stray or mismatched I- does too.
                open_label, open_tokens = label, [token]
            elif had_open_span:
                open_label, open_tokens = None, []

    # Emit a span that runs up to the end of the sentence.
    if open_label:
        spans.append((open_label, " ".join(open_tokens)))

    return " ".join(tokens), spans

def _normalize_entity_text(text):
    """Return *text* lowercased with all spaces and dots removed.

    Stripping dots and spaces makes spelling variants such as
    "Д.Гантулга" and "Д. Гантулга" compare equal ("дгантулга").
    """
    return text.replace(" ", "").replace(".", "").lower()


def evaluate_ner(test_file_path, limit=None):
    """Evaluate the NER engine on a CoNLL file with entity-level exact match.

    The file is read as UTF-8 and split into sentence blocks on blank lines.
    Each block is parsed with ``extract_entities_from_conll``; blocks that
    yield no text are dropped. Every remaining sentence is passed through
    ``Preprocessor.preprocess_nlp`` and ``NEREngine.recognize``, and the
    predicted (type, text) pairs are matched against the gold pairs after
    normalization. Gold entities of type ``MISC`` are ignored.

    Args:
        test_file_path: Path to the CoNLL-formatted test file.
        limit: Maximum number of sentences to evaluate. ``None`` (the
            default) evaluates every sentence; ``0`` evaluates none.

    Returns:
        A dict with the keys ``sentences``, ``true_positives``,
        ``false_positives``, ``false_negatives``, ``precision``, ``recall``
        and ``f1``. The same figures are also printed as a report.
    """
    print(f"Loading test data from {test_file_path}...")

    with open(test_file_path, "r", encoding="utf-8") as f:
        blocks = f.read().split("\n\n")

    sentences = []
    for block in blocks:
        if not block.strip():
            continue
        text, true_ents = extract_entities_from_conll(block.split("\n"))
        if text:
            sentences.append((text, true_ents))

    # Compare against None so that limit=0 is not mistaken for "no limit".
    if limit is not None:
        sentences = sentences[:limit]

    print(f"Loaded {len(sentences)} test sentences.")

    preprocessor = Preprocessor()
    ner = NEREngine()

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    print("Running NER evaluation (this may take a while)...")
    for i, (text, true_ents) in enumerate(sentences):
        if i > 0 and i % 50 == 0:
            print(f"Processed {i}/{len(sentences)} sentences...")

        # Clean text specifically for NER
        clean_text = preprocessor.preprocess_nlp(text)

        predicted_results = ner.recognize(clean_text)

        # Predictions and gold entities go through the same normalization so
        # the comparison is not sensitive to spacing, dots or letter case.
        pred_ents = [
            (res.entity_group, _normalize_entity_text(res.word))
            for res in predicted_results
        ]

        # Skip MISC since the fine-tuned model does not produce MISC labels
        # (removed from training set)
        true_ents_formatted = [
            (t, _normalize_entity_text(w))
            for t, w in true_ents
            if t != "MISC"
        ]

        # Each prediction may satisfy at most one gold entity, so a matched
        # prediction is removed before the next gold entity is checked.
        for true_e in true_ents_formatted:
            if true_e in pred_ents:
                true_positives += 1
                pred_ents.remove(true_e)
            else:
                false_negatives += 1

        # Whatever is left in pred_ents are false positives
        false_positives += len(pred_ents)

    predicted_total = true_positives + false_positives
    gold_total = true_positives + false_negatives
    precision = true_positives / predicted_total if predicted_total > 0 else 0.0
    recall = true_positives / gold_total if gold_total > 0 else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

    print("\n" + "="*40)
    print("NER EVALUATION RESULTS (Entity-Level Exact Match)")
    print("="*40)
    print(f"Sentences Evaluated: {len(sentences)}")
    print(f"True Positives:      {true_positives}")
    print(f"False Positives:     {false_positives}")
    print(f"False Negatives:     {false_negatives}")
    print("-" * 40)
    print(f"Precision:           {precision:.4f}")
    print(f"Recall:              {recall:.4f}")
    print(f"F1 Score:            {f1:.4f}")
    print("="*40)

    return {
        "sentences": len(sentences),
        "true_positives": true_positives,
        "false_positives": false_positives,
        "false_negatives": false_negatives,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }

if __name__ == "__main__":
    # Resolve the project root from this file's absolute location, the same
    # way the sys.path setup at the top of this file does. A relative
    # __file__ would otherwise make the data path depend on the current
    # working directory.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    test_path = os.path.join(project_root, "data", "test.txt")
    if not os.path.exists(test_path):
        print(f"Error: Could not find CoNLL test file at {test_path}")
        # Exit with a non-zero status so callers and CI can detect the failure.
        sys.exit(1)

    # Run on the first 500 sentences to get a quick estimate.
    # Change limit=None to run on the entire test set.
    evaluate_ner(test_path, limit=500)