from contextlib import asynccontextmanager

import spacy
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Module-level handle to the spaCy pipeline. It stays None until load_model()
# assigns the loaded model, and cleanup() resets it to None again.
nlp = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Manage the NLP model for the lifetime of the application.

    The model is loaded before control is handed to the application and is
    released again once the application is done.

    Args:
        app: The FastAPI application this lifespan is attached to (not used
            by the body itself).
    """
    await load_model()
    try:
        yield
    finally:
        # Release the model even if an exception or cancellation is thrown
        # into the generator at the yield point; without the finally clause
        # the cleanup step would be skipped in that case.
        await cleanup()

# Application instance; `lifespan` above is registered as its lifespan handler.
app = FastAPI(lifespan=lifespan)

async def load_model():
    """
    Load the spaCy ``en_core_web_sm`` pipeline and publish it as the
    module-level ``nlp`` object.

    Called from the application lifespan before requests are served.
    """
    global nlp
    loaded_pipeline = spacy.load("en_core_web_sm")
    # Only rebind the global once loading has succeeded.
    nlp = loaded_pipeline
    print("NLP model loaded successfully.")


async def cleanup():
    """
    Drop the module-level reference to the NLP model.

    Called from the application lifespan on shutdown. After this runs,
    ``nlp`` is ``None`` again.
    """
    global nlp
    nlp = None  # let the model be garbage-collected
    print("NLP model unloaded.")


# Request body accepted by the /process endpoint.
class NERRequest(BaseModel):
    # Text chunks to analyse for named entities; each chunk is processed
    # as a separate document.
    chunks: list[str]


@app.post("/process")
async def process_text(request: NERRequest):
    """
    Run named-entity recognition over the submitted chunks and describe
    each distinct entity that was found.

    Entities are de-duplicated by their exact text across all chunks of the
    request: only the first occurrence produces a record, so a later
    occurrence of the same text is skipped even if it carries a different
    label.

    Args:
        request: Request body holding the list of text chunks to analyse.

    Returns:
        A dict with a single ``"metadata"`` key whose value is a list of
        records in order of first appearance. Each record contains
        ``"personal_info"`` (the entity text), ``"redaction_type"`` (the
        entity label) and ``"redacted_word"`` (``REDACTED_<label>``).

    Raises:
        HTTPException: With status 503 when the NLP model is not loaded.
    """
    # Reading a module-level name needs no `global` statement; take a local
    # snapshot so the None check and the use below see the same object.
    model = nlp
    if model is None:
        # Fail with an explicit status instead of the opaque
        # "'NoneType' object is not callable" error.
        raise HTTPException(status_code=503, detail="NLP model is not loaded.")

    metadata_records = []

    # Entity texts already reported for this request.
    seen_entities = set()

    # NOTE(review): the pipeline runs synchronously inside this async
    # handler, so it presumably blocks the event loop while processing;
    # consider offloading to a worker thread if that becomes a problem.
    # pipe() streams the chunks through the pipeline in batches and yields
    # one Doc per chunk, in input order.
    for doc in model.pipe(request.chunks):
        for ent in doc.ents:
            # The entity text is personal information; it is deliberately
            # not printed or logged here.
            if ent.text in seen_entities:
                continue
            seen_entities.add(ent.text)

            metadata_records.append(
                {
                    "personal_info": ent.text,
                    "redaction_type": ent.label_,
                    "redacted_word": f"REDACTED_{ent.label_}",
                }
            )

    return {
        "metadata": metadata_records,
    }