# ner / app.py
# delwinn's picture
# remove chunks from response.
# 78f775e verified
from fastapi import FastAPI
from pydantic import BaseModel
import spacy
from contextlib import asynccontextmanager
# Module-level handle to the spaCy pipeline. It is None until load_model()
# assigns the loaded model, and cleanup() resets it to None again.
nlp = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Manage application-wide resources for the lifetime of the FastAPI app.

    Loads the NLP model before the application starts serving and releases
    it again once the application is shutting down.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    await load_model()
    try:
        yield
    finally:
        # Run the shutdown hook even if an exception or cancellation is
        # thrown into the context manager at the yield point; without the
        # try/finally the cleanup would be silently skipped in that case.
        await cleanup()
# Application instance; `lifespan` runs load_model() before its yield and
# cleanup() after it.
app = FastAPI(lifespan=lifespan)
async def load_model():
    """
    Startup hook: load the spaCy pipeline into memory.

    The loaded "en_core_web_sm" pipeline is published through the
    module-level ``nlp`` variable so request handlers can reuse it.
    """
    global nlp
    pipeline = spacy.load("en_core_web_sm")
    nlp = pipeline
    print("NLP model loaded successfully.")
async def cleanup():
    """
    Shutdown hook: drop the module-level reference to the NLP pipeline.

    After this runs, ``nlp`` is ``None`` again.
    """
    global nlp
    # Rebinding to None releases this module's reference to the pipeline.
    nlp = None
    print("NLP model unloaded.")
# Request body accepted by POST /process.
class NERRequest(BaseModel):
    # Text chunks to scan for named entities; the handler walks them in order.
    chunks: list[str]
@app.post("/process")
async def process_text(request: NERRequest):
    """
    Run named-entity recognition over the submitted chunks and describe each
    distinct entity once.

    Args:
        request: Payload whose ``chunks`` are analysed in the order given.

    Returns:
        A dict with a single ``"metadata"`` key holding one record per
        distinct entity text, in order of first appearance. Each record has
        ``personal_info`` (the entity text), ``redaction_type`` (the entity
        label) and ``redacted_word`` (``REDACTED_<label>``).

    Raises:
        RuntimeError: If the NLP model has not been loaded yet.
    """
    if nlp is None:
        # Fail with an actionable message instead of the cryptic
        # "'NoneType' object is not callable" when the handler is invoked
        # without the startup hook having run.
        raise RuntimeError("NLP model is not loaded.")

    metadata_records = []
    # Entity texts that already have a record. Deduplication is by text only,
    # so a repeated text is skipped even if it carries a different label.
    seen_entities = set()

    # nlp.pipe lets spaCy batch the chunks instead of running the pipeline
    # once per chunk; documents are yielded in input order.
    for doc in nlp.pipe(request.chunks):
        for ent in doc.ents:
            # NOTE: entity text is deliberately not printed -- it is the
            # personal information being flagged and must not leak into
            # server logs.
            if ent.text in seen_entities:
                continue
            seen_entities.add(ent.text)
            metadata_records.append(
                {
                    "personal_info": ent.text,
                    "redaction_type": ent.label_,
                    "redacted_word": f"REDACTED_{ent.label_}",
                }
            )

    return {"metadata": metadata_records}