File size: 1,691 Bytes
736fded 2382f1d 736fded 1437552 2382f1d 89ec2fe 2382f1d 1437552 2382f1d 1437552 2382f1d 1437552 2382f1d 1437552 736fded 1437552 f5c57f1 736fded f5c57f1 736fded 78f775e 736fded f5c57f1 736fded f5c57f1 736fded f5c57f1 736fded |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
from fastapi import FastAPI
from pydantic import BaseModel
import spacy
from contextlib import asynccontextmanager
# Global variable to store the model
nlp = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: load the NLP model on startup, release it on shutdown.

    The try/finally guarantees cleanup() runs even if the application
    raises while serving — the original skipped cleanup on that path.
    """
    await load_model()
    try:
        yield
    finally:
        await cleanup()
# Application instance; the lifespan handler loads/unloads the NLP model.
app = FastAPI(lifespan=lifespan)
async def load_model():
    """
    Event to load the NLP model into memory on application startup.
    """
    # Rebinds the module-level `nlp` so request handlers can reuse one
    # loaded pipeline instead of loading per request.
    global nlp
    # NOTE(review): spacy.load is blocking; acceptable here since it runs
    # once at startup, before the event loop serves traffic.
    nlp = spacy.load("en_core_web_sm")
    print("NLP model loaded successfully.")
async def cleanup():
    """
    Event to clean up resources on application shutdown (if needed).
    """
    # Drop the module-level reference so the model can be garbage-collected.
    global nlp
    nlp = None
    print("NLP model unloaded.")
class NERRequest(BaseModel):
    """Request body for the /process endpoint."""
    # Text segments to run named-entity recognition over.
    chunks: list[str]
@app.post("/process")
async def process_text(request: NERRequest):
    """Run NER over each chunk and return redaction metadata.

    Each distinct entity surface form produces exactly one record mapping
    the original text to a ``REDACTED_<LABEL>`` placeholder; repeated
    occurrences of the same text are skipped.

    Returns:
        dict with a ``"metadata"`` list of records, each carrying
        ``personal_info``, ``redaction_type`` and ``redacted_word``.
    """
    # `nlp` is only read here, so no `global` statement is needed.
    metadata_records = []
    # Surface forms already recorded — a set suffices; the original kept a
    # dict whose stored redacted words were never read back.
    seen_entities = set()
    for text in request.chunks:
        doc = nlp(text)
        for ent in doc.ents:
            print(f"{ent.text} - {ent.label_}")
            if ent.text in seen_entities:
                # Already emitted a record for this entity; skip duplicates.
                continue
            seen_entities.add(ent.text)
            metadata_records.append({
                "personal_info": ent.text,
                "redaction_type": ent.label_,
                "redacted_word": f"REDACTED_{ent.label_}",
            })
    return {
        "metadata": metadata_records,
    }
|