Delwin Mathew committed on
Commit
736fded
·
1 Parent(s): 923a315

add ner sample

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. Dockerfile +17 -0
  3. app.py +44 -0
  4. requirements.txt +0 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ /venv/
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Docker image for a Hugging Face Space.
# Reference: https://huggingface.co/docs/hub/spaces-sdks-docker
# (that page also has guidance on how best to write a Dockerfile for Spaces)

FROM python:3.11

# Create an unprivileged account with UID 1000 and switch to it for all
# following instructions.
RUN useradd --create-home --uid 1000 user
USER user

# Prepend the user's local bin directory to PATH so executables placed
# there (e.g. by pip) can be found at runtime.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy the dependency list on its own first so the install layer can be
# reused from cache while requirements.txt is unchanged.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade --requirement requirements.txt

# Copy the rest of the build context into the image.
COPY --chown=user . /app

# The server started below listens on port 7860.
EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ import spacy
4
+
5
+ app = FastAPI()
6
+
7
+
8
class NERRequest(BaseModel):
    """Request body accepted by the ``/process`` endpoint."""

    # Text chunks to process; each one is run through the NER pipeline
    # independently by ``process_text``.
    chunks: list[str]
10
+
11
+
12
_NER_MODEL_NAME = "en_legal_ner_trf"

# Lazily-initialised spaCy pipeline shared by all requests. Loading the
# pipeline is expensive, so it is done once instead of on every call.
_nlp_pipeline = None


def _get_nlp():
    """Return the shared spaCy pipeline, loading it on first use."""
    global _nlp_pipeline
    if _nlp_pipeline is None:
        _nlp_pipeline = spacy.load(_NER_MODEL_NAME)
    return _nlp_pipeline


@app.post("/process")
async def process_text(request: NERRequest):
    """Redact named entities from each chunk of text in the request.

    Every entity the NER pipeline detects is replaced by a placeholder of
    the form ``REDACTED_<LABEL>_<N>``, where ``N`` is a counter that runs
    across all chunks of the request, starting at 1.

    Args:
        request: Body carrying ``chunks``, the list of strings to redact.

    Returns:
        A dict with two keys:

        * ``"redacted_chunks"``: the input chunks, in order, with each
          detected entity span replaced by its placeholder.
        * ``"metadata"``: one record per detected entity, in detection
          order, with keys ``"personal_info"`` (original entity text),
          ``"redaction_type"`` (entity label) and ``"redacted_word"``
          (the placeholder inserted into the text).

    Note:
        Only the character spans reported by the pipeline are replaced.
        Other occurrences of the same string that the pipeline did not
        flag as entities are left untouched.
    """
    nlp = _get_nlp()

    redacted_chunks = []
    metadata = []
    redacted_count = 0

    for text in request.chunks:
        doc = nlp(text)

        # Rebuild the chunk from the entity character offsets rather than
        # using str.replace: a global substring replace can clobber
        # entities that contain one another and leaves repeated entities
        # with placeholders that never appear in the output.
        pieces = []
        cursor = 0
        for ent in doc.ents:
            redacted_count += 1
            redacted_word = f"REDACTED_{ent.label_}_{redacted_count}"

            # The raw entity text is deliberately not printed or logged:
            # it is the sensitive value this endpoint exists to hide.
            metadata.append(
                {
                    "personal_info": ent.text,
                    "redaction_type": ent.label_,
                    "redacted_word": redacted_word,
                }
            )

            pieces.append(text[cursor:ent.start_char])
            pieces.append(redacted_word)
            cursor = ent.end_char

        # Keep whatever follows the last entity (or the whole chunk when
        # no entity was found).
        pieces.append(text[cursor:])
        redacted_chunks.append("".join(pieces))

    return {
        "redacted_chunks": redacted_chunks,
        "metadata": metadata,
    }
requirements.txt ADDED
Binary file (7.82 kB). View file