sitammeur committed on
Commit
13c5d4f
·
verified ·
1 Parent(s): d3a5ffd

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +131 -0
  2. exception.py +50 -0
  3. logger.py +21 -0
  4. ner.py +38 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Necessary imports
2
+ import warnings
3
+
4
+ warnings.filterwarnings("ignore")
5
+
6
+ import gradio as gr
7
+ from ner import pii_ner
8
+
9
+
10
# Example rows offered in the UI. Column order matches the component list
# handed to gr.Examples below: text, labels, threshold, nested NER, multi-label.
examples = [
    [
        "Hi support, I can't log in! My account username is 'johndoe88'. Every time I try, it says 'invalid credentials'. Please reset my password. You can reach me at (555) 123-4567 or johnd@example.com",
        "email, phone_number, user_name",
        0.3,
        False,
        False,
    ],
    [
        "Patient John Doe, MRN 123456, diagnosed with diabetes.",
        "name, medical record number, condition",
        0.3,
        False,
        False,
    ],
    [
        "Client Jane Doe vs. Corporation ABC, case #2024-CV-001",
        "name, organization, case number",
        0.3,
        False,
        False,
    ],
]


# Build the Gradio UI
with gr.Blocks(title="GLiNER-PII", theme=gr.themes.Soft()) as demo:
    # Header: short model description and reference links
    gr.Markdown(
        """
        # GLiNER-PII
        GLiNER-PII is a successor to the Gretel GLiNER PII/PHI models. Built on the GLiNER bi-large base, it detects and classifies a broad range of Personally Identifiable Information (PII) and Protected Health Information (PHI) in structured and unstructured text. It is non-generative and produces span-level entity annotations with confidence scores across 55+ categories. This model was developed by NVIDIA.

        ## Links
        * Model: https://huggingface.co/nvidia/gliner-pii
        * Training dataset: https://huggingface.co/datasets/nvidia/nemotron-pii
        * GLiNER library: https://pypi.org/project/gliner/
        """
    )

    # Free-text input, pre-filled with the first example
    input_text = gr.Textbox(
        label="Text input",
        placeholder="Enter your text here",
        value=examples[0][0],
    )

    # One row holding the labels box, the threshold slider and both toggles
    with gr.Row() as row:
        labels = gr.Textbox(
            label="Labels",
            placeholder="Enter your PII/PHI labels here (comma separated)",
            value=examples[0][1],
            scale=2,
        )
        threshold = gr.Slider(
            minimum=0,
            maximum=1,
            step=0.01,
            value=0.5,
            label="Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=1,
        )
        nested_ner = gr.Checkbox(
            label="Nested NER",
            info="Allow for nested NER?",
            value=False,
            scale=0,
        )
        multi_label = gr.Checkbox(
            label="Multi-label",
            info="Allow for multi-label?",
            value=False,
            scale=0,
        )

    # Where the predicted entity spans are rendered
    output = gr.HighlightedText(label="Predicted Entities")

    # Explicit submit button
    submit_btn = gr.Button("Submit")

    # Every prediction call reads the same five components, in this order
    ner_inputs = (input_text, labels, threshold, nested_ner, multi_label)

    # NOTE: as in the original script, the name `examples` is rebound here from
    # the list of rows to the gr.Examples component built from it.
    examples = gr.Examples(
        examples,
        fn=pii_ner,
        inputs=list(ner_inputs),
        outputs=output,
        cache_examples=True,
        cache_mode="lazy",
    )

    # Re-run the prediction whenever any of these events fires
    for register_event in (
        input_text.submit,
        labels.submit,
        threshold.release,
        submit_btn.click,
        nested_ner.change,
        multi_label.change,
    ):
        register_event(
            fn=pii_ner,
            inputs=list(ner_inputs),
            outputs=output,
        )

demo.launch(debug=False)
exception.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module defines a custom exception handling class and a function to get error message with details of the error.
3
+ """
4
+
5
+ # Standard Library
6
+ import sys
7
+
8
+ # Local imports
9
+ from logger import logging
10
+
11
+
12
+ # Function Definition to get error message with details of the error (file name and line number) when an error occurs in the program
13
def get_error_message(error, error_detail: sys):
    """
    Get error message with details of the error.

    The traceback is taken from ``error_detail.exc_info()``. When no exception
    is currently being handled (so ``exc_info()`` yields no traceback), the
    traceback attached to ``error`` itself is used instead; if neither is
    available the file name and line number are reported as ``<unknown>``
    rather than raising an ``AttributeError``.

    Args:
        - error (Exception): The error that occurred.
        - error_detail (sys): The ``sys`` module (anything exposing ``exc_info()``).

    Returns:
        str: A string containing the error message along with the file name and line number
        taken from the first frame of the traceback.
    """
    _, _, exc_tb = error_detail.exc_info()

    # Outside an ``except`` block exc_info() returns (None, None, None);
    # fall back to the traceback recorded on the exception object, if any.
    if exc_tb is None:
        exc_tb = getattr(error, "__traceback__", None)

    # Get error details
    if exc_tb is None:
        file_name = "<unknown>"
        line_number = "<unknown>"
    else:
        file_name = exc_tb.tb_frame.f_code.co_filename
        line_number = exc_tb.tb_lineno

    return "Error occured in python script name [{0}] line number [{1}] error message[{2}]".format(
        file_name, line_number, str(error)
    )
31
+
32
+
33
+ # Custom Exception Handling Class Definition
34
class CustomExceptionHandling(Exception):
    """
    Project-wide exception wrapper.

    Wraps an underlying error and replaces its string form with the message
    built by ``get_error_message`` from the error and the supplied
    ``error_detail``.
    """

    def __init__(self, error_message, error_detail: sys):
        """
        Store the formatted message for the wrapped error.

        Args:
            - error_message: The original error (or message) being wrapped.
            - error_detail (sys): Forwarded to ``get_error_message``.
        """
        super().__init__(error_message)

        # Build the message once, up front; __str__ simply returns it.
        formatted = get_error_message(error_message, error_detail=error_detail)
        self.error_message = formatted

    def __str__(self):
        """Return the formatted message built in ``__init__``."""
        return self.error_message
logger.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Importing the required modules
2
+ import os
3
+ import logging
4
+ from datetime import datetime
5
+
6
# Log file named after the date and time at which this module is imported
LOG_FILE = datetime.now().strftime("%m_%d_%Y_%H_%M_%S") + ".log"

# Ensure a "logs" folder exists under the current working directory
logs_path = os.path.join(os.getcwd(), "logs")
os.makedirs(logs_path, exist_ok=True)

# Full path of the log file inside the logs folder
LOG_FILE_PATH = os.path.join(logs_path, LOG_FILE)

# Configure the root logger: INFO level, written to the file above
logging.basicConfig(
    level=logging.INFO,
    filename=LOG_FILE_PATH,
    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
)
ner.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from typing import Dict, Union
3
+ from exception import CustomExceptionHandling
4
+ from logger import logging
5
+ from gliner import GLiNER
6
+
7
# Load the GLiNER model once, when this module is imported; pii_ner() below
# calls predict_entities() on this single shared instance.
model = GLiNER.from_pretrained("nvidia/gliner-PII")
9
+
10
+
11
def pii_ner(
    text, labels: str, threshold: float, nested_ner: bool, multi_label: bool
) -> Dict[str, Union[str, list]]:
    """Perform PII detection on the given text.

    Args:
        text: The text to scan for entities.
        labels: Comma-separated entity labels, e.g. ``"email, phone_number"``.
            Whitespace around each label is stripped and empty items are
            dropped before the labels are handed to the model.
        threshold: Forwarded to ``model.predict_entities`` as ``threshold``.
        nested_ner: When true the model is called with ``flat_ner=False``.
        multi_label: Forwarded to ``model.predict_entities`` as ``multi_label``.

    Returns:
        A dict with the original ``"text"`` and an ``"entities"`` list; each
        entity is a dict with ``"entity"`` (label), ``"word"``, ``"start"``,
        ``"end"`` and a ``"score"`` that is always ``0``. When the text is
        empty or no usable label is given, ``"entities"`` is an empty list and
        the model is not called.

    Raises:
        CustomExceptionHandling: Wraps any exception raised while splitting
            the labels or running the model.
    """
    try:
        # "a, b" must yield ["a", "b"], not ["a", " b"]; blank items are dropped.
        label_list = [label.strip() for label in labels.split(",") if label.strip()]

        # Nothing to search for (or nothing to search in): skip the model call.
        if not text or not label_list:
            return {"text": text, "entities": []}

        predictions = model.predict_entities(
            text,
            label_list,
            flat_ner=not nested_ner,
            threshold=threshold,
            multi_label=multi_label,
        )
        entities = [
            {
                "entity": entity["label"],
                "word": entity["text"],
                "start": entity["start"],
                "end": entity["end"],
                "score": 0,
            }
            for entity in predictions
        ]

        # Log success only after the prediction has actually finished.
        logging.info("PII detection completed successfully")
        return {"text": text, "entities": entities}
    except Exception as e:
        raise CustomExceptionHandling(e, sys) from e