earlsab
committed on
Commit
·
c1b7ccc
1
Parent(s):
2d6f3d7
remove special characters
Browse files
- handler.py +7 -2
handler.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
from typing import Dict, List, Any
|
| 2 |
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
|
| 3 |
import torch
|
|
@@ -26,10 +27,14 @@ class EndpointHandler():
|
|
| 26 |
Return:
|
| 27 |
A :obj:`list` | `dict`: will be serialized and returned
|
| 28 |
"""
|
| 29 |
-
start_date = self.extract_start_date(data["inputs"])
|
| 30 |
-
end_date = self.extract_end_date(data["inputs"])
|
| 31 |
return {"start_date": start_date, "end_date": end_date}
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
def extract_start_date(self, text):
|
| 34 |
question = "What is the start date?"
|
| 35 |
|
|
|
|
| 1 |
+
import re
|
| 2 |
from typing import Dict, List, Any
|
| 3 |
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
|
| 4 |
import torch
|
|
|
|
| 27 |
Return:
|
| 28 |
A :obj:`list` | `dict`: will be serialized and returned
|
| 29 |
"""
|
| 30 |
+
start_date = self.remove_special_characters(self.extract_start_date(data["inputs"]))
|
| 31 |
+
end_date = self.remove_special_characters(self.extract_end_date(data["inputs"]))
|
| 32 |
return {"start_date": start_date, "end_date": end_date}
|
| 33 |
|
| 34 |
+
def remove_special_characters(self, s):
|
| 35 |
+
return re.sub(r'(?<!\d)[^\w\s/]+|[^\w\s/]+(?!\d)', '', s).strip()
|
| 36 |
+
|
| 37 |
+
|
| 38 |
def extract_start_date(self, text):
|
| 39 |
question = "What is the start date?"
|
| 40 |
|