earlsab committed on
Commit
c1b7ccc
1 Parent(s): 2d6f3d7

remove special characters

Browse files
Files changed (1) hide show
  1. handler.py +7 -2
handler.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from typing import Dict, List, Any
2
  from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
3
  import torch
@@ -26,10 +27,14 @@ class EndpointHandler():
26
  Return:
27
  A :obj:`list` | `dict`: will be serialized and returned
28
  """
29
- start_date = self.extract_start_date(data["inputs"])
30
- end_date = self.extract_end_date(data["inputs"])
31
  return {"start_date": start_date, "end_date": end_date}
32
 
 
 
 
 
33
  def extract_start_date(self, text):
34
  question = "What is the start date?"
35
 
 
1
+ import re
2
  from typing import Dict, List, Any
3
  from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
4
  import torch
 
27
  Return:
28
  A :obj:`list` | `dict`: will be serialized and returned
29
  """
30
+ start_date = self.remove_special_characters(self.extract_start_date(data["inputs"]))
31
+ end_date = self.remove_special_characters(self.extract_end_date(data["inputs"]))
32
  return {"start_date": start_date, "end_date": end_date}
33
 
34
+ def remove_special_characters(self, s):
35
+ return re.sub(r'(?<!\d)[^\w\s/]+|[^\w\s/]+(?!\d)', '', s).strip()
36
+
37
+
38
  def extract_start_date(self, text):
39
  question = "What is the start date?"
40