aamirtaymoor committed on
Commit
ffdedb5
·
verified ·
1 Parent(s): a44b3dc

Update ml_service.py

Browse files
Files changed (1) hide show
  1. ml_service.py +19 -12
ml_service.py CHANGED
@@ -20,9 +20,13 @@ sentiment_model_file = f"ml_models/sentiment_model/model.ft"
20
 
21
 
22
  class MlProcessing:
23
- def __init__(self, comment_dict):
24
  self.comment_dict = comment_dict
25
  self.is_cleaned = False
 
 
 
 
26
 
27
  def remove_prefix(self, label):
28
  return label.split('-')[-1]
@@ -148,7 +152,8 @@ class MlProcessing:
148
  return {'label': label, 'score': prob}
149
 
150
  def apply_sentiment_model(self, review_dict_entities):
151
- nlp = spacy.load('en_core_web_sm')
 
152
  sentence_finder = SentenceBoundsFinder(nlp)
153
  positive_sentiment_matcher = self.configure_matcher(nlp, POSITIVE_SENTIMENT_PATTERNS)
154
  sentiment_model = self.load_sentiment_model()
@@ -176,7 +181,8 @@ class MlProcessing:
176
  return self.comment_dict
177
 
178
  def load_sentiment_model(self):
179
- return fasttext.load_model(sentiment_model_file)
 
180
 
181
  def get_sentence_start(self, sentence_bounds, position):
182
  for start, end in sentence_bounds:
@@ -185,20 +191,20 @@ class MlProcessing:
185
 
186
  raise RuntimeError('Failed to get sentence bound')
187
 
188
- def load_ner_model(self, max_seq_len=500, use_multiprocessing=False):
189
  args = {'overwrite_output_dir': False, 'reprocess_input_data': True, 'num_train_epochs': 30,
190
  'evaluation_strategy': 'epoch', 'evaluate_during_training': True, 'silent': True,
191
  'max_seq_length': max_seq_len, 'use_multiprocessing': use_multiprocessing,
192
  'use_multiprocessing_for_evaluation': use_multiprocessing, 'fp16': True}
193
 
194
- with open(labels_file) as f:
195
- labels = json.load(f)
196
 
197
  return NERModel('longformer', ner_model_directory, args=args, use_cuda=False, labels=labels)
198
 
199
  def apply_ner_model(self):
200
- nlp = spacy.load('en_core_web_sm')
201
- nlp.add_pipe('sentencizer')
 
202
 
203
  regex = re.compile('(\(original.{0,3}\).+)', re.IGNORECASE | re.MULTILINE | re.DOTALL)
204
  if self.comment_dict['skip']:
@@ -299,8 +305,9 @@ class MlProcessing:
299
 
300
  class SentenceBoundsFinder:
301
  def __init__(self, nlp=None):
302
- self._nlp = nlp or spacy.load('en_core_web_sm')
303
- self._nlp.add_pipe('sentencizer')
 
304
 
305
  def __call__(self, text):
306
  bounds = []
@@ -384,8 +391,8 @@ class ReviewsCleaner:
384
  text = text[:-1]
385
  return text
386
 
387
- def process_single_comment(raw_data):
388
- ml = MlProcessing(comment_dict=raw_data)
389
  processed_data = ml.main()
390
  spans = processed_data.get('spans', list())
391
  has_sentiments = True
 
20
 
21
 
22
  class MlProcessing:
23
+ def __init__(self, comment_dict, language_model, sentiment_model, labels):
24
  self.comment_dict = comment_dict
25
  self.is_cleaned = False
26
+ self.language_model = language_model
27
+ self.sentiment_model = sentiment_model
28
+ self.labels = labels
29
+
30
 
31
  def remove_prefix(self, label):
32
  return label.split('-')[-1]
 
152
  return {'label': label, 'score': prob}
153
 
154
  def apply_sentiment_model(self, review_dict_entities):
155
+ # nlp = spacy.load('en_core_web_sm')
156
+ nlp = self.sentiment_model
157
  sentence_finder = SentenceBoundsFinder(nlp)
158
  positive_sentiment_matcher = self.configure_matcher(nlp, POSITIVE_SENTIMENT_PATTERNS)
159
  sentiment_model = self.load_sentiment_model()
 
181
  return self.comment_dict
182
 
183
  def load_sentiment_model(self):
184
+ # return fasttext.load_model(sentiment_model_file)
185
+ return self.sentiment_model
186
 
187
  def get_sentence_start(self, sentence_bounds, position):
188
  for start, end in sentence_bounds:
 
191
 
192
  raise RuntimeError('Failed to get sentence bound')
193
 
194
+ def load_ner_model(self, max_seq_len=500, use_multiprocessing=True):
195
  args = {'overwrite_output_dir': False, 'reprocess_input_data': True, 'num_train_epochs': 30,
196
  'evaluation_strategy': 'epoch', 'evaluate_during_training': True, 'silent': True,
197
  'max_seq_length': max_seq_len, 'use_multiprocessing': use_multiprocessing,
198
  'use_multiprocessing_for_evaluation': use_multiprocessing, 'fp16': True}
199
 
200
+ labels = self.labels
 
201
 
202
  return NERModel('longformer', ner_model_directory, args=args, use_cuda=False, labels=labels)
203
 
204
  def apply_ner_model(self):
205
+ nlp = self.language_model
206
+ # nlp = spacy.load('en_core_web_sm')
207
+ # nlp.add_pipe('sentencizer')
208
 
209
  regex = re.compile('(\(original.{0,3}\).+)', re.IGNORECASE | re.MULTILINE | re.DOTALL)
210
  if self.comment_dict['skip']:
 
305
 
306
  class SentenceBoundsFinder:
307
  def __init__(self, nlp=None):
308
+ # self._nlp = nlp or spacy.load('en_core_web_sm')
309
+ # self._nlp.add_pipe('sentencizer')
310
+ self._nlp = nlp or self.language_model
311
 
312
  def __call__(self, text):
313
  bounds = []
 
391
  text = text[:-1]
392
  return text
393
 
394
+ def process_single_comment(raw_data, LANGUAGE_MODEL, SENTIMENT_MODEL, LABELS ):
395
+ ml = MlProcessing(comment_dict=raw_data, language_model=LANGUAGE_MODEL, sentiment_model=SENTIMENT_MODEL, labels=LABELS )
396
  processed_data = ml.main()
397
  spans = processed_data.get('spans', list())
398
  has_sentiments = True