Spaces:
Sleeping
Sleeping
Update ml_service.py
Browse files- ml_service.py +19 -12
ml_service.py
CHANGED
|
@@ -20,9 +20,13 @@ sentiment_model_file = f"ml_models/sentiment_model/model.ft"
|
|
| 20 |
|
| 21 |
|
| 22 |
class MlProcessing:
|
| 23 |
-
def __init__(self, comment_dict):
|
| 24 |
self.comment_dict = comment_dict
|
| 25 |
self.is_cleaned = False
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
def remove_prefix(self, label):
|
| 28 |
return label.split('-')[-1]
|
|
@@ -148,7 +152,8 @@ class MlProcessing:
|
|
| 148 |
return {'label': label, 'score': prob}
|
| 149 |
|
| 150 |
def apply_sentiment_model(self, review_dict_entities):
|
| 151 |
-
nlp = spacy.load('en_core_web_sm')
|
|
|
|
| 152 |
sentence_finder = SentenceBoundsFinder(nlp)
|
| 153 |
positive_sentiment_matcher = self.configure_matcher(nlp, POSITIVE_SENTIMENT_PATTERNS)
|
| 154 |
sentiment_model = self.load_sentiment_model()
|
|
@@ -176,7 +181,8 @@ class MlProcessing:
|
|
| 176 |
return self.comment_dict
|
| 177 |
|
| 178 |
def load_sentiment_model(self):
|
| 179 |
-
return fasttext.load_model(sentiment_model_file)
|
|
|
|
| 180 |
|
| 181 |
def get_sentence_start(self, sentence_bounds, position):
|
| 182 |
for start, end in sentence_bounds:
|
|
@@ -185,20 +191,20 @@ class MlProcessing:
|
|
| 185 |
|
| 186 |
raise RuntimeError('Failed to get sentence bound')
|
| 187 |
|
| 188 |
-
def load_ner_model(self, max_seq_len=500, use_multiprocessing=False):
|
| 189 |
args = {'overwrite_output_dir': False, 'reprocess_input_data': True, 'num_train_epochs': 30,
|
| 190 |
'evaluation_strategy': 'epoch', 'evaluate_during_training': True, 'silent': True,
|
| 191 |
'max_seq_length': max_seq_len, 'use_multiprocessing': use_multiprocessing,
|
| 192 |
'use_multiprocessing_for_evaluation': use_multiprocessing, 'fp16': True}
|
| 193 |
|
| 194 |
-
|
| 195 |
-
labels = json.load(f)
|
| 196 |
|
| 197 |
return NERModel('longformer', ner_model_directory, args=args, use_cuda=False, labels=labels)
|
| 198 |
|
| 199 |
def apply_ner_model(self):
|
| 200 |
-
nlp = spacy.load('en_core_web_sm')
|
| 201 |
-
nlp.add_pipe('sentencizer')
|
|
|
|
| 202 |
|
| 203 |
regex = re.compile('(\(original.{0,3}\).+)', re.IGNORECASE | re.MULTILINE | re.DOTALL)
|
| 204 |
if self.comment_dict['skip']:
|
|
@@ -299,8 +305,9 @@ class MlProcessing:
|
|
| 299 |
|
| 300 |
class SentenceBoundsFinder:
|
| 301 |
def __init__(self, nlp=None):
|
| 302 |
-
self._nlp = nlp or spacy.load('en_core_web_sm')
|
| 303 |
-
self._nlp.add_pipe('sentencizer')
|
|
|
|
| 304 |
|
| 305 |
def __call__(self, text):
|
| 306 |
bounds = []
|
|
@@ -384,8 +391,8 @@ class ReviewsCleaner:
|
|
| 384 |
text = text[:-1]
|
| 385 |
return text
|
| 386 |
|
| 387 |
-
def process_single_comment(raw_data):
|
| 388 |
-
ml = MlProcessing(comment_dict=raw_data)
|
| 389 |
processed_data = ml.main()
|
| 390 |
spans = processed_data.get('spans', list())
|
| 391 |
has_sentiments = True
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
class MlProcessing:
|
| 23 |
+
def __init__(self, comment_dict, language_model, sentiment_model, labels):
|
| 24 |
self.comment_dict = comment_dict
|
| 25 |
self.is_cleaned = False
|
| 26 |
+
self.language_model = language_model
|
| 27 |
+
self.sentiment_model = sentiment_model
|
| 28 |
+
self.labels = labels
|
| 29 |
+
|
| 30 |
|
| 31 |
def remove_prefix(self, label):
|
| 32 |
return label.split('-')[-1]
|
|
|
|
| 152 |
return {'label': label, 'score': prob}
|
| 153 |
|
| 154 |
def apply_sentiment_model(self, review_dict_entities):
|
| 155 |
+
# nlp = spacy.load('en_core_web_sm')
|
| 156 |
+
nlp = self.sentiment_model
|
| 157 |
sentence_finder = SentenceBoundsFinder(nlp)
|
| 158 |
positive_sentiment_matcher = self.configure_matcher(nlp, POSITIVE_SENTIMENT_PATTERNS)
|
| 159 |
sentiment_model = self.load_sentiment_model()
|
|
|
|
| 181 |
return self.comment_dict
|
| 182 |
|
| 183 |
def load_sentiment_model(self):
|
| 184 |
+
# return fasttext.load_model(sentiment_model_file)
|
| 185 |
+
return self.sentiment_model
|
| 186 |
|
| 187 |
def get_sentence_start(self, sentence_bounds, position):
|
| 188 |
for start, end in sentence_bounds:
|
|
|
|
| 191 |
|
| 192 |
raise RuntimeError('Failed to get sentence bound')
|
| 193 |
|
| 194 |
+
def load_ner_model(self, max_seq_len=500, use_multiprocessing=True):
|
| 195 |
args = {'overwrite_output_dir': False, 'reprocess_input_data': True, 'num_train_epochs': 30,
|
| 196 |
'evaluation_strategy': 'epoch', 'evaluate_during_training': True, 'silent': True,
|
| 197 |
'max_seq_length': max_seq_len, 'use_multiprocessing': use_multiprocessing,
|
| 198 |
'use_multiprocessing_for_evaluation': use_multiprocessing, 'fp16': True}
|
| 199 |
|
| 200 |
+
labels = self.labels
|
|
|
|
| 201 |
|
| 202 |
return NERModel('longformer', ner_model_directory, args=args, use_cuda=False, labels=labels)
|
| 203 |
|
| 204 |
def apply_ner_model(self):
|
| 205 |
+
nlp = self.language_model
|
| 206 |
+
# nlp = spacy.load('en_core_web_sm')
|
| 207 |
+
# nlp.add_pipe('sentencizer')
|
| 208 |
|
| 209 |
regex = re.compile('(\(original.{0,3}\).+)', re.IGNORECASE | re.MULTILINE | re.DOTALL)
|
| 210 |
if self.comment_dict['skip']:
|
|
|
|
| 305 |
|
| 306 |
class SentenceBoundsFinder:
|
| 307 |
def __init__(self, nlp=None):
|
| 308 |
+
# self._nlp = nlp or spacy.load('en_core_web_sm')
|
| 309 |
+
# self._nlp.add_pipe('sentencizer')
|
| 310 |
+
self._nlp = nlp or self.language_model
|
| 311 |
|
| 312 |
def __call__(self, text):
|
| 313 |
bounds = []
|
|
|
|
| 391 |
text = text[:-1]
|
| 392 |
return text
|
| 393 |
|
| 394 |
+
def process_single_comment(raw_data, LANGUAGE_MODEL, SENTIMENT_MODEL, LABELS ):
|
| 395 |
+
ml = MlProcessing(comment_dict=raw_data, language_model=LANGUAGE_MODEL, sentiment_model=SENTIMENT_MODEL, labels=LABELS )
|
| 396 |
processed_data = ml.main()
|
| 397 |
spans = processed_data.get('spans', list())
|
| 398 |
has_sentiments = True
|