Spaces:

AppliedInfo
/

turner_ml

Sleeping

App Files Files Community

aamirtaymoor committed on Feb 23, 2024

Commit

ffdedb5

verified ·

1 Parent(s): a44b3dc

Update ml_service.py

Browse files

Files changed (1) hide show

ml_service.py +19 -12

ml_service.py CHANGED Viewed

@@ -20,9 +20,13 @@ sentiment_model_file = f"ml_models/sentiment_model/model.ft"
 class MlProcessing:
-    def __init__(self, comment_dict):
         self.comment_dict = comment_dict
         self.is_cleaned = False
     def remove_prefix(self, label):
         return label.split('-')[-1]
@@ -148,7 +152,8 @@ class MlProcessing:
         return {'label': label, 'score': prob}
     def apply_sentiment_model(self, review_dict_entities):
-        nlp = spacy.load('en_core_web_sm')
         sentence_finder = SentenceBoundsFinder(nlp)
         positive_sentiment_matcher = self.configure_matcher(nlp, POSITIVE_SENTIMENT_PATTERNS)
         sentiment_model = self.load_sentiment_model()
@@ -176,7 +181,8 @@ class MlProcessing:
         return self.comment_dict
     def load_sentiment_model(self):
-        return fasttext.load_model(sentiment_model_file)
     def get_sentence_start(self, sentence_bounds, position):
         for start, end in sentence_bounds:
@@ -185,20 +191,20 @@ class MlProcessing:
         raise RuntimeError('Failed to get sentence bound')
-    def load_ner_model(self, max_seq_len=500, use_multiprocessing=False):
         args = {'overwrite_output_dir': False, 'reprocess_input_data': True, 'num_train_epochs': 30,
                 'evaluation_strategy': 'epoch', 'evaluate_during_training': True, 'silent': True,
                 'max_seq_length': max_seq_len, 'use_multiprocessing': use_multiprocessing,
                 'use_multiprocessing_for_evaluation': use_multiprocessing, 'fp16': True}
-        with open(labels_file) as f:
-            labels = json.load(f)
         return NERModel('longformer', ner_model_directory, args=args, use_cuda=False, labels=labels)
     def apply_ner_model(self):
-        nlp = spacy.load('en_core_web_sm')
-        nlp.add_pipe('sentencizer')
         regex = re.compile('(\(original.{0,3}\).+)', re.IGNORECASE | re.MULTILINE | re.DOTALL)
         if self.comment_dict['skip']:
@@ -299,8 +305,9 @@ class MlProcessing:
 class SentenceBoundsFinder:
     def __init__(self, nlp=None):
-        self._nlp = nlp or spacy.load('en_core_web_sm')
-        self._nlp.add_pipe('sentencizer')
     def __call__(self, text):
         bounds = []
@@ -384,8 +391,8 @@ class ReviewsCleaner:
             text = text[:-1]
         return text
-def process_single_comment(raw_data):
-    ml = MlProcessing(comment_dict=raw_data)
     processed_data = ml.main()
     spans = processed_data.get('spans', list())
     has_sentiments = True

 class MlProcessing:
+    def __init__(self, comment_dict, language_model, sentiment_model, labels):
         self.comment_dict = comment_dict
         self.is_cleaned = False
+        self.language_model = language_model
+        self.sentiment_model = sentiment_model
+        self.labels = labels
     def remove_prefix(self, label):
         return label.split('-')[-1]
         return {'label': label, 'score': prob}
     def apply_sentiment_model(self, review_dict_entities):
+        # nlp = spacy.load('en_core_web_sm')
+        nlp = self.sentiment_model
         sentence_finder = SentenceBoundsFinder(nlp)
         positive_sentiment_matcher = self.configure_matcher(nlp, POSITIVE_SENTIMENT_PATTERNS)
         sentiment_model = self.load_sentiment_model()
         return self.comment_dict
     def load_sentiment_model(self):
+        # return fasttext.load_model(sentiment_model_file)
+        return self.sentiment_model
     def get_sentence_start(self, sentence_bounds, position):
         for start, end in sentence_bounds:
         raise RuntimeError('Failed to get sentence bound')
+    def load_ner_model(self, max_seq_len=500, use_multiprocessing=True):
         args = {'overwrite_output_dir': False, 'reprocess_input_data': True, 'num_train_epochs': 30,
                 'evaluation_strategy': 'epoch', 'evaluate_during_training': True, 'silent': True,
                 'max_seq_length': max_seq_len, 'use_multiprocessing': use_multiprocessing,
                 'use_multiprocessing_for_evaluation': use_multiprocessing, 'fp16': True}
+        labels = self.labels
         return NERModel('longformer', ner_model_directory, args=args, use_cuda=False, labels=labels)
     def apply_ner_model(self):
+        nlp = self.language_model
+        # nlp = spacy.load('en_core_web_sm')
+        # nlp.add_pipe('sentencizer')
         regex = re.compile('(\(original.{0,3}\).+)', re.IGNORECASE | re.MULTILINE | re.DOTALL)
         if self.comment_dict['skip']:
 class SentenceBoundsFinder:
     def __init__(self, nlp=None):
+        # self._nlp = nlp or spacy.load('en_core_web_sm')
+        # self._nlp.add_pipe('sentencizer')
+        self._nlp = nlp or self.language_model
     def __call__(self, text):
         bounds = []
             text = text[:-1]
         return text
+def process_single_comment(raw_data, LANGUAGE_MODEL, SENTIMENT_MODEL, LABELS ):
+    ml = MlProcessing(comment_dict=raw_data, language_model=LANGUAGE_MODEL, sentiment_model=SENTIMENT_MODEL, labels=LABELS )
     processed_data = ml.main()
     spans = processed_data.get('spans', list())
     has_sentiments = True