File size: 3,072 Bytes
7f7b446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eef6783
 
 
7f7b446
 
 
 
eef6783
 
 
 
 
 
 
 
 
 
 
 
 
7f7b446
 
 
 
 
 
eef6783
 
 
 
 
 
 
 
 
 
 
 
 
 
bdbbcf1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# from sentence_transformers.cross_encoder import CrossEncoder
# import torch

# class SimilarityModelHandler:
#     # HOLDING THE MODEL INSTANCE TO PREVENT RELOADING
#     SIMILARITY_MODEL_INSTANCE = None

#     def __init__(self):
#         # CONSTRUCTOR: LOADING THE MODEL IF IT DOESN'T EXIST
#         if not SimilarityModelHandler.SIMILARITY_MODEL_INSTANCE:
#             print("INITIALIZING AND LOADING THE MODEL...")
#             # CHECKING FOR GPU, FALLBACK TO CPU
#             device = 'cuda' if torch.cuda.is_available() else 'cpu'
#             print(f"SERVICE IS RUNNING ON DEVICE: {device}")
            
#             # LOADING THE PRE-TRAINED CROSS-ENCODER MODEL
#             model_Name = 'cross-encoder/stsb-roberta-base'
#             #cross-encoder/stsb-roberta-large'
#             SimilarityModelHandler.SIMILARITY_MODEL_INSTANCE = CrossEncoder(model_Name, device=device)
#             print("MODEL LOADED SUCCESSFULLY.")

#     def calculate_Similarity(self, text_One: str, text_Two: str) -> float:
#         """
#         CALCULATES THE SIMILARITY SCORE BETWEEN TWO TEXTS.
#         """
#         # GETTING THE SCORE FROM THE MODEL( 0-1 )
#         finalScore = self.SIMILARITY_MODEL_INSTANCE.predict([(text_One, text_Two)])

#         # CONVERTING FROM NUMPY ARRAY TO A SIMPLE FLOAT
#         return finalScore.item()


# # CREATING A SINGLE INSTANCE TO BE USED BY THE API
# MODEL_HANDLER = SimilarityModelHandler()





import os
from sentence_transformers.cross_encoder import CrossEncoder
import torch

# SET CACHE DIRECTORY TO A WRITABLE LOCATION
# Needed when the process runs with a read-only home dir (e.g. serverless /
# container deployments); /tmp is writable there.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in newer huggingface_hub
# releases in favor of HF_HOME — both are set here, presumably for
# compatibility with older library versions; confirm against the pinned deps.
os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
os.environ['HF_HOME'] = '/tmp/hf_home'

class SimilarityModelHandler:
    """Loads and caches a cross-encoder model for text-similarity scoring.

    The loaded model is stored on the class attribute
    ``SIMILARITY_MODEL_INSTANCE`` so it is loaded at most once per process,
    no matter how many handler instances are created.
    """

    # Class-level cache of the loaded model; None until first load.
    SIMILARITY_MODEL_INSTANCE = None

    def __init__(self):
        """Load the cross-encoder on first construction; no-op afterwards."""
        # Identity check (`is None`) instead of truthiness: a model object
        # that happened to evaluate falsy would otherwise be reloaded on
        # every instantiation.
        if SimilarityModelHandler.SIMILARITY_MODEL_INSTANCE is None:
            print("INITIALIZING AND LOADING THE MODEL...")
            # Prefer GPU when available, fall back to CPU.
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            print(f"SERVICE IS RUNNING ON DEVICE: {device}")

            # Pre-trained STS cross-encoder from the Hugging Face hub.
            model_name = 'cross-encoder/stsb-roberta-large'
            SimilarityModelHandler.SIMILARITY_MODEL_INSTANCE = CrossEncoder(
                model_name,
                device=device,
                cache_folder='/tmp/transformers_cache',  # writable cache location
            )
            print("MODEL LOADED SUCCESSFULLY.")

    def calculate_Similarity(self, text_One: str, text_Two: str) -> float:
        """Return the similarity score between *text_One* and *text_Two*.

        The STS cross-encoder emits a score in roughly the 0-1 range,
        higher meaning more similar.
        """
        # predict() takes a list of sentence pairs and returns an array of
        # one score per pair.
        final_score = self.SIMILARITY_MODEL_INSTANCE.predict([(text_One, text_Two)])

        # Collapse the single-element array to a plain Python float.
        return final_score.item()


# CREATING A SINGLE INSTANCE TO BE USED BY THE API
# NOTE: this runs at import time, so importing this module triggers the
# (slow) model download/load before the API starts serving.
MODEL_HANDLER = SimilarityModelHandler()