File size: 4,331 Bytes
250d4b8
10789df
250d4b8
 
24bacd6
250d4b8
e55b6a5
 
 
ff77f42
e55b6a5
10789df
e55b6a5
250d4b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e55b6a5
10789df
 
 
250d4b8
 
 
 
10789df
250d4b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10789df
250d4b8
10789df
 
 
250d4b8
e55b6a5
250d4b8
 
 
10789df
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from transformers import pipeline
from sklearn.preprocessing import LabelEncoder
import joblib
import torch
import os

# --- Module-level setup: runs once at import time (e.g. when the HF
# --- inference toolkit loads this handler file).

# Debugging: Print current directory and contents
# (helps diagnose path issues when the container layout differs from local dev)
print("Current working directory:", os.getcwd())
print("Contents of the directory:", os.listdir())

# Load the label encoder
# NOTE(review): presumably maps the model's LABEL_<n> indices back to the
# string classes 'positive'/'negative'/'neutral' — confirm against training code.
label_encoder = joblib.load('/repository/label_encoder.pkl')  # Use absolute path
print("Label encoder loaded successfully.")

# Load the model and tokenizer from Hugging Face
# (downloads from the Hub on first run; cached afterwards)
model_name = "SCANSKY/distilbertTourism-multilingual-sentiment"
sentiment_analyzer = pipeline(
    'sentiment-analysis',
    model=model_name,
    tokenizer=model_name,
    device=0 if torch.cuda.is_available() else -1  # Use GPU if available
)

def get_average_sentiment(positive_count, negative_count, neutral_count):
    """Return the dominant sentiment label among the three counts.

    Ties resolve in favour of "positive", then "negative", then "neutral"
    (the fixed comparison order below); an all-zero total yields "neutral".
    """
    total = positive_count + negative_count + neutral_count
    if not total:
        return "neutral"

    # Percentage share of each class; only their relative order matters here.
    shares = {
        "positive": positive_count / total * 100,
        "negative": negative_count / total * 100,
        "neutral": neutral_count / total * 100,
    }
    top = max(shares.values())
    for label in ("positive", "negative", "neutral"):
        if shares[label] == top:
            return label

class EndpointHandler:
    """Hugging Face inference-endpoint handler for line-by-line sentiment.

    Relies on the module-level ``sentiment_analyzer`` pipeline and
    ``label_encoder`` loaded at import time.
    """

    def __init__(self, model_dir=None):
        # The model/tokenizer live at module scope; nothing to set up here.
        # ``model_dir`` exists only because the HF inference toolkit passes it.
        pass

    def preprocess(self, data):
        """Extract the raw input text from the request payload."""
        return data.get("inputs", "")

    def inference(self, text):
        """Run sentiment analysis on each non-blank line of ``text``.

        Returns an ``{"error": ...}`` dict for empty input, otherwise a
        summary dict with per-line results and aggregate statistics.
        """
        if not text.strip():
            return {"error": "Please enter some text for sentiment analysis."}

        # Keep only non-blank lines, whitespace-trimmed.
        lines = [stripped for stripped in (raw.strip() for raw in text.split('\n')) if stripped]
        if not lines:
            return {"error": "Please enter valid text for sentiment analysis."}

        counts = {'positive': 0, 'negative': 0, 'neutral': 0}
        confidence_sum = 0
        line_results = []

        for line in lines:
            prediction = sentiment_analyzer(line)[0]
            # Pipeline labels look like "LABEL_<n>"; decode <n> back to the
            # original class string via the fitted label encoder.
            encoded_index = int(prediction['label'].split('_')[-1])
            sentiment = label_encoder.inverse_transform([encoded_index])[0]
            confidence = prediction['score'] * 100

            line_results.append({
                'text': line,
                'sentiment': sentiment,
                'confidence': confidence
            })

            # Anything that isn't positive/negative is tallied as neutral.
            bucket = sentiment if sentiment in ('positive', 'negative') else 'neutral'
            counts[bucket] += 1
            confidence_sum += confidence

        n = len(lines)
        return {
            "total_lines_analyzed": n,
            "average_confidence": confidence_sum / n,
            "average_sentiment": get_average_sentiment(
                counts['positive'], counts['negative'], counts['neutral']
            ),
            "sentiment_distribution": {
                "positive": (counts['positive'] / n) * 100,
                "negative": (counts['negative'] / n) * 100,
                "neutral": (counts['neutral'] / n) * 100
            },
            "line_results": line_results
        }

    def postprocess(self, output):
        """Reduce the inference summary to the response body (a list)."""
        if "error" in output:
            return [{"error": output["error"]}]
        # Only the per-line results are returned to the caller.
        return output["line_results"]

    def __call__(self, data):
        """Entry point: preprocess -> inference -> postprocess."""
        return self.postprocess(self.inference(self.preprocess(data)))