File size: 2,579 Bytes

833f879
 
 
 
 
 
 
 
 
 
a38f2ca
 
 
 
 
 
 
 
 
 
833f879
 
 
 
 
 
a38f2ca
 
833f879
 
 
 
eff95d2
d08e49e
 
 
 
eff95d2
d08e49e
 
 
eff95d2
d08e49e
eff95d2
d08e49e
 
 
eff95d2
d08e49e
eff95d2
d08e49e
 
 
eff95d2
d08e49e
eff95d2
d08e49e
 
 
 
eff95d2
d08e49e
eff95d2
d08e49e
 
 
 
 
a38f2ca

import joblib
import os
import json

class EndpointHandler:
    """Serve rating predictions from a saved vectorizer and classifier.

    The handler loads its artifacts once at construction time and is then
    called with a request payload shaped like
    ``{"inputs": {"review": "<text>"}}``. Every call returns a JSON string,
    either the prediction or an ``{"error": ...}`` object.
    """

    def __init__(self, model_dir):
        """Load and sanity-check the artifacts stored in *model_dir*.

        Args:
            model_dir: Directory containing ``vectorizer.joblib``,
                ``logistic_classifier.joblib`` and ``tokenizer.json``.

        Raises:
            ValueError: If the vectorizer has no ``vocabulary_`` attribute,
                the model has no ``classes_`` attribute, or the
                ``"tokenizer"`` entry of ``tokenizer.json`` is not
                ``"split"``.

        Errors raised while reading or decoding the files are not caught
        here and propagate to the caller unchanged.
        """
        self.model_dir = model_dir
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only point model_dir at trusted artifacts.
        self.vectorizer = joblib.load(os.path.join(model_dir, 'vectorizer.joblib'))
        self.model = joblib.load(os.path.join(model_dir, 'logistic_classifier.joblib'))

        # Check if the vectorizer is fitted
        if not hasattr(self.vectorizer, 'vocabulary_'):
            raise ValueError("The vectorizer is not fitted. Ensure the vectorizer is trained and saved correctly.")

        # Check if the model is fitted
        if not hasattr(self.model, 'classes_'):
            raise ValueError("The model is not fitted. Ensure the model is trained and saved correctly.")

        print("Vectorizer and model loaded successfully.")

        # Verify that the tokenizer configuration is correct. The encoding is
        # pinned so the read does not depend on the platform's locale.
        with open(os.path.join(model_dir, "tokenizer.json"), "r", encoding="utf-8") as file:
            tokenizer_config = json.load(file)
        if tokenizer_config['tokenizer'] != 'split':
            raise ValueError("Tokenizer configuration does not match the expected tokenizer.")

        print("Tokenizer configuration verified.")

    @staticmethod
    def _error(message):
        """Return *message* wrapped as a JSON ``{"error": ...}`` string."""
        return json.dumps({"error": message})

    def predict_rating(self, review):
        """Return the model's prediction for a single *review* as an ``int``.

        Args:
            review: The review text to score.

        Returns:
            The first (and only) prediction for the review, cast to ``int``.
        """
        review_tfidf = self.vectorizer.transform([review])
        predicted_rating = self.model.predict(review_tfidf)[0]
        return int(predicted_rating)

    def __call__(self, inputs):
        """Handle one request and return the response as a JSON string.

        Args:
            inputs: The request payload, either as a JSON document
                (``str``, ``bytes`` or ``bytearray``) or as an
                already-decoded object. The expected shape is
                ``{"inputs": {"review": "<non-empty text>"}}``.

        Returns:
            A JSON string. On success it holds ``"review"`` and
            ``"predicted_rating"``; on any failure it holds a single
            ``"error"`` message. Exceptions are reported in the response
            rather than raised.
        """
        try:
            # Only textual/binary payloads need decoding; an object that was
            # decoded upstream is used as-is instead of failing in json.loads.
            if isinstance(inputs, (str, bytes, bytearray)):
                inputs_dict = json.loads(inputs)
            else:
                inputs_dict = inputs

            # Check that the payload is an object with an 'inputs' key. The
            # isinstance guard keeps `in` from doing a substring/element test
            # on strings and lists, or raising on scalars.
            if not isinstance(inputs_dict, dict) or 'inputs' not in inputs_dict:
                return self._error("No 'inputs' key provided in the JSON input.")

            inputs_data = inputs_dict['inputs']

            # Check that 'inputs' is an object with a 'review' key
            if not isinstance(inputs_data, dict) or 'review' not in inputs_data:
                return self._error("No 'review' key provided in the 'inputs' object.")

            review = inputs_data['review']

            # Validate that the review is a non-empty string
            if not isinstance(review, str) or not review.strip():
                return self._error("Review must be a non-empty string.")

            response = {
                "review": review,
                "predicted_rating": self.predict_rating(review),
            }
            return json.dumps(response)

        except json.JSONDecodeError:
            return self._error("Invalid JSON format in input.")

        except Exception as e:
            # Request boundary: report the failure to the caller instead of
            # letting it escape the handler.
            return self._error(str(e))