import json
import os

import joblib


class EndpointHandler:
    """Serve rating predictions from a persisted vectorizer and classifier.

    The handler loads two joblib artifacts from ``model_dir`` and checks a
    ``tokenizer.json`` file next to them. Calling the instance with a
    request payload returns a JSON string holding either the prediction or
    an ``"error"`` message.
    """

    def __init__(self, model_dir):
        """Load and sanity-check the artifacts stored in ``model_dir``.

        Args:
            model_dir: Directory containing ``vectorizer.joblib``,
                ``logistic_classifier.joblib`` and ``tokenizer.json``.

        Raises:
            ValueError: If the vectorizer has no ``vocabulary_`` attribute,
                the model has no ``classes_`` attribute, or the
                ``"tokenizer"`` entry of ``tokenizer.json`` is not
                ``"split"``.

        Errors raised while reading the files (missing file, invalid JSON,
        missing ``"tokenizer"`` key) are not caught here and propagate.
        """
        self.model_dir = model_dir

        # SECURITY: joblib.load unpickles arbitrary Python objects, so
        # model_dir must only ever contain artifacts from a trusted source.
        self.vectorizer = joblib.load(os.path.join(model_dir, 'vectorizer.joblib'))
        self.model = joblib.load(os.path.join(model_dir, 'logistic_classifier.joblib'))

        # Fitted estimators expose these attributes; their absence means the
        # artifact was saved before training.
        if not hasattr(self.vectorizer, 'vocabulary_'):
            raise ValueError("The vectorizer is not fitted. Ensure the vectorizer is trained and saved correctly.")

        if not hasattr(self.model, 'classes_'):
            raise ValueError("The model is not fitted. Ensure the model is trained and saved correctly.")

        print("Vectorizer and model loaded successfully.")

        # Explicit encoding so the file is read the same way regardless of
        # the platform's locale default.
        with open(os.path.join(model_dir, "tokenizer.json"), "r", encoding="utf-8") as file:
            tokenizer_config = json.load(file)

        if tokenizer_config['tokenizer'] != 'split':
            raise ValueError("Tokenizer configuration does not match the expected tokenizer.")

        print("Tokenizer configuration verified.")

    def predict_rating(self, review):
        """Return the predicted rating for one review as an ``int``.

        Args:
            review: Review text; it is wrapped in a one-element list before
                being passed to the vectorizer's ``transform``.

        Returns:
            The first (only) prediction of the model, converted with
            ``int()``.
        """
        review_tfidf = self.vectorizer.transform([review])
        predicted_rating = self.model.predict(review_tfidf)[0]
        return int(predicted_rating)

    def __call__(self, inputs):
        """Handle one request and return the response as a JSON string.

        Args:
            inputs: Either a JSON document (``str``, ``bytes`` or
                ``bytearray``) or an already-parsed ``dict``. The expected
                shape is ``{"inputs": {"review": "<text>"}}``.

        Returns:
            A JSON string. On success it contains ``"review"`` and
            ``"predicted_rating"``; otherwise it contains a single
            ``"error"`` key describing the problem. This method does not
            raise for ``Exception`` subclasses: they are reported through
            the ``"error"`` field.
        """
        try:
            # Only decode when the payload is still serialized; a dict is
            # used as-is instead of failing inside json.loads.
            if isinstance(inputs, (str, bytes, bytearray)):
                payload = json.loads(inputs)
            else:
                payload = inputs

            # The isinstance guards matter: ``in`` on a string is a
            # substring test and on a list an element test, which would let
            # malformed payloads through to a confusing indexing error.
            if not isinstance(payload, dict) or 'inputs' not in payload:
                return json.dumps({"error": "No 'inputs' key provided in the JSON input."})

            inputs_data = payload['inputs']

            if not isinstance(inputs_data, dict) or 'review' not in inputs_data:
                return json.dumps({"error": "No 'review' key provided in the 'inputs' object."})

            review = inputs_data['review']

            if not isinstance(review, str) or not review.strip():
                return json.dumps({"error": "Review must be a non-empty string."})

            predicted_rating = self.predict_rating(review)

            response = {
                "review": review,
                "predicted_rating": predicted_rating,
            }
            return json.dumps(response)

        except json.JSONDecodeError:
            return json.dumps({"error": "Invalid JSON format in input."})
        except Exception as e:
            # Request boundary: report any other failure to the caller
            # instead of letting it escape the handler.
            return json.dumps({"error": str(e)})