# models/emotion_classifier.py

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline


class EmotionClassifier:
    """
    Wrapper around a pre-trained GoEmotions RoBERTa model.
    Uses: SamLowe/roberta-base-go_emotions

    The tokenizer and model are loaded once in the constructor and wrapped in
    a text-classification pipeline configured to return a score for every
    label; `predict_emotions` then ranks those scores.
    """

    def __init__(self, model_name: str = "SamLowe/roberta-base-go_emotions"):
        """
        Load the tokenizer and model and build the classification pipeline.

        Args:
            model_name: Identifier passed to `from_pretrained` for both the
                tokenizer and the sequence-classification model.
        """
        print("[EmotionClassifier] Loading model... This may take a moment the first time.")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        # `top_k=None` preserves the old "all scores" behavior without the deprecation warning.
        self.pipeline = TextClassificationPipeline(
            model=self.model,
            tokenizer=self.tokenizer,
            top_k=None
        )
        print("[EmotionClassifier] Model loaded successfully.")

    def predict_emotions(self, text: str, top_k: int = 3):
        """
        Predict top_k emotions for a given input text.

        Args:
            text: Input text. Empty or whitespace-only text yields `[]`
                without running the model.
            top_k: Maximum number of emotions to return. `None` returns every
                label; zero or a negative value yields `[]` without running
                the model.

        Returns:
            A list of {label, score} dicts sorted by score, highest first.
        """
        if not text or not text.strip():
            return []
        # A negative value used directly as a slice bound would silently drop
        # the lowest-scoring labels instead of limiting the result size.
        if top_k is not None and top_k <= 0:
            return []

        # Truncate over-long inputs to the tokenizer's maximum length so that
        # long texts are classified on their leading tokens rather than
        # failing inside the model.
        raw = self.pipeline(text, truncation=True)
        if not raw:
            return []

        # The pipeline normally wraps the per-label scores of a single input
        # in an outer list; accept an already-flat list of dicts as well so
        # the result shape does not matter to callers.
        scores = raw if isinstance(raw[0], dict) else raw[0]

        # Sort by score descending, then keep the requested number of labels.
        ranked = sorted(scores, key=lambda item: item["score"], reverse=True)
        return ranked[:top_k]


# Manual smoke test: run this file directly to print sample predictions.
if __name__ == "__main__":
    samples = (
        "I feel really scared because my period is very late.",
        "I'm so happy that my cycle is finally regular.",
        "I'm embarrassed to talk about my period with anyone.",
    )
    classifier = EmotionClassifier()

    for sentence in samples:
        print(f"\nText: {sentence}")
        # Show the five highest-scoring emotions for each sample sentence.
        for prediction in classifier.predict_emotions(sentence, top_k=5):
            print(f"  {prediction['label']}: {prediction['score']:.3f}")