"""
Programming Paradigm Classification - Inference Script
Uses trained SVM classifier and sentence embeddings for predictions
"""

import pickle
import numpy as np
from sentence_transformers import SentenceTransformer
import sys


class ProgrammingParadigmClassifier:
    """Classify free text into a programming paradigm.

    Text is embedded with a sentence-embedding model and the embedding is
    scored by a pickled classifier. The classifier must expose ``classes_``
    and either ``predict_proba`` (e.g. CalibratedClassifierCV) or
    ``decision_function`` (e.g. LinearSVC).

    The class-level constants below drive the decision rules in
    :meth:`predict`; they can be overridden per instance or in a subclass.
    """

    # Both of the top two classes must exceed this probability to be
    # reported together as "A or B".
    DUAL_MIN_PROB = 0.25
    # ...and they must be closer together than this margin.
    DUAL_MAX_MARGIN = 0.08
    # A best probability below this value is reported as "unclear".
    UNCLEAR_MAX_PROB = 0.30
    # A top-two margin below this value is reported as "unclear".
    UNCLEAR_MAX_MARGIN = 0.10

    def __init__(self, classifier_path='svm_classifier.pkl',
                 model_name_path='sentence_model_name.txt',
                 confidence_threshold=0.55):
        """Load the pickled classifier and the embedding model it was trained with.

        Args:
            classifier_path: Path to the pickled classifier.
            model_name_path: Path to a text file holding the name of the
                sentence-embedding model used during training.
            confidence_threshold: Stored on the instance and echoed in the
                load message. NOTE: ``predict`` does not consult this value;
                its rules use the class-level threshold constants.

        Raises:
            OSError: If either file cannot be opened.
            ValueError: If the model-name file is empty.
        """
        print("Loading trained SVM classifier...")
        # SECURITY: pickle.load can execute arbitrary code. Only point
        # classifier_path at files produced by a trusted training run.
        with open(classifier_path, 'rb') as f:
            self.classifier = pickle.load(f)

        # Load the model name that was used during training
        print("Reading embedding model name from training...")
        with open(model_name_path, 'r', encoding='utf-8') as f:
            model_name = f.read().strip()
        if not model_name:
            # Fail here with a clear message instead of deep inside model loading.
            raise ValueError(
                f"No embedding model name found in {model_name_path!r}"
            )

        print(f"Loading sentence embedding model: {model_name}...")
        self.model = SentenceTransformer(model_name)
        self.confidence_threshold = confidence_threshold
        print(f"Models loaded! (Confidence threshold: {confidence_threshold})\n")

    def _class_probabilities(self, embedding):
        """Return one probability per entry of ``classifier.classes_``.

        ``embedding`` is the 2-D array produced by encoding a single text.
        """
        if hasattr(self.classifier, 'predict_proba'):
            # CalibratedClassifierCV - has real probabilities
            return np.asarray(self.classifier.predict_proba(embedding)[0], dtype=float)

        # LinearSVC - use decision_function and convert to probabilities
        raw = self.classifier.decision_function(embedding)[0]
        scores = np.atleast_1d(np.asarray(raw, dtype=float))
        if scores.size == 1:
            # Binary classifiers return a single signed score per sample
            # (positive favours classes_[1]). Expanding it to [0, s] makes the
            # softmax below equal to [1 - sigmoid(s), sigmoid(s)].
            scores = np.array([0.0, scores[0]])
        # Softmax, shifted by the max for numerical stability
        exp_scores = np.exp(scores - np.max(scores))
        return exp_scores / exp_scores.sum()

    def _decide(self, top_class, second_class, max_prob, second_max):
        """Apply the ambiguity / uncertainty rules to the top two candidates."""
        margin = max_prob - second_max
        if (max_prob > self.DUAL_MIN_PROB
                and second_max > self.DUAL_MIN_PROB
                and margin < self.DUAL_MAX_MARGIN):
            # Both classes are viable - return both
            return f"{top_class} or {second_class}"
        if max_prob < self.UNCLEAR_MAX_PROB or margin < self.UNCLEAR_MAX_MARGIN:
            return "unclear"
        return top_class

    def predict(self, text):
        """Predict the programming paradigm for ``text`` with uncertainty handling.

        Args:
            text: The text to classify.

        Returns:
            A tuple ``(prediction, prob_dict, max_prob)`` where ``prediction``
            is a class label, ``"<top> or <second>"`` for a close call, or
            ``"unclear"``; ``prob_dict`` maps every class label to its
            probability; ``max_prob`` is the highest probability.
        """
        embedding = self.model.encode([text])
        probs = self._class_probabilities(embedding)
        classes = np.asarray(self.classifier.classes_)
        prob_dict = dict(zip(classes, probs))

        # Indices of the classes from most to least probable
        order = np.argsort(probs)[::-1]
        top_class = classes[order[0]]
        max_prob = probs[order[0]]
        if len(order) > 1:
            second_class = classes[order[1]]
            second_max = probs[order[1]]
        else:
            second_class, second_max = None, 0.0

        prediction = self._decide(top_class, second_class, max_prob, second_max)
        return prediction, prob_dict, max_prob

    def predict_batch(self, texts):
        """Predict programming paradigms for multiple texts.

        Returns:
            A list with one dict per input text, holding the keys ``'text'``,
            ``'prediction'``, ``'probabilities'`` and ``'confidence'``.
        """
        results = []
        for text in texts:
            prediction, probs, max_prob = self.predict(text)
            results.append({
                'text': text,
                'prediction': prediction,
                'probabilities': probs,
                'confidence': max_prob,
            })
        return results

    def display_prediction(self, text, prediction, probs, max_prob):
        """Print a formatted report for one prediction.

        Args:
            text: The classified text; only the first 100 characters are shown.
            prediction: The prediction string returned by ``predict``.
            probs: Mapping of class label to probability.
            max_prob: Accepted for symmetry with ``predict``'s return value;
                the displayed figures are recomputed from ``probs``.
        """
        print(f"\nInput: {text[:100]}{'...' if len(text) > 100 else ''}")

        # Format output for dual or single predictions
        if " or " in str(prediction):
            print(f"Predicted Paradigm: {prediction} (ambiguous - close call!)")
        elif prediction == "unclear":
            print(f"Predicted Paradigm: {prediction} (too uncertain)")
        else:
            print(f"Predicted Paradigm: {prediction} (confident)")

        # Get top 2 classes for margin display
        sorted_items = sorted(probs.items(), key=lambda x: x[1], reverse=True)
        top_class, top_prob = sorted_items[0]
        second_class, second_prob = sorted_items[1] if len(sorted_items) > 1 else (None, 0.0)
        margin = top_prob - second_prob

        print(f"Max: {top_class} ({top_prob:.3f}), 2nd: {second_class} ({second_prob:.3f}), Margin: {margin:.3f}")
        print("Class Probabilities:")
        for label, prob in sorted_items:
            # str() so non-string labels (e.g. integer classes) accept the 's' format
            print(f"  {str(label):12s}: {prob:7.3f}")
        print("-" * 70)


def main():
    """Classify a fixed set of sample questions and print each result."""
    banner = "=" * 70
    print(banner)
    print("Programming Paradigm Classification - Inference")
    print(banner)

    # Loads the pickled classifier and embedding model from their default paths
    paradigm_clf = ProgrammingParadigmClassifier()

    # Sample questions to run through the classifier
    samples = (
        "How do I make this function pure without any side effects?",
        "Why does my class hierarchy have so many levels of inheritance?",
        "What's the best way to center a div in CSS?",
        "This function just loops through the array and updates each element in place.",
        "I'm using lambda functions to transform this list with map and filter.",
        "How do I properly encapsulate private variables in my class?",
        "What's the most efficient way to iterate through this data structure?",
        "Can I use functional composition to chain these operations?",
    )

    # Predict and report one sample at a time
    for sample in samples:
        label, class_probs, top_prob = paradigm_clf.predict(sample)
        paradigm_clf.display_prediction(sample, label, class_probs, top_prob)

    print("\n" + banner)
    print("Inference complete!")
    print(banner)


def interactive_mode():
    """Run the classifier in an interactive read-classify-print loop.

    Prompts for text until the user types 'quit', presses Ctrl-C, or the
    input stream ends (Ctrl-D / closed stdin). Errors raised while
    classifying a single input are printed and the loop continues.
    """
    print("=" * 70)
    print("Programming Paradigm Classifier - Interactive Mode")
    print("=" * 70)
    print("Type 'quit' to exit\n")

    # Initialize classifier
    clf = ProgrammingParadigmClassifier()

    while True:
        try:
            text = input("\nEnter text to classify (or 'quit' to exit): ").strip()

            if text.lower() == 'quit':
                print("Exiting...")
                break

            if not text:
                print("Please enter some text.")
                continue

            prediction, probs, max_prob = clf.predict(text)
            clf.display_prediction(text, prediction, probs, max_prob)

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: once stdin is exhausted every
            # further input() call raises it again, so treating it as a
            # per-input error would print "Error: " forever.
            print("\n\nExiting...")
            break
        except Exception as e:
            # Top-level boundary: report the failure and keep the session alive.
            print(f"Error: {e}")


if __name__ == "__main__":
    # A first CLI argument of exactly '--interactive' selects the prompt loop;
    # anything else (or no argument) runs the canned examples.
    wants_interactive = sys.argv[1:2] == ['--interactive']
    entry_point = interactive_mode if wants_interactive else main
    entry_point()