"""
Programming Paradigm Classification - Inference Script

Uses trained SVM classifier and sentence embeddings for predictions.
"""

import pickle
import sys

import numpy as np

try:
    from sentence_transformers import SentenceTransformer
except ImportError:
    # Deferred failure: keeps the decision logic importable (e.g. for tests
    # that inject their own encoder) when the embedding library is missing.
    # ProgrammingParadigmClassifier.__init__ raises a clear error instead.
    SentenceTransformer = None


def _scores_to_probabilities(scores):
    """Convert raw decision scores for one sample into pseudo-probabilities.

    Args:
        scores: Either a 1-D sequence with one score per class, or a scalar.
            scikit-learn's ``decision_function`` yields a single score per
            sample for two-class problems (positive means ``classes_[1]``).

    Returns:
        A 1-D ``numpy.ndarray`` of non-negative values that sum to 1, ordered
        like the classifier's ``classes_``.
    """
    scores = np.asarray(scores, dtype=float)
    if scores.ndim == 0:
        # Two-class case: expand the scalar to the logit pair [0, s] so the
        # softmax below yields [1 - sigmoid(s), sigmoid(s)].
        scores = np.array([0.0, scores.item()])
    # Subtract the max before exponentiating for numerical stability.
    exp_scores = np.exp(scores - np.max(scores))
    return exp_scores / exp_scores.sum()


class ProgrammingParadigmClassifier:
    """Classifier for programming paradigm predictions."""

    # Cutoffs used by predict(). Two classes are reported together when both
    # exceed _DUAL_MIN_PROB and are closer than _DUAL_MAX_MARGIN; otherwise
    # the result is "unclear" when the best probability is below
    # _UNCLEAR_MAX_PROB or the lead over the runner-up is below
    # _UNCLEAR_MIN_MARGIN.
    _DUAL_MIN_PROB = 0.25
    _DUAL_MAX_MARGIN = 0.08
    _UNCLEAR_MAX_PROB = 0.30
    _UNCLEAR_MIN_MARGIN = 0.10

    def __init__(self, classifier_path='svm_classifier.pkl',
                 model_name_path='sentence_model_name.txt',
                 confidence_threshold=0.55):
        """Initialize classifier and embedding model.

        Args:
            classifier_path: Path to the pickled, already-trained classifier.
            model_name_path: Path to a text file holding the name of the
                sentence embedding model that was used during training.
            confidence_threshold: Stored on the instance and echoed in the
                start-up message.

        Raises:
            ImportError: If the sentence-transformers package is unavailable.
            OSError: If either file cannot be opened.
        """
        if SentenceTransformer is None:
            raise ImportError(
                "The 'sentence-transformers' package is required to load "
                "the embedding model."
            )

        print("Loading trained SVM classifier...")
        # SECURITY: pickle.load can execute arbitrary code while
        # deserializing. Only point classifier_path at files you trust.
        with open(classifier_path, 'rb') as f:
            self.classifier = pickle.load(f)

        # Load the model name that was used during training
        print("Reading embedding model name from training...")
        with open(model_name_path, 'r', encoding='utf-8') as f:
            model_name = f.read().strip()

        print(f"Loading sentence embedding model: {model_name}...")
        self.model = SentenceTransformer(model_name)

        # NOTE(review): this value is stored and printed, but predict() does
        # not consult it; the decision rules use the class-level cutoffs.
        self.confidence_threshold = confidence_threshold
        print(f"Models loaded! (Confidence threshold: {confidence_threshold})\n")

    def predict(self, text):
        """Predict programming paradigm for given text with uncertainty handling.

        Args:
            text: The text to classify.

        Returns:
            A ``(prediction, prob_dict, max_prob)`` tuple. ``prediction`` is
            a single class label, ``"<top> or <second>"`` when the two best
            classes are nearly tied, or ``"unclear"``. ``prob_dict`` maps
            every class label to its probability, and ``max_prob`` is the
            highest of those probabilities.
        """
        # Generate embedding
        embedding = self.model.encode([text])

        if hasattr(self.classifier, 'predict_proba'):
            # Classifier exposes real probabilities
            # (e.g. a CalibratedClassifierCV).
            probs = self.classifier.predict_proba(embedding)[0]
        else:
            # Margin-only classifier (e.g. LinearSVC): turn the decision
            # scores into pseudo-probabilities. The helper also handles the
            # scalar score returned for two-class models.
            scores = self.classifier.decision_function(embedding)[0]
            probs = _scores_to_probabilities(scores)
        probs = np.asarray(probs)

        prob_dict = dict(zip(self.classifier.classes_, probs))

        # Rank classes from most to least probable.
        sorted_indices = np.argsort(probs)[::-1]
        sorted_probs = probs[sorted_indices]
        top_classes = np.asarray(self.classifier.classes_)[sorted_indices]

        max_prob = sorted_probs[0]
        has_runner_up = len(sorted_probs) > 1
        second_max = sorted_probs[1] if has_runner_up else 0.0
        margin = max_prob - second_max

        top_class = top_classes[0]
        second_class = top_classes[1] if has_runner_up else None

        if (max_prob > self._DUAL_MIN_PROB
                and second_max > self._DUAL_MIN_PROB
                and margin < self._DUAL_MAX_MARGIN):
            # Both classes are viable - return both
            prediction = f"{top_class} or {second_class}"
        elif (max_prob < self._UNCLEAR_MAX_PROB
                or margin < self._UNCLEAR_MIN_MARGIN):
            prediction = "unclear"
        else:
            prediction = top_class

        return prediction, prob_dict, max_prob

    def predict_batch(self, texts):
        """Predict programming paradigms for multiple texts.

        Args:
            texts: Iterable of texts to classify.

        Returns:
            A list with one dict per input text, holding the keys ``text``,
            ``prediction``, ``probabilities`` and ``confidence``.
        """
        results = []
        for text in texts:
            prediction, probs, max_prob = self.predict(text)
            results.append({
                'text': text,
                'prediction': prediction,
                'probabilities': probs,
                'confidence': max_prob,
            })
        return results

    def display_prediction(self, text, prediction, probs, max_prob):
        """Display prediction results in formatted output.

        Args:
            text: The classified text; only its first 100 characters are shown.
            prediction: The prediction returned by ``predict``.
            probs: Mapping of class label to probability.
            max_prob: Highest class probability (accepted for symmetry with
                ``predict``; the values printed are derived from ``probs``).
        """
        print(f"\nInput: {text[:100]}{'...' if len(text) > 100 else ''}")

        # Format output for dual or single predictions
        if " or " in str(prediction):
            print(f"Predicted Paradigm: {prediction} (ambiguous - close call!)")
        elif prediction == "unclear":
            print(f"Predicted Paradigm: {prediction} (too uncertain)")
        else:
            print(f"Predicted Paradigm: {prediction} (confident)")

        # Get top 2 classes for margin display
        sorted_items = sorted(probs.items(), key=lambda x: x[1], reverse=True)
        top_class, top_prob = sorted_items[0]
        if len(sorted_items) > 1:
            second_class, second_prob = sorted_items[1]
        else:
            second_class, second_prob = None, 0.0
        margin = top_prob - second_prob

        print(f"Max: {top_class} ({top_prob:.3f}), 2nd: {second_class} ({second_prob:.3f}), Margin: {margin:.3f}")
        print("Class Probabilities:")
        for label, prob in sorted_items:
            print(f" {label:12s}: {prob:7.3f}")
        print("-" * 70)


def main():
    """Main inference pipeline: classify a fixed set of example texts."""
    print("=" * 70)
    print("Programming Paradigm Classification - Inference")
    print("=" * 70)

    # Initialize classifier
    clf = ProgrammingParadigmClassifier()

    # Example texts for inference
    test_texts = [
        "How do I make this function pure without any side effects?",
        "Why does my class hierarchy have so many levels of inheritance?",
        "What's the best way to center a div in CSS?",
        "This function just loops through the array and updates each element in place.",
        "I'm using lambda functions to transform this list with map and filter.",
        "How do I properly encapsulate private variables in my class?",
        "What's the most efficient way to iterate through this data structure?",
        "Can I use functional composition to chain these operations?",
    ]

    # Run inference on all examples
    for text in test_texts:
        prediction, probs, max_prob = clf.predict(text)
        clf.display_prediction(text, prediction, probs, max_prob)

    print("\n" + "=" * 70)
    print("Inference complete!")
    print("=" * 70)


def interactive_mode():
    """Run classifier in interactive mode until the user quits."""
    print("=" * 70)
    print("Programming Paradigm Classifier - Interactive Mode")
    print("=" * 70)
    print("Type 'quit' to exit\n")

    # Initialize classifier
    clf = ProgrammingParadigmClassifier()

    while True:
        try:
            text = input("\nEnter text to classify (or 'quit' to exit): ").strip()

            if text.lower() == 'quit':
                print("Exiting...")
                break

            if not text:
                print("Please enter some text.")
                continue

            prediction, probs, max_prob = clf.predict(text)
            clf.display_prediction(text, prediction, probs, max_prob)
        except (KeyboardInterrupt, EOFError):
            # EOFError: stdin was closed (Ctrl-D or exhausted piped input).
            # It must end the loop; otherwise every following input() call
            # fails the same way and the loop never terminates.
            print("\n\nExiting...")
            break
        except Exception as e:
            # Top-level boundary: report the problem and keep the session alive.
            print(f"Error: {e}")


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == '--interactive':
        interactive_mode()
    else:
        main()