| """
|
| Programming Paradigm Classification - Inference Script
|
| Uses trained SVM classifier and sentence embeddings for predictions
|
| """
|
|
|
| import pickle
|
| import numpy as np
|
| from sentence_transformers import SentenceTransformer
|
| import sys
|
|
|
|
|
class ProgrammingParadigmClassifier:
    """Classify text into programming paradigms using a trained SVM over sentence embeddings."""

    def __init__(self, classifier_path='svm_classifier.pkl',
                 model_name_path='sentence_model_name.txt',
                 confidence_threshold=0.55):
        """Load the pickled SVM and the sentence-embedding model recorded at training time.

        Args:
            classifier_path: Pickle file holding the trained SVM classifier.
            model_name_path: Text file naming the SentenceTransformer used for training.
            confidence_threshold: Stored on the instance. NOTE(review): the decision
                rules in predict() use their own hard-coded cutoffs, so this value is
                currently informational only — confirm intended use.
        """
        print("Loading trained SVM classifier...")
        # NOTE(review): pickle.load can execute arbitrary code — only load trusted artifacts.
        with open(classifier_path, 'rb') as fh:
            self.classifier = pickle.load(fh)

        print("Reading embedding model name from training...")
        with open(model_name_path, 'r') as fh:
            model_name = fh.read().strip()

        print(f"Loading sentence embedding model: {model_name}...")
        self.model = SentenceTransformer(model_name)
        self.confidence_threshold = confidence_threshold
        print(f"Models loaded! (Confidence threshold: {confidence_threshold})\n")

    def predict(self, text):
        """Return (label, class->probability dict, top probability) for *text*.

        The label is a single class when confident, "A or B" when two classes
        are nearly tied, or "unclear" when no class stands out.
        """
        embedding = self.model.encode([text])

        # Prefer calibrated probabilities; otherwise softmax the decision scores
        # (subtracting the max keeps the exponentials numerically stable).
        if hasattr(self.classifier, 'predict_proba'):
            probs = self.classifier.predict_proba(embedding)[0]
        else:
            raw_scores = self.classifier.decision_function(embedding)[0]
            shifted = np.exp(raw_scores - np.max(raw_scores))
            probs = shifted / shifted.sum()

        prob_dict = dict(zip(self.classifier.classes_, probs))

        # Rank classes from most to least probable.
        order = np.argsort(probs)[::-1]
        ranked_probs = probs[order]
        ranked_labels = self.classifier.classes_[order]

        top_prob = ranked_probs[0]
        top_class = ranked_labels[0]
        runner_up = ranked_probs[1] if len(ranked_probs) > 1 else 0.0
        second_class = ranked_labels[1] if len(ranked_labels) > 1 else None
        margin = top_prob - runner_up

        # Two plausible classes separated by a sliver -> report both.
        if top_prob > 0.25 and runner_up > 0.25 and margin < 0.08:
            prediction = f"{top_class} or {second_class}"
        # Weak winner or tiny margin -> refuse to pick.
        elif top_prob < 0.30 or margin < 0.10:
            prediction = "unclear"
        else:
            prediction = top_class

        return prediction, prob_dict, top_prob

    def predict_batch(self, texts):
        """Run predict() over *texts*; one result dict per input."""
        def as_record(entry):
            # One prediction per input, packaged for downstream consumers.
            label, distribution, confidence = self.predict(entry)
            return {
                'text': entry,
                'prediction': label,
                'probabilities': distribution,
                'confidence': confidence
            }

        return [as_record(entry) for entry in texts]

    def display_prediction(self, text, prediction, probs, max_prob):
        """Pretty-print one prediction: input preview, label, top-two summary, full distribution."""
        print(f"\nInput: {text[:100]}{'...' if len(text) > 100 else ''}")

        # Qualify the label: ambiguous ("A or B"), refused ("unclear"), or confident.
        if " or " in str(prediction):
            print(f"Predicted Paradigm: {prediction} (ambiguous - close call!)")
        elif prediction == "unclear":
            print(f"Predicted Paradigm: {prediction} (too uncertain)")
        else:
            print(f"Predicted Paradigm: {prediction} (confident)")

        ranked = sorted(probs.items(), key=lambda kv: kv[1], reverse=True)
        top_class, top_prob = ranked[0]
        if len(ranked) > 1:
            second_class, second_prob = ranked[1]
        else:
            second_class, second_prob = None, 0.0
        margin = top_prob - second_prob

        print(f"Max: {top_class} ({top_prob:.3f}), 2nd: {second_class} ({second_prob:.3f}), Margin: {margin:.3f}")
        print("Class Probabilities:")
        for label, prob in ranked:
            print(f"  {label:12s}: {prob:7.3f}")
        print("-" * 70)
|
|
|
|
|
def main():
    """Run the classifier over a fixed set of demo prompts and print each result."""
    banner = "=" * 70
    print(banner)
    print("Programming Paradigm Classification - Inference")
    print(banner)

    classifier = ProgrammingParadigmClassifier()

    # Demo prompts spanning functional, OOP, procedural, and off-topic text.
    demo_prompts = [
        "How do I make this function pure without any side effects?",
        "Why does my class hierarchy have so many levels of inheritance?",
        "What's the best way to center a div in CSS?",
        "This function just loops through the array and updates each element in place.",
        "I'm using lambda functions to transform this list with map and filter.",
        "How do I properly encapsulate private variables in my class?",
        "What's the most efficient way to iterate through this data structure?",
        "Can I use functional composition to chain these operations?"
    ]

    for prompt in demo_prompts:
        label, distribution, confidence = classifier.predict(prompt)
        classifier.display_prediction(prompt, label, distribution, confidence)

    print("\n" + banner)
    print("Inference complete!")
    print(banner)
|
|
|
|
|
def interactive_mode():
    """Classify user-entered text in a REPL loop until 'quit', Ctrl-C, or EOF.

    Bug fix: input() raises EOFError when stdin is exhausted (Ctrl-D, or a
    closed/piped stdin); the previous broad ``except Exception`` caught it,
    printed an error, and re-prompted forever. EOF now exits the loop cleanly,
    just like KeyboardInterrupt.
    """
    print("=" * 70)
    print("Programming Paradigm Classifier - Interactive Mode")
    print("=" * 70)
    print("Type 'quit' to exit\n")

    clf = ProgrammingParadigmClassifier()

    while True:
        try:
            text = input("\nEnter text to classify (or 'quit' to exit): ").strip()

            if text.lower() == 'quit':
                print("Exiting...")
                break

            if not text:
                print("Please enter some text.")
                continue

            prediction, probs, max_prob = clf.predict(text)
            clf.display_prediction(text, prediction, probs, max_prob)

        except (KeyboardInterrupt, EOFError):
            # Ctrl-C or end-of-input: leave the loop instead of spinning on errors.
            print("\n\nExiting...")
            break
        except Exception as e:
            # Best-effort session: report a per-input failure and keep going.
            print(f"Error: {e}")
|
|
|
|
|
if __name__ == "__main__":
    # `--interactive` as the first CLI argument selects the REPL; default is the demo run.
    wants_repl = len(sys.argv) > 1 and sys.argv[1] == '--interactive'
    if wants_repl:
        interactive_mode()
    else:
        main()
|
|
|