AryanPrakhar
/

paradigm-so

Model card Files Files and versions

xet

Community

AryanPrakhar committed on Feb 17

Commit

48c6574

verified ·

1 Parent(s): b27e979

Add inference.py

Browse files

Files changed (1) hide show

concept-classifier/inference.py +179 -0

concept-classifier/inference.py ADDED Viewed

	@@ -0,0 +1,179 @@

+"""
+Programming Paradigm Classification - Inference Script
+Uses trained SVM classifier and sentence embeddings for predictions
+"""
+import pickle
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import sys
class ProgrammingParadigmClassifier:
    """Classify text by programming paradigm from sentence embeddings.

    The input text is encoded with a sentence-embedding model and the
    embedding is scored by a pickled classifier (either one exposing
    ``predict_proba`` or one exposing only ``decision_function``).
    ``predict`` turns the per-class probabilities into a single label, an
    ambiguous "A or B" label, or "unclear".
    """

    # Decision cut-offs used by predict().
    # "A or B" is reported when both leading classes exceed DUAL_MIN_PROB and
    # are separated by less than DUAL_MAX_MARGIN.
    DUAL_MIN_PROB = 0.25
    DUAL_MAX_MARGIN = 0.08
    # "unclear" is reported when the best class is below UNCLEAR_MIN_PROB or
    # leads the runner-up by less than UNCLEAR_MIN_MARGIN.
    UNCLEAR_MIN_PROB = 0.30
    UNCLEAR_MIN_MARGIN = 0.10

    def __init__(self, classifier_path='svm_classifier.pkl',
                 model_name_path='sentence_model_name.txt',
                 confidence_threshold=0.55):
        """Load the pickled classifier and the sentence-embedding model.

        Args:
            classifier_path: Path to the pickled classifier.
            model_name_path: Path to a text file whose stripped content is
                the name of the sentence-embedding model to load.
            confidence_threshold: Stored on the instance and echoed in the
                start-up message.  NOTE(review): predict() does not consult
                this value; its cut-offs are the class-level constants.

        Raises:
            OSError: If either file cannot be opened.
        """
        print("Loading trained SVM classifier...")
        # SECURITY: unpickling can execute arbitrary code embedded in the
        # file; only load classifier files that come from a trusted source.
        with open(classifier_path, 'rb') as f:
            self.classifier = pickle.load(f)

        # The embedding model must be the one the classifier was trained on,
        # so its name is read from the file written at training time.
        print("Reading embedding model name from training...")
        with open(model_name_path, 'r', encoding='utf-8') as f:
            model_name = f.read().strip()

        print(f"Loading sentence embedding model: {model_name}...")
        self.model = SentenceTransformer(model_name)
        self.confidence_threshold = confidence_threshold
        print(f"Models loaded! (Confidence threshold: {confidence_threshold})\n")

    def _class_probabilities(self, embedding, n_classes):
        """Return one probability per class for a single embedded text."""
        if hasattr(self.classifier, 'predict_proba'):
            # Calibrated classifier: real probabilities are available.
            return np.asarray(self.classifier.predict_proba(embedding)[0],
                              dtype=float)

        # Margin-only classifier: turn decision scores into probabilities.
        raw = np.asarray(self.classifier.decision_function(embedding),
                         dtype=float)
        scores = np.atleast_1d(raw[0])
        if scores.size == 1 and n_classes == 2:
            # A binary decision function yields ONE signed score per sample
            # (positive favours classes_[1]).  Softmax over [0, score] is the
            # logistic sigmoid, giving a probability for each of the two
            # classes instead of a lone scalar.
            scores = np.array([0.0, scores[0]])
        # Subtract the max before exponentiating for numerical stability.
        exp_scores = np.exp(scores - scores.max())
        return exp_scores / exp_scores.sum()

    def predict(self, text):
        """Predict the programming paradigm of ``text``.

        Args:
            text: The text to classify.

        Returns:
            A tuple ``(prediction, prob_dict, max_prob)`` where ``prediction``
            is a class label, ``"<top> or <second>"`` when the two leading
            classes are too close to separate, or ``"unclear"``;
            ``prob_dict`` maps each class label to its probability; and
            ``max_prob`` is the highest probability.
        """
        embedding = self.model.encode([text])
        classes = np.asarray(self.classifier.classes_)
        probs = self._class_probabilities(embedding, len(classes))
        prob_dict = dict(zip(classes, probs))

        # Rank classes from most to least probable.
        order = np.argsort(probs)[::-1]
        ranked_probs = probs[order]
        ranked_classes = classes[order]

        has_runner_up = len(ranked_probs) > 1
        max_prob = ranked_probs[0]
        second_max = ranked_probs[1] if has_runner_up else 0.0
        top_class = ranked_classes[0]
        second_class = ranked_classes[1] if has_runner_up else None
        margin = max_prob - second_max

        if (max_prob > self.DUAL_MIN_PROB
                and second_max > self.DUAL_MIN_PROB
                and margin < self.DUAL_MAX_MARGIN):
            # Both classes are viable - return both.
            prediction = f"{top_class} or {second_class}"
        elif (max_prob < self.UNCLEAR_MIN_PROB
              or margin < self.UNCLEAR_MIN_MARGIN):
            prediction = "unclear"
        else:
            prediction = top_class
        return prediction, prob_dict, max_prob

    def predict_batch(self, texts):
        """Predict programming paradigms for multiple texts.

        Args:
            texts: Iterable of texts to classify.

        Returns:
            A list with one dict per text, holding the keys ``'text'``,
            ``'prediction'``, ``'probabilities'`` and ``'confidence'``
            (the highest class probability).
        """
        results = []
        for text in texts:
            prediction, probs, max_prob = self.predict(text)
            results.append({
                'text': text,
                'prediction': prediction,
                'probabilities': probs,
                'confidence': max_prob,
            })
        return results

    def display_prediction(self, text, prediction, probs, max_prob):
        """Print a formatted report for one prediction.

        Args:
            text: The classified text (only the first 100 characters are
                shown).
            prediction: Label returned by ``predict``.
            probs: Mapping of class label to probability.
            max_prob: Highest class probability (accepted for symmetry with
                ``predict``'s return value; the report recomputes it from
                ``probs``).
        """
        print(f"\nInput: {text[:100]}{'...' if len(text) > 100 else ''}")

        # Format output for dual or single predictions.
        if " or " in str(prediction):
            print(f"Predicted Paradigm: {prediction} (ambiguous - close call!)")
        elif prediction == "unclear":
            print(f"Predicted Paradigm: {prediction} (too uncertain)")
        else:
            print(f"Predicted Paradigm: {prediction} (confident)")

        # Top two classes for the margin line; fall back to placeholders so an
        # empty or single-class mapping does not raise.
        sorted_items = sorted(probs.items(), key=lambda x: x[1], reverse=True)
        top_class, top_prob = sorted_items[0] if sorted_items else (None, 0.0)
        second_class, second_prob = (
            sorted_items[1] if len(sorted_items) > 1 else (None, 0.0)
        )
        margin = top_prob - second_prob
        print(f"Max: {top_class} ({top_prob:.3f}), 2nd: {second_class} ({second_prob:.3f}), Margin: {margin:.3f}")

        print("Class Probabilities:")
        for label, prob in sorted_items:
            # str() keeps the width format valid for non-string class labels.
            print(f"  {str(label):12s}: {prob:7.3f}")
        print("-" * 70)
def main():
    """Classify a fixed set of example texts and print each result.

    Builds a ``ProgrammingParadigmClassifier`` with its default file paths,
    then runs ``predict`` and ``display_prediction`` on every example in
    turn, so each report is printed as soon as its prediction is ready.
    """
    banner = "=" * 70
    print(banner)
    print("Programming Paradigm Classification - Inference")
    print(banner)

    # Initialize classifier
    clf = ProgrammingParadigmClassifier()

    # Example texts for inference
    test_texts = [
        "How do I make this function pure without any side effects?",
        "Why does my class hierarchy have so many levels of inheritance?",
        "What's the best way to center a div in CSS?",
        "This function just loops through the array and updates each element in place.",
        "I'm using lambda functions to transform this list with map and filter.",
        "How do I properly encapsulate private variables in my class?",
        "What's the most efficient way to iterate through this data structure?",
        "Can I use functional composition to chain these operations?",
    ]

    # Run inference on all examples
    for text in test_texts:
        prediction, probs, max_prob = clf.predict(text)
        clf.display_prediction(text, prediction, probs, max_prob)

    print("\n" + banner)
    print("Inference complete!")
    print(banner)
def interactive_mode():
    """Run the classifier in a read-classify-print loop on standard input.

    The loop ends when the user types ``quit`` (case-insensitive), presses
    Ctrl-C, or standard input reaches end-of-file.  Blank input is rejected
    with a prompt to try again.  Any other error raised while classifying is
    reported and the loop continues.
    """
    print("=" * 70)
    print("Programming Paradigm Classifier - Interactive Mode")
    print("=" * 70)
    print("Type 'quit' to exit\n")

    # Initialize classifier
    clf = ProgrammingParadigmClassifier()

    while True:
        try:
            text = input("\nEnter text to classify (or 'quit' to exit): ").strip()
        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: once stdin is exhausted every later
            # input() call raises it again, so reporting it as an ordinary
            # error would spin forever.
            print("\n\nExiting...")
            break

        if text.lower() == 'quit':
            print("Exiting...")
            break
        if not text:
            print("Please enter some text.")
            continue

        try:
            prediction, probs, max_prob = clf.predict(text)
            clf.display_prediction(text, prediction, probs, max_prob)
        except KeyboardInterrupt:
            print("\n\nExiting...")
            break
        except Exception as e:
            # Top-level boundary of the interactive session: report the
            # failure and keep the prompt alive for the next input.
            print(f"Error: {e}")
if __name__ == "__main__":
    # Start the interactive prompt only when the first command-line argument
    # is '--interactive'; otherwise classify the built-in example texts.
    if len(sys.argv) > 1 and sys.argv[1] == '--interactive':
        interactive_mode()
    else:
        main()