File size: 5,900 Bytes
2ee5f3c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
#!/usr/bin/env python3
"""
Generate 8 synthetic examples for README with model predictions
2 examples per class: simple + nuanced
"""
import torch
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification
def get_examples():
    """Return the 8 synthetic README examples: 2 per class (simple + nuanced)."""
    # (text, expected class, example type) triples, ordered NONE/INTERNAL/
    # EXTERNAL/BOTH with the simple variant before the nuanced one.
    specs = [
        ("Margaret runs the village bakery, making fresh bread every morning at 5 AM for the past thirty years.",
         "NONE", "simple"),
        ("Dr. Harrison performs routine medical check-ups with methodical precision, maintaining professional distance while patients share their deepest fears about mortality.",
         "NONE", "nuanced"),
        ("Emma struggles with overwhelming anxiety after her father's harsh criticism, questioning her self-worth and abilities.",
         "INTERNAL", "simple"),
        ("The renowned pianist Clara finds herself paralyzed by perfectionism, her childhood trauma surfacing as she prepares for the performance that could define her legacy.",
         "INTERNAL", "nuanced"),
        ("Knight Roderick embarks on a dangerous quest to retrieve the stolen crown from the dragon's lair.",
         "EXTERNAL", "simple"),
        ("Master thief Elias infiltrates the heavily guarded fortress, disabling security systems and evading patrol routes, each obstacle requiring new techniques and tools to reach the vault.",
         "EXTERNAL", "nuanced"),
        ("Sarah must rescue her kidnapped daughter from the terrorist compound while confronting her own paralyzing guilt about being an absent mother.",
         "BOTH", "simple"),
        ("Archaeologist Sophia discovers an ancient artifact that could rewrite history, but must confront her own ethical boundaries and childhood abandonment issues as powerful forces try to silence her.",
         "BOTH", "nuanced"),
    ]
    return [
        {"text": text, "expected": expected, "type": example_type}
        for text, expected, example_type in specs
    ]
def predict_examples():
    """Classify every synthetic example with the locally fine-tuned model.

    Loads the tokenizer/model from the current directory, runs one forward
    pass per example, prints a per-example status line, and returns a list
    of result dicts (text, expected, predicted, confidence, correct, type).
    """
    print("Loading model...")
    tokenizer = DebertaV2Tokenizer.from_pretrained('.')
    model = DebertaV2ForSequenceClassification.from_pretrained('.')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # Index order must match the label mapping used during training.
    class_names = ['NONE', 'INTERNAL', 'EXTERNAL', 'BOTH']
    examples = get_examples()
    results = []
    print(f"Running predictions on {len(examples)} examples...\n")

    for idx, sample in enumerate(examples, 1):
        text = sample['text']
        expected = sample['expected']
        sample_type = sample['type']

        # Tokenize and move tensors onto the model's device.
        encoded = tokenizer(text, return_tensors="pt", truncation=True,
                            padding=True, max_length=512)
        encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

        # Inference only — no gradients needed.
        with torch.no_grad():
            logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=-1)
        winner = torch.argmax(probs, dim=-1).item()
        score = probs[0][winner].item()
        label = class_names[winner]
        matched = label == expected

        results.append({
            'text': text,
            'expected': expected,
            'predicted': label,
            'confidence': score,
            'correct': matched,
            'type': sample_type,
        })

        print(f"{'✅' if matched else '❌'} Example {idx} ({expected} - {sample_type})")
        print(f" Predicted: {label} (confidence: {score:.3f})")
        print(f" Text: {text[:80]}...")
        print()
    return results
def format_for_readme(results):
    """Render prediction results as a Markdown section for the README.

    Args:
        results: list of result dicts as produced by predict_examples();
            each must contain 'text', 'expected', 'predicted', 'confidence',
            and 'type'.

    Returns:
        A Markdown string with the examples grouped under a heading per
        expected class, in the fixed order NONE/INTERNAL/EXTERNAL/BOTH.
    """
    # Group by expected class, preserving input order within each class.
    by_class = {}
    for result in results:
        by_class.setdefault(result['expected'], []).append(result)

    readme_content = """
## Example Classifications
Here are sample classifications showing the model's predictions with confidence scores:
"""
    # Fixed class order keeps the README section stable across runs.
    for class_name in ['NONE', 'INTERNAL', 'EXTERNAL', 'BOTH']:
        if class_name not in by_class:
            continue
        readme_content += f"### {class_name}\n\n"
        for result in by_class[class_name]:
            # Icon thresholds: >0.7 confident, >0.5 borderline, else weak.
            if result['confidence'] > 0.7:
                confidence_icon = "✅"
            elif result['confidence'] > 0.5:
                confidence_icon = "⚠️"
            else:
                confidence_icon = "❌"
            readme_content += f"**{result['type'].title()} Example:**\n"
            readme_content += f"> *\"{result['text']}\"*\n\n"
            readme_content += f"**Prediction:** {result['predicted']} {confidence_icon} (confidence: {result['confidence']:.3f})\n\n"
    return readme_content
if __name__ == "__main__":
    # Run the model over the synthetic examples and show the rendered section.
    results = predict_examples()
    readme_section = format_for_readme(results)
    print("README Section:")
    print("=" * 50)
    print(readme_section)
    # Save to file. Explicit UTF-8 is required: the section contains emoji
    # status icons (✅/⚠️/❌) that would raise UnicodeEncodeError under a
    # non-UTF-8 platform default encoding (e.g. cp1252 on Windows).
    with open('readme_examples_section.txt', 'w', encoding='utf-8') as f:
        f.write(readme_section)
    print("Saved README section to 'readme_examples_section.txt'")