File size: 5,900 Bytes
2ee5f3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python3
"""
Generate 8 synthetic examples for README with model predictions
2 examples per class: simple + nuanced
"""

import torch
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification

def get_examples():
    """Return the 8 synthetic benchmark examples.

    There are two examples (one 'simple', one 'nuanced') for each of the
    four conflict classes, in the fixed order NONE, INTERNAL, EXTERNAL,
    BOTH. Each example is a dict with keys 'text', 'expected', 'type'.
    """
    # (text, expected class, example type) — kept in class order.
    specs = [
        ("Margaret runs the village bakery, making fresh bread every morning at 5 AM for the past thirty years.",
         "NONE", "simple"),
        ("Dr. Harrison performs routine medical check-ups with methodical precision, maintaining professional distance while patients share their deepest fears about mortality.",
         "NONE", "nuanced"),
        ("Emma struggles with overwhelming anxiety after her father's harsh criticism, questioning her self-worth and abilities.",
         "INTERNAL", "simple"),
        ("The renowned pianist Clara finds herself paralyzed by perfectionism, her childhood trauma surfacing as she prepares for the performance that could define her legacy.",
         "INTERNAL", "nuanced"),
        ("Knight Roderick embarks on a dangerous quest to retrieve the stolen crown from the dragon's lair.",
         "EXTERNAL", "simple"),
        ("Master thief Elias infiltrates the heavily guarded fortress, disabling security systems and evading patrol routes, each obstacle requiring new techniques and tools to reach the vault.",
         "EXTERNAL", "nuanced"),
        ("Sarah must rescue her kidnapped daughter from the terrorist compound while confronting her own paralyzing guilt about being an absent mother.",
         "BOTH", "simple"),
        ("Archaeologist Sophia discovers an ancient artifact that could rewrite history, but must confront her own ethical boundaries and childhood abandonment issues as powerful forces try to silence her.",
         "BOTH", "nuanced"),
    ]
    return [
        {"text": text, "expected": expected, "type": example_type}
        for text, expected, example_type in specs
    ]

def predict_examples(model_dir='.'):
    """Load the fine-tuned classifier and run it over every synthetic example.

    Args:
        model_dir: Directory containing the tokenizer files and model
            weights. Defaults to '.', preserving the original hard-coded
            behavior of loading from the current directory.

    Returns:
        list[dict]: one entry per example with keys 'text', 'expected',
        'predicted', 'confidence', 'correct', and 'type'.
    """
    print("Loading model...")
    tokenizer = DebertaV2Tokenizer.from_pretrained(model_dir)
    model = DebertaV2ForSequenceClassification.from_pretrained(model_dir)

    # Prefer GPU when available; inference only, so switch to eval mode.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # Index order must match the label ids the model was trained with.
    class_names = ['NONE', 'INTERNAL', 'EXTERNAL', 'BOTH']
    examples = get_examples()

    results = []

    print(f"Running predictions on {len(examples)} examples...\n")

    for i, example in enumerate(examples, 1):
        text = example['text']
        expected = example['expected']
        example_type = example['type']

        # Tokenize one example and move its tensors to the model's device.
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # No gradients needed for inference.
        with torch.no_grad():
            outputs = model(**inputs)
            probabilities = torch.softmax(outputs.logits, dim=-1)
            predicted_idx = torch.argmax(probabilities, dim=-1).item()
            confidence = probabilities[0][predicted_idx].item()

        predicted = class_names[predicted_idx]
        is_correct = predicted == expected

        result = {
            'text': text,
            'expected': expected,
            'predicted': predicted,
            'confidence': confidence,
            'correct': is_correct,
            'type': example_type
        }

        results.append(result)

        # Per-example progress line with a pass/fail marker.
        status = "✅" if is_correct else "❌"
        print(f"{status} Example {i} ({expected} - {example_type})")
        print(f"  Predicted: {predicted} (confidence: {confidence:.3f})")
        print(f"  Text: {text[:80]}...")
        print()

    return results

def format_for_readme(results):
    """Render prediction results as a Markdown section for the README.

    Args:
        results: list of dicts as produced by predict_examples(), each
            with keys 'text', 'expected', 'predicted', 'confidence'
            (float in [0, 1]), and 'type' (e.g. 'simple'/'nuanced').

    Returns:
        str: Markdown grouping the examples by their expected class, in
        the fixed order NONE, INTERNAL, EXTERNAL, BOTH; classes with no
        results are skipped.
    """
    # Group results by expected class (setdefault avoids the manual
    # membership check of the hand-rolled grouping).
    by_class = {}
    for result in results:
        by_class.setdefault(result['expected'], []).append(result)

    # Collect fragments and join once at the end instead of repeated
    # string concatenation (which is quadratic in the worst case).
    parts = ["""
## Example Classifications

Here are sample classifications showing the model's predictions with confidence scores:

"""]

    for class_name in ['NONE', 'INTERNAL', 'EXTERNAL', 'BOTH']:
        if class_name not in by_class:
            continue
        parts.append(f"### {class_name}\n\n")

        for result in by_class[class_name]:
            confidence = result['confidence']
            # Icon encodes how confident the model was: strong / shaky / weak.
            if confidence > 0.7:
                confidence_icon = "✅"
            elif confidence > 0.5:
                confidence_icon = "⚠️"
            else:
                confidence_icon = "❌"

            parts.append(f"**{result['type'].title()} Example:**\n")
            parts.append(f"> *\"{result['text']}\"*\n\n")
            parts.append(f"**Prediction:** {result['predicted']} {confidence_icon} (confidence: {confidence:.3f})\n\n")

    return "".join(parts)

if __name__ == "__main__":
    # Run the model over the synthetic examples, then render and persist
    # the README-ready Markdown section.
    results = predict_examples()
    readme_section = format_for_readme(results)

    print("README Section:")
    print("=" * 50)
    print(readme_section)

    # Explicit UTF-8: the section contains emoji status icons (✅/⚠️/❌),
    # which would raise UnicodeEncodeError under a non-UTF-8 platform
    # default encoding (e.g. cp1252 on Windows).
    with open('readme_examples_section.txt', 'w', encoding='utf-8') as f:
        f.write(readme_section)

    print("Saved README section to 'readme_examples_section.txt'")