File size: 5,900 Bytes
2ee5f3c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
#!/usr/bin/env python3
"""
Generate 8 synthetic examples for README with model predictions
2 examples per class: simple + nuanced
"""
import torch
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification
def get_examples():
    """Return the 8 synthetic README examples: 2 per class (simple + nuanced)."""
    # (text, expected class, example type) triples, ordered NONE/INTERNAL/
    # EXTERNAL/BOTH with the simple variant before the nuanced one.
    specs = [
        ("Margaret runs the village bakery, making fresh bread every morning at 5 AM for the past thirty years.",
         "NONE", "simple"),
        ("Dr. Harrison performs routine medical check-ups with methodical precision, maintaining professional distance while patients share their deepest fears about mortality.",
         "NONE", "nuanced"),
        ("Emma struggles with overwhelming anxiety after her father's harsh criticism, questioning her self-worth and abilities.",
         "INTERNAL", "simple"),
        ("The renowned pianist Clara finds herself paralyzed by perfectionism, her childhood trauma surfacing as she prepares for the performance that could define her legacy.",
         "INTERNAL", "nuanced"),
        ("Knight Roderick embarks on a dangerous quest to retrieve the stolen crown from the dragon's lair.",
         "EXTERNAL", "simple"),
        ("Master thief Elias infiltrates the heavily guarded fortress, disabling security systems and evading patrol routes, each obstacle requiring new techniques and tools to reach the vault.",
         "EXTERNAL", "nuanced"),
        ("Sarah must rescue her kidnapped daughter from the terrorist compound while confronting her own paralyzing guilt about being an absent mother.",
         "BOTH", "simple"),
        ("Archaeologist Sophia discovers an ancient artifact that could rewrite history, but must confront her own ethical boundaries and childhood abandonment issues as powerful forces try to silence her.",
         "BOTH", "nuanced"),
    ]
    return [
        {"text": text, "expected": expected, "type": example_type}
        for text, expected, example_type in specs
    ]
def predict_examples():
    """Classify every synthetic example with the locally fine-tuned model.

    Loads the tokenizer/model from the current directory, runs one forward
    pass per example, prints a per-example status line, and returns a list
    of result dicts (text, expected, predicted, confidence, correct, type).
    """
    print("Loading model...")
    tokenizer = DebertaV2Tokenizer.from_pretrained('.')
    model = DebertaV2ForSequenceClassification.from_pretrained('.')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # Index order must match the label mapping used during training.
    class_names = ['NONE', 'INTERNAL', 'EXTERNAL', 'BOTH']
    examples = get_examples()
    results = []
    print(f"Running predictions on {len(examples)} examples...\n")

    for idx, sample in enumerate(examples, 1):
        text = sample['text']
        expected = sample['expected']
        sample_type = sample['type']

        # Tokenize and move tensors onto the model's device.
        encoded = tokenizer(text, return_tensors="pt", truncation=True,
                            padding=True, max_length=512)
        encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

        # Inference only — no gradients needed.
        with torch.no_grad():
            logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=-1)
        winner = torch.argmax(probs, dim=-1).item()
        score = probs[0][winner].item()
        label = class_names[winner]
        matched = label == expected

        results.append({
            'text': text,
            'expected': expected,
            'predicted': label,
            'confidence': score,
            'correct': matched,
            'type': sample_type,
        })

        print(f"{'✅' if matched else '❌'} Example {idx} ({expected} - {sample_type})")
        print(f" Predicted: {label} (confidence: {score:.3f})")
        print(f" Text: {text[:80]}...")
        print()
    return results
def format_for_readme(results):
    """Render prediction results as a Markdown section for the README.

    Args:
        results: list of result dicts as produced by predict_examples();
            each must contain 'text', 'expected', 'predicted', 'confidence',
            and 'type'.

    Returns:
        A Markdown string with the examples grouped under a heading per
        expected class, in the fixed order NONE/INTERNAL/EXTERNAL/BOTH.
    """
    # Group by expected class, preserving input order within each class.
    by_class = {}
    for result in results:
        by_class.setdefault(result['expected'], []).append(result)

    readme_content = """
## Example Classifications
Here are sample classifications showing the model's predictions with confidence scores:
"""
    # Fixed class order keeps the README section stable across runs.
    for class_name in ['NONE', 'INTERNAL', 'EXTERNAL', 'BOTH']:
        if class_name not in by_class:
            continue
        readme_content += f"### {class_name}\n\n"
        for result in by_class[class_name]:
            # Icon thresholds: >0.7 confident, >0.5 borderline, else weak.
            if result['confidence'] > 0.7:
                confidence_icon = "✅"
            elif result['confidence'] > 0.5:
                confidence_icon = "⚠️"
            else:
                confidence_icon = "❌"
            readme_content += f"**{result['type'].title()} Example:**\n"
            readme_content += f"> *\"{result['text']}\"*\n\n"
            readme_content += f"**Prediction:** {result['predicted']} {confidence_icon} (confidence: {result['confidence']:.3f})\n\n"
    return readme_content
if __name__ == "__main__":
    # Run the model over the synthetic examples and show the rendered section.
    results = predict_examples()
    readme_section = format_for_readme(results)
    print("README Section:")
    print("=" * 50)
    print(readme_section)
    # Save to file. Explicit UTF-8 is required: the section contains emoji
    # status icons (✅/⚠️/❌) that would raise UnicodeEncodeError under a
    # non-UTF-8 platform default encoding (e.g. cp1252 on Windows).
    with open('readme_examples_section.txt', 'w', encoding='utf-8') as f:
        f.write(readme_section)
    print("Saved README section to 'readme_examples_section.txt'")