deberta-v3-s-plot-arc-classifier / generate_readme_examples.py
Mitchins's picture
Upload folder using huggingface_hub
2ee5f3c verified
#!/usr/bin/env python3
"""
Generate 8 synthetic examples for README with model predictions
2 examples per class: simple + nuanced
"""
import torch
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification
def get_examples():
    """Return the eight hand-written sample passages used for the README.

    The list holds two entries per label, ordered NONE, INTERNAL, EXTERNAL,
    BOTH, with the "simple" passage preceding the "nuanced" one.  Each entry
    is a dict with the keys ``text``, ``expected`` and ``type`` (in that
    insertion order).
    """
    # One row per sample: (expected label, difficulty tag, passage).
    sample_table = (
        (
            "NONE",
            "simple",
            "Margaret runs the village bakery, making fresh bread every morning at 5 AM for the past thirty years.",
        ),
        (
            "NONE",
            "nuanced",
            "Dr. Harrison performs routine medical check-ups with methodical precision, maintaining professional distance while patients share their deepest fears about mortality.",
        ),
        (
            "INTERNAL",
            "simple",
            "Emma struggles with overwhelming anxiety after her father's harsh criticism, questioning her self-worth and abilities.",
        ),
        (
            "INTERNAL",
            "nuanced",
            "The renowned pianist Clara finds herself paralyzed by perfectionism, her childhood trauma surfacing as she prepares for the performance that could define her legacy.",
        ),
        (
            "EXTERNAL",
            "simple",
            "Knight Roderick embarks on a dangerous quest to retrieve the stolen crown from the dragon's lair.",
        ),
        (
            "EXTERNAL",
            "nuanced",
            "Master thief Elias infiltrates the heavily guarded fortress, disabling security systems and evading patrol routes, each obstacle requiring new techniques and tools to reach the vault.",
        ),
        (
            "BOTH",
            "simple",
            "Sarah must rescue her kidnapped daughter from the terrorist compound while confronting her own paralyzing guilt about being an absent mother.",
        ),
        (
            "BOTH",
            "nuanced",
            "Archaeologist Sophia discovers an ancient artifact that could rewrite history, but must confront her own ethical boundaries and childhood abandonment issues as powerful forces try to silence her.",
        ),
    )
    return [
        {"text": passage, "expected": label, "type": tag}
        for label, tag, passage in sample_table
    ]
def predict_examples(model_dir='.', examples=None):
    """Classify the sample passages with a local model and print a report.

    Loads a ``DebertaV2Tokenizer`` and ``DebertaV2ForSequenceClassification``
    from ``model_dir``, moves the model to CUDA when available (CPU
    otherwise), then scores the examples one at a time.  For every example a
    short status block is printed to stdout.

    Args:
        model_dir: Location passed to ``from_pretrained`` for both the
            tokenizer and the model.  Defaults to ``'.'`` (the current
            working directory).
        examples: Optional list of dicts with ``text``, ``expected`` and
            ``type`` keys.  When ``None``, ``get_examples()`` supplies them.

    Returns:
        A list with one dict per example, holding the keys ``text``,
        ``expected``, ``predicted``, ``confidence``, ``correct`` and ``type``.
    """
    print("Loading model...")
    tokenizer = DebertaV2Tokenizer.from_pretrained(model_dir)
    model = DebertaV2ForSequenceClassification.from_pretrained(model_dir)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # NOTE(review): assumes the checkpoint's label ids 0..3 follow this
    # order -- confirm against the model's config before relying on it.
    class_names = ['NONE', 'INTERNAL', 'EXTERNAL', 'BOTH']

    if examples is None:
        examples = get_examples()

    results = []
    print(f"Running predictions on {len(examples)} examples...\n")
    for i, example in enumerate(examples, 1):
        text = example['text']
        expected = example['expected']
        example_type = example['type']

        # Predict: a single text per forward pass, truncated to 512 tokens.
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)

        probabilities = torch.softmax(outputs.logits, dim=-1)
        predicted_idx = torch.argmax(probabilities, dim=-1).item()
        # Row 0 is the only row because exactly one text was tokenized.
        confidence = probabilities[0][predicted_idx].item()
        predicted = class_names[predicted_idx]
        is_correct = predicted == expected

        results.append({
            'text': text,
            'expected': expected,
            'predicted': predicted,
            'confidence': confidence,
            'correct': is_correct,
            'type': example_type
        })

        status = "✅" if is_correct else "❌"
        # Only mark the preview as truncated when something was cut off.
        preview = f"{text[:80]}..." if len(text) > 80 else text
        print(f"{status} Example {i} ({expected} - {example_type})")
        print(f" Predicted: {predicted} (confidence: {confidence:.3f})")
        print(f" Text: {preview}")
        print()
    return results
def format_for_readme(results):
    """Render prediction results as a Markdown section for the README.

    Results are grouped by their ``expected`` label and emitted in the fixed
    order NONE, INTERNAL, EXTERNAL, BOTH; within a label the input order is
    kept.  Labels with no results produce no heading, and results whose
    ``expected`` label is not one of those four are left out.

    Args:
        results: Iterable of dicts providing the keys ``expected``,
            ``predicted``, ``confidence``, ``text`` and ``type``.

    Returns:
        The Markdown text as a single string.  With no results, only the
        section header is returned.
    """
    # Group by class, keeping first-seen order inside each group.
    by_class = {}
    for result in results:
        by_class.setdefault(result['expected'], []).append(result)

    header = """
## Example Classifications
Here are sample classifications showing the model's predictions with confidence scores:
"""
    # Collect fragments and join once instead of growing a string with +=.
    parts = [header]
    for class_name in ['NONE', 'INTERNAL', 'EXTERNAL', 'BOTH']:
        if class_name not in by_class:
            continue
        parts.append(f"### {class_name}\n\n")
        for result in by_class[class_name]:
            confidence = result['confidence']
            # The icon reflects confidence only, not whether the prediction
            # matched the expected label.
            if confidence > 0.7:
                confidence_icon = "✅"
            elif confidence > 0.5:
                confidence_icon = "⚠️"
            else:
                confidence_icon = "❌"
            text = result['text']
            parts.append(f"**{result['type'].title()} Example:**\n")
            parts.append(f'> *"{text}"*\n\n')
            parts.append(
                f"**Prediction:** {result['predicted']} {confidence_icon} "
                f"(confidence: {confidence:.3f})\n\n"
            )
    return "".join(parts)
if __name__ == "__main__":
    # Run the model over the samples, then show and persist the Markdown.
    results = predict_examples()
    readme_section = format_for_readme(results)
    print("README Section:")
    print("=" * 50)
    print(readme_section)
    # Save to file. The encoding is pinned to UTF-8 because the section
    # carries emoji status icons, which a non-UTF-8 locale default encoding
    # cannot write.
    with open('readme_examples_section.txt', 'w', encoding='utf-8') as f:
        f.write(readme_section)
    print("Saved README section to 'readme_examples_section.txt'")