"""
Model verification script for DeBERTa v3 Small Explicit Classifier v2.0
"""
|
|
import json
from pathlib import Path

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
|
def verify_model_integrity(model_path: "str | Path" = ".") -> bool:
    """Verify the files, configuration and inference of a model checkpoint.

    The checks run in order and the function returns at the first hard
    failure:

    1. every required file exists in ``model_path``;
    2. the model and tokenizer load through the ``transformers`` Auto classes;
    3. ``config.id2label`` equals the expected label mapping;
    4. the parameter count is within 1M of the expected total (warning only);
    5. a forward pass on one sentence yields a ``(1, num_labels)``
       probability tensor;
    6. optional evaluation artifacts are listed and, when the thresholds
       file is present, it is checked for one entry per label (warning only).

    Progress is printed to stdout. Status markers are plain ASCII
    (``[OK]``, ``[FAIL]``, ``[WARN]``, ``[--]``) so the report is readable on
    any console encoding.

    Args:
        model_path: Directory that holds the model files. Defaults to the
            current working directory.

    Returns:
        True when all mandatory checks pass, False otherwise.
    """
    model_dir = Path(model_path)

    print("Verifying DeBERTa v3 Small Explicit Classifier v2.0")
    print("=" * 60)

    # --- 1. Required files --------------------------------------------------
    required_files = [
        "model.safetensors",
        "config.json",
        "tokenizer.json",
        "spm.model",
        "label_mapping.json",
        "README.md",
    ]

    print("Checking required files...")
    missing_files = []
    for file_name in required_files:
        if (model_dir / file_name).exists():
            print(f"  [OK] {file_name}")
        else:
            print(f"  [FAIL] {file_name} - MISSING")
            missing_files.append(file_name)

    if missing_files:
        print(f"\n[WARN] Missing files: {missing_files}")
        return False

    # --- 2. Model / tokenizer loading ---------------------------------------
    print("\nLoading model...")
    try:
        model = AutoModelForSequenceClassification.from_pretrained(str(model_dir))
        tokenizer = AutoTokenizer.from_pretrained(str(model_dir))
    except Exception as e:
        # Deliberately broad: any loading error means the checkpoint is
        # unusable, and this script's job is to report it, not to crash.
        print(f"  [FAIL] Model loading failed: {e}")
        return False
    print("  [OK] Model loaded successfully")

    # --- 3. Label mapping ---------------------------------------------------
    print("\nVerifying configuration...")
    expected_labels = {
        0: "EXPLICIT-DISCLAIMER",
        1: "EXPLICIT-OFFENSIVE",
        2: "EXPLICIT-SEXUAL",
        3: "EXPLICIT-VIOLENT",
        4: "NON-EXPLICIT",
        5: "SEXUAL-REFERENCE",
        6: "SUGGESTIVE",
    }
    num_labels = len(expected_labels)

    # Normalise keys to int so the comparison does not depend on whether the
    # config exposes ids as ints or as strings.
    config_labels = {int(k): v for k, v in model.config.id2label.items()}
    if config_labels != expected_labels:
        print("  [FAIL] Label mappings incorrect")
        print(f"  Expected: {expected_labels}")
        print(f"  Got: {config_labels}")
        return False
    print("  [OK] Label mappings correct")

    # --- 4. Parameter count (warning only) ----------------------------------
    total_params = sum(p.numel() for p in model.parameters())
    expected_params = 141_900_000
    param_tolerance = 1_000_000

    if abs(total_params - expected_params) < param_tolerance:
        print(
            f"  [OK] Parameter count: {total_params:,} "
            f"(~{total_params / 1_000_000:.1f}M)"
        )
    else:
        print(f"  [WARN] Unexpected parameter count: {total_params:,}")

    # --- 5. Inference smoke test --------------------------------------------
    print("\nTesting inference...")
    try:
        test_text = "This is a test sentence for classification."
        inputs = tokenizer(
            test_text, return_tensors="pt", truncation=True, max_length=512
        )
        with torch.no_grad():
            logits = model(**inputs).logits
        probabilities = torch.softmax(logits, dim=-1)
    except Exception as e:
        # Broad on purpose: tokenizer and forward-pass failures surface as
        # many different exception types.
        print(f"  [FAIL] Inference failed: {e}")
        return False

    if tuple(probabilities.shape) != (1, num_labels):
        print(f"  [FAIL] Unexpected output shape: {probabilities.shape}")
        return False
    print("  [OK] Inference successful")

    predicted_class = int(torch.argmax(probabilities, dim=-1).item())
    confidence = probabilities[0, predicted_class].item()
    # Use the int-keyed mapping that was just validated above.
    predicted_label = config_labels[predicted_class]
    print(f"  Test prediction: {predicted_label} ({confidence:.3f})")

    # --- 6. Optional evaluation artifacts -----------------------------------
    print("\nChecking evaluation files...")
    eval_files = [
        "improved_classification_report.txt",
        "recommended_thresholds.json",
        "confusion_matrix.png",
        "pr_curves.png",
        "roc_curves.png",
        "calibration.png",
    ]

    for file_name in eval_files:
        if (model_dir / file_name).exists():
            print(f"  [OK] {file_name}")
        else:
            print(f"  [--] {file_name} - Optional")

    # The thresholds file is optional, so only validate it when it exists;
    # its absence has already been reported in the listing above.
    thresholds_file = model_dir / "recommended_thresholds.json"
    if thresholds_file.exists():
        try:
            with thresholds_file.open("r", encoding="utf-8") as f:
                thresholds = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"  [WARN] Could not verify thresholds: {e}")
        else:
            if not isinstance(thresholds, (dict, list)):
                print(
                    "  [WARN] Could not verify thresholds: unexpected JSON "
                    f"type {type(thresholds).__name__}"
                )
            elif len(thresholds) == num_labels:
                print("  [OK] Thresholds file valid")
            else:
                print(f"  [WARN] Unexpected threshold count: {len(thresholds)}")

    print("\nModel verification complete!")
    print("[OK] All core components verified and working correctly")
    print("\nReady for deployment!")

    return True
|
|
def show_model_info(model_path: "str | Path" = ".") -> None:
    """Print a summary of the model's architecture and class labels.

    Loads the sequence-classification model from ``model_path`` and prints
    fields read from its config (model type, architecture, layer count,
    hidden size, attention heads, max position embeddings, vocabulary size,
    labels) plus the total parameter count.

    This is a best-effort display helper: any error while loading the model
    or reading the config is printed and swallowed, never raised.

    Args:
        model_path: Directory that holds the model files. Defaults to the
            current working directory.
    """
    print("\nModel Information Summary")
    print("-" * 40)

    try:
        model = AutoModelForSequenceClassification.from_pretrained(str(model_path))
        config = model.config

        # ``architectures`` may be unset on a config; fall back to a
        # placeholder instead of losing the rest of the summary.
        architectures = config.architectures or ["unknown"]
        parameter_count = sum(p.numel() for p in model.parameters())

        print(f"Model Type: {config.model_type}")
        print(f"Architecture: {architectures[0]}")
        print(f"Parameters: {parameter_count:,}")
        print(f"Layers: {config.num_hidden_layers}")
        print(f"Hidden Size: {config.hidden_size}")
        print(f"Attention Heads: {config.num_attention_heads}")
        print(f"Max Length: {config.max_position_embeddings}")
        print(f"Vocabulary Size: {config.vocab_size:,}")
        print(f"Classes: {len(config.id2label)}")

        print("\nClass Labels:")
        for label_id, label in config.id2label.items():
            print(f"  {label_id}: {label}")

    except Exception as e:
        # Broad on purpose: this summary is informational only and must not
        # turn a successful verification run into a crash.
        print(f"Error loading model info: {e}")
|
|
if __name__ == "__main__":
    # Show the summary only for a checkpoint that passed verification;
    # otherwise exit non-zero so shells and CI jobs can detect the failure.
    if verify_model_integrity():
        show_model_info()
    else:
        print("\n[FAIL] Verification failed - please check the issues above")
        # The bare ``exit`` helper is injected by the ``site`` module and is
        # missing under ``python -S`` or in frozen builds; raising SystemExit
        # works everywhere and needs no import.
        raise SystemExit(1)