| { | |
| "model_name": "Khmer OCR Recognition Model", | |
| "description": "CRNN-based OCR model specifically trained for Khmer text recognition", | |
| "framework": "PaddleOCR", | |
| "architecture": { | |
| "algorithm": "CRNN", | |
| "backbone": "ResNet34", | |
| "neck": "SequenceEncoder (RNN)", | |
| "head": "CTCHead", | |
| "loss": "CTCLoss" | |
| }, | |
| "performance": { | |
| "accuracy": 98.45, | |
| "normalized_edit_distance": 99.90, | |
| "inference_speed_fps": 326, | |
| "best_epoch": 29, | |
| "total_epochs": 30 | |
| }, | |
| "training_data": { | |
| "training_images": 13253, | |
| "validation_images": 4315, | |
| "total_images": 17568, | |
| "text_length_range": "3-5 words", | |
| "image_size": "600x80 pixels (training), 320x32 pixels (inference)", | |
| "font": "KhmerOS", | |
| "augmentation": ["clean", "blurred", "noisy", "noise_blur"] | |
| }, | |
| "model_specifications": { | |
| "input_shape": [3, 32, 320], | |
| "max_text_length": 25, | |
| "character_count": 188, | |
| "supported_languages": ["Khmer", "Latin"], | |
| "model_size_mb": 106 | |
| }, | |
| "character_set": { | |
| "khmer_consonants": "ក ខ គ ឃ ង ច ឆ ជ ឈ ញ ដ ឋ ឌ ឍ ណ ត ថ ទ ធ ន ប ផ ព ភ ម យ រ ល វ ស ហ ឡ អ", | |
| "khmer_vowels": "ា ិ ី ឹ ឺ ុ ូ ួ ើ ឿ ៀ េ ែ ៃ ោ ៅ ំ ះ ៈ", | |
| "khmer_numerals": "០ ១ ២ ៣ ៤ ៥ ៦ ៧ ៨ ៩", | |
| "latin_characters": "A-Z, a-z, 0-9", | |
| "punctuation": ". , ! ? - ( ) [ ] « » • ® etc.", | |
| "khmer_symbols": "៉ ៊ ់ ៌ ៍ ៎ ៏ ័ ៑ ្ ៓ ។" | |
| }, | |
| "training_config": { | |
| "optimizer": "Adam", | |
| "learning_rate": "Cosine scheduling (initial: 0.001)", | |
| "batch_size": 32, | |
| "regularization": "L2 (4e-05)", | |
| "image_augmentation": true, | |
| "data_variants": 4 | |
| }, | |
| "usage_recommendations": { | |
| "optimal_text_length": "3-5 words", | |
| "image_quality": "High contrast, clear text", | |
| "use_cases": ["Road signs", "Document snippets", "Menu items", "Form fields"], | |
| "preprocessing": "Consider text detection for full documents" | |
| }, | |
| "files": { | |
| "inference.pdiparams": "Main model weights (106MB)", | |
| "inference.yml": "Model configuration", | |
| "inference.json": "Model metadata", | |
| "khmer_char_dict.txt": "Character dictionary (188 characters)", | |
| "training_config.yml": "Original training configuration" | |
| }, | |
| "requirements": [ | |
| "paddlepaddle>=2.4.0", | |
| "opencv-python>=4.5.0", | |
| "numpy>=1.19.0", | |
| "pillow>=8.0.0" | |
| ], | |
| "limitations": [ | |
| "Optimized for short text segments (3-5 words)", | |
| "Best performance on clean, printed text", | |
| "May need segmentation for longer text", | |
| "Trained primarily on synthetic data" | |
| ], | |
| "license": "Specify your license", | |
| "created_date": "2025-09-25", | |
| "version": "1.0", | |
| "contact": { | |
| "author": "Your Name", | |
| "email": "your.email@example.com", | |
| "repository": "https://huggingface.co/your-username/khmer-ocr" | |
| } | |
| } | |