{ "model_name": "Khmer OCR Recognition Model", "description": "CRNN-based OCR model specifically trained for Khmer text recognition", "framework": "PaddleOCR", "architecture": { "algorithm": "CRNN", "backbone": "ResNet34", "neck": "SequenceEncoder (RNN)", "head": "CTCHead", "loss": "CTCLoss" }, "performance": { "accuracy": 98.45, "normalized_edit_distance": 99.90, "inference_speed_fps": 326, "best_epoch": 29, "total_epochs": 30 }, "training_data": { "training_images": 13253, "validation_images": 4315, "total_images": 17568, "text_length_range": "3-5 words", "image_size": "600x80 pixels (training), 320x32 (inference)", "font": "KhmerOS", "augmentation": ["clean", "blurred", "noisy", "noise_blur"] }, "model_specifications": { "input_shape": [3, 32, 320], "max_text_length": 25, "character_count": 188, "supported_languages": ["Khmer", "Latin"], "model_size_mb": 106 }, "character_set": { "khmer_consonants": "ក ខ គ ឃ ង ច ឆ ជ ឈ ញ ដ ឋ ឌ ឍ ណ ត ថ ទ ធ ន ប ផ ព ភ ម យ រ ល វ ស ហ ឡ អ", "khmer_vowels": "ា ិ ី ឹ ឺ ុ ូ ួ ើ ឿ ៀ េ ែ ៃ ោ ៅ ំ ះ ៈ", "khmer_numerals": "០ ១ ២ ៣ ៤ ៥ ៦ ៧ ៨ ៩", "latin_characters": "A-Z, a-z, 0-9", "punctuation": ". , ! ? - ( ) [ ] « » ™ ® etc.", "khmer_symbols": "។ ៕ ៖ ៗ ៉ ៊ ់ ៌ ៍ ៏ ័ ្" }, "training_config": { "optimizer": "Adam", "learning_rate": "Cosine scheduling (initial: 0.001)", "batch_size": 32, "regularization": "L2 (4e-05)", "image_augmentation": true, "data_variants": 4 }, "usage_recommendations": { "optimal_text_length": "3-5 words", "image_quality": "High contrast, clear text", "use_cases": ["Road signs", "Document snippets", "Menu items", "Form fields"], "preprocessing": "Consider text detection for full documents" }, "files": { "inference.pdiparams": "Main model weights (106MB)", "inference.yml": "Model configuration", "inference.json": "Model metadata", "khmer_char_dict.txt": "Character dictionary (188 characters)", "training_config.yml": "Original training configuration" }, "requirements": [ "paddlepaddle>=2.4.0", "opencv-python>=4.5.0", "numpy>=1.19.0", "pillow>=8.0.0" ], "limitations": [ "Optimized for short text segments (3-5 words)", "Best performance on clean, printed text", "May need segmentation for longer text", "Trained primarily on synthetic data" ], "license": "Specify your license", "created_date": "2025-09-25", "version": "1.0", "contact": { "author": "Your Name", "email": "your.email@example.com", "repository": "https://huggingface.co/your-username/khmer-ocr" } }