{
  "model_name": "Khmer OCR Recognition Model",
  "description": "CRNN-based OCR model specifically trained for Khmer text recognition",
  "framework": "PaddleOCR",
  "architecture": {
    "algorithm": "CRNN",
    "backbone": "ResNet34",
    "neck": "SequenceEncoder (RNN)",
    "head": "CTCHead",
    "loss": "CTCLoss"
  },
  "performance": {
    "accuracy": 98.45,
    "normalized_edit_distance": 99.90,
    "inference_speed_fps": 326,
    "best_epoch": 29,
    "total_epochs": 30
  },
  "training_data": {
    "training_images": 13253,
    "validation_images": 4315,
    "total_images": 17568,
    "text_length_range": "3-5 words",
    "image_size": "600x80 pixels (training), 320x32 pixels (inference)",
    "font": "KhmerOS",
    "augmentation": ["clean", "blurred", "noisy", "noise_blur"]
  },
  "model_specifications": {
    "input_shape": [3, 32, 320],
    "max_text_length": 25,
    "character_count": 188,
    "supported_languages": ["Khmer", "Latin"],
    "model_size_mb": 106
  },
  "character_set": {
    "khmer_consonants": "ក ខ គ ឃ ង ច ឆ ជ ឈ ញ ដ ឋ ឌ ឍ ណ ត ថ ទ ធ ន ប ផ ព ភ ម យ រ ល វ ស ហ ឡ អ",
    "khmer_vowels": "ា ិ ី ឹ ឺ ុ ូ ួ ើ ឿ ៀ េ ែ ៃ ោ ៅ ំ ះ ៈ",
    "khmer_numerals": "០ ១ ២ ៣ ៤ ៥ ៦ ៧ ៨ ៩",
    "latin_characters": "A-Z, a-z, 0-9",
    "punctuation": ". , ! ? - ( ) [ ] « » ™ ® etc.",
    "khmer_symbols": "។ ៕ ៖ ៗ ៉ ៊ ់ ៌ ៍ ៏ ័ ្"
  },
  "training_config": {
    "optimizer": "Adam",
    "learning_rate": "Cosine scheduling (initial: 0.001)",
    "batch_size": 32,
    "regularization": "L2 (4e-05)",
    "image_augmentation": true,
    "data_variants": 4
  },
  "usage_recommendations": {
    "optimal_text_length": "3-5 words",
    "image_quality": "High contrast, clear text",
    "use_cases": ["Road signs", "Document snippets", "Menu items", "Form fields"],
    "preprocessing": "For full documents, run text detection first and pass each cropped text line to this model"
  },
  "files": {
    "inference.pdiparams": "Main model weights (106MB)",
    "inference.yml": "Model configuration",
    "inference.json": "Model metadata",
    "khmer_char_dict.txt": "Character dictionary (188 characters)",
    "training_config.yml": "Original training configuration"
  },
  "requirements": [
    "paddlepaddle>=2.4.0",
    "opencv-python>=4.5.0",
    "numpy>=1.19.0",
    "pillow>=8.0.0"
  ],
  "limitations": [
    "Optimized for short text segments (3-5 words)",
    "Best performance on clean, printed text",
    "May need segmentation for longer text",
    "Trained primarily on synthetic data"
  ],
  "license": "Specify your license",
  "created_date": "2025-09-25",
  "version": "1.0",
  "contact": {
    "author": "Your Name",
    "email": "your.email@example.com",
    "repository": "https://huggingface.co/your-username/khmer-ocr"
  }
}