Thareah
/

ocr_khmer_lite_train

Model card Files Files and versions

ocr_khmer_lite_train / model_info.json

Thareah's picture

Upload folder using huggingface_hub

37795b9 verified 3 months ago

history blame contribute delete

2.87 kB

	{
	"model_name": "Khmer OCR Recognition Model",
	"description": "CRNN-based OCR model specifically trained for Khmer text recognition",
	"framework": "PaddleOCR",
	"architecture": {
	"algorithm": "CRNN",
	"backbone": "ResNet34",
	"neck": "SequenceEncoder (RNN)",
	"head": "CTCHead",
	"loss": "CTCLoss"
	},
	"performance": {
	"accuracy": 98.45,
	"normalized_edit_distance": 99.90,
	"inference_speed_fps": 326,
	"best_epoch": 29,
	"total_epochs": 30
	},
	"training_data": {
	"training_images": 13253,
	"validation_images": 4315,
	"total_images": 17568,
	"text_length_range": "3-5 words",
	"image_size": "600x80 pixels (training), 320x32 pixels (inference)",
	"font": "KhmerOS",
	"augmentation": ["clean", "blurred", "noisy", "noise_blur"]
	},
	"model_specifications": {
	"input_shape": [3, 32, 320],
	"max_text_length": 25,
	"character_count": 188,
	"supported_languages": ["Khmer", "Latin"],
	"model_size_mb": 106
	},
	"character_set": {
	"khmer_consonants": "ក ខ គ ឃ ង ច ឆ ជ ឈ ញ ដ ឋ ឌ ឍ ណ ត ថ ទ ធ ន ប ផ ព ភ ម យ រ ល វ ស ហ ឡ អ",
	"khmer_vowels": "ា ិ ី ឹ ឺ ុ ូ ួ ើ ឿ ៀ េ ែ ៃ ោ ៅ ំ ះ ៈ",
	"khmer_numerals": "០ ១ ២ ៣ ៤ ៥ ៦ ៧ ៨ ៩",
	"latin_characters": "A-Z, a-z, 0-9",
	"punctuation": ". , ! ? - ( ) [ ] « » ™ ® etc.",
	"khmer_symbols": "។ ៕ ៖ ៗ ៉ ៊ ់ ៌ ៍ ៏ ័ ្"
	},
	"training_config": {
	"optimizer": "Adam",
	"learning_rate": "Cosine scheduling (initial: 0.001)",
	"batch_size": 32,
	"regularization": "L2 (4e-05)",
	"image_augmentation": true,
	"data_variants": 4
	},
	"usage_recommendations": {
	"optimal_text_length": "3-5 words",
	"image_quality": "High contrast, clear text",
	"use_cases": ["Road signs", "Document snippets", "Menu items", "Form fields"],
	"preprocessing": "Consider text detection for full documents"
	},
	"files": {
	"inference.pdiparams": "Main model weights (106MB)",
	"inference.yml": "Model configuration",
	"inference.json": "Model metadata",
	"khmer_char_dict.txt": "Character dictionary (188 characters)",
	"training_config.yml": "Original training configuration"
	},
	"requirements": [
	"paddlepaddle>=2.4.0",
	"opencv-python>=4.5.0",
	"numpy>=1.19.0",
	"pillow>=8.0.0"
	],
	"limitations": [
	"Optimized for short text segments (3-5 words)",
	"Best performance on clean, printed text",
	"May need segmentation for longer text",
	"Trained primarily on synthetic data"
	],
	"license": "Specify your license",
	"created_date": "2025-09-25",
	"version": "1.0",
	"contact": {
	"author": "Your Name",
	"email": "your.email@example.com",
	"repository": "https://huggingface.co/Thareah/ocr_khmer_lite_train"
	}
	}