Spaces:

sedtha
/

khmerHandwriting

Sleeping

App Files Files Community

sedtha committed on Nov 17, 2025

Commit

fa685f9

verified ·

1 Parent(s): 0b4a5cb

Upload 3 files

Browse files

Files changed (3) hide show

khmer_model_weights.pth +3 -0
main.py +394 -131
requirements.txt +11 -6

khmer_model_weights.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06d5efe7ca467186f9e4207d99d370bc27721d77504e2a973253e29170e9e309
+size 4007093

main.py CHANGED Viewed

@@ -1,131 +1,394 @@
-# main.py
-import gradio as gr
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from PIL import Image
-import numpy as np
-# -----------------------------
-# 1. Define the model class
-# -----------------------------
-class MyModel(nn.Module):
-    def __init__(self, num_classes=10):
-        super().__init__()
-        self.fc1 = nn.Linear(48*48, 392)
-        self.fc2 = nn.Linear(392, 196)
-        self.fc3 = nn.Linear(196, 98)
-        self.fc4 = nn.Linear(98, num_classes)
-        self.relu = nn.ReLU()
-    def forward(self, x):
-        x = self.fc1(x)
-        x = self.relu(x)
-        x = self.fc2(x)
-        x = self.relu(x)
-        x = self.fc3(x)
-        x = self.relu(x)
-        x = self.fc4(x)
-        return x
-# -----------------------------
-# 2. Manual label mapping
-# -----------------------------
-label_to_idx = {
-    'TA': 0,    # ត
-    'NGO': 1,   # ង
-    'CHA': 2,   # ច
-    'DA': 3,    # ដ
-    'KO': 4,    # ក
-    'NA': 5,    # ណ
-    'KHA': 6,   # ខ
-    'CHHA': 7,  # ឆ
-    'CHHO': 8,  # ឈ
-    'KHO': 9    # ឃ
-}
-idx_to_label = {v: k for k, v in label_to_idx.items()}
-label_to_char = {
-    'TA': 'ត',
-    'NGO': 'ង',
-    'CHA': 'ច',
-    'DA': 'ដ',
-    'KO': 'ក',
-    'NA': 'ណ',
-    'KHA': 'ខ',
-    'CHHA': 'ឆ',
-    'CHHO': 'ឈ',
-    'KHO': 'ឃ'
-}
-num_classes = len(label_to_idx)
-# -----------------------------
-# 3. Load model
-# -----------------------------
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = MyModel(num_classes=num_classes)
-model.load_state_dict(torch.load(r"sedtha/khmerhandwriting", map_location=device))
-model.eval()
-model.to(device)
-# -----------------------------
-# 4. Preprocess image
-# -----------------------------
-def preprocess_image(img: Image.Image):
-    img = img.convert("L").resize((48,48))
-    img_array = np.array(img, dtype=np.float32)
-    img_array = img_array.reshape(1, -1)  # flatten
-    img_array /= 255.0                     # normalize
-    tensor = torch.tensor(img_array).to(device)
-    return tensor
-# -----------------------------
-# 5. Prediction functions
-# -----------------------------
-def predict_image(img: Image.Image):
-    tensor = preprocess_image(img)
-    with torch.no_grad():
-        output = model(tensor)
-        probs = F.softmax(output, dim=1)
-        pred_idx = torch.argmax(probs, dim=1).item()
-        confidence = probs[0, pred_idx].item()
-        pred_label = idx_to_label[pred_idx]
-        pred_char = label_to_char[pred_label]
-    return f"Predicted: {pred_char} ({pred_label}), Confidence: {confidence*100:.2f}%"
-def predict_draw(image_array: np.ndarray):
-    if image_array.shape[-1] == 3:
-        img = Image.fromarray(image_array).convert("L")
-    else:
-        img = Image.fromarray(image_array.squeeze()).convert("L")
-    return predict_image(img)
-# -----------------------------
-# 6. Gradio interface
-# -----------------------------
-def main():
-    with gr.Blocks() as demo:
-        gr.Markdown("## Khmer Character Recognition")
-        with gr.Tab("Upload Image"):
-            img_input = gr.Image(type="pil")
-            img_output = gr.Textbox()
-            btn = gr.Button("Predict")
-            btn.click(predict_image, inputs=img_input, outputs=img_output)
-        with gr.Tab("Draw Letter"):
-            canvas_input = gr.Image(shape=(48,48), image_mode='L', invert_colors=True, source="canvas")
-            draw_output = gr.Textbox()
-            draw_btn = gr.Button("Predict Drawing")
-            draw_btn.click(predict_draw, inputs=canvas_input, outputs=draw_output)
-    demo.launch(share=True)
-# -----------------------------
-# 7. Run app
-# -----------------------------
-if __name__ == "__main__":
-    main()

+"""
+Khmer Character Recognition App
+Recognizes 10 Khmer characters using a neural network model
+"""
+import gradio as gr
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from PIL import Image
+import numpy as np
+from pathlib import Path
+import logging
+# Configure the root logger at INFO level so the model-loading and startup
+# messages logged by this module are emitted, then create the module logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# -----------------------------
+# Model Definition
+# -----------------------------
+class KhmerModel(nn.Module):
+    """Fully connected classifier for flattened 48x48 character images.
+    Layer widths are 2304 -> 392 -> 196 -> 98 -> num_classes, with ReLU after
+    each hidden layer and dropout (p=0.2) after the first two.
+    """
+    def __init__(self, num_classes=10):
+        super().__init__()
+        # The attribute names fc1..fc4 are the state_dict keys; keep them
+        # stable so previously saved weights continue to load.
+        flattened_pixels = 48 * 48
+        self.fc1 = nn.Linear(flattened_pixels, 392)
+        self.fc2 = nn.Linear(392, 196)
+        self.fc3 = nn.Linear(196, 98)
+        self.fc4 = nn.Linear(98, num_classes)
+        self.relu = nn.ReLU()
+        self.dropout = nn.Dropout(0.2)
+    def forward(self, x):
+        """Return unnormalised class scores (logits) for input x."""
+        hidden = x
+        # First two hidden layers: linear -> ReLU -> dropout.
+        for layer in (self.fc1, self.fc2):
+            hidden = self.dropout(self.relu(layer(hidden)))
+        # Third hidden layer has no dropout; the output layer has no activation.
+        hidden = self.relu(self.fc3(hidden))
+        return self.fc4(hidden)
+# -----------------------------
+# Configuration
+# -----------------------------
+class Config:
+    """Static settings shared by the model loader and the UI."""
+    # Model settings
+    IMAGE_SIZE = (48, 48)
+    NUM_CLASSES = 10
+    MODEL_PATH = "khmer_model_weights.pth"
+    # Romanized class name -> class index used by the model output layer.
+    LABEL_TO_IDX = dict(
+        TA=0,    # ត
+        NGO=1,   # ង
+        CHA=2,   # ច
+        DA=3,    # ដ
+        KO=4,    # ក
+        NA=5,    # ណ
+        KHA=6,   # ខ
+        CHHA=7,  # ឆ
+        CHHO=8,  # ឈ
+        KHO=9,   # ឃ
+    )
+    # Romanized class name -> Khmer glyph shown to the user.
+    LABEL_TO_CHAR = dict(
+        TA='ត',
+        NGO='ង',
+        CHA='ច',
+        DA='ដ',
+        KO='ក',
+        NA='ណ',
+        KHA='ខ',
+        CHHA='ឆ',
+        CHHO='ឈ',
+        KHO='ឃ',
+    )
+    @classmethod
+    def get_idx_to_label(cls):
+        """Return a new dict mapping class index -> romanized class name."""
+        mapping = cls.LABEL_TO_IDX
+        return dict(zip(mapping.values(), mapping.keys()))
+# -----------------------------
+# Model Manager
+# -----------------------------
+class ModelManager:
+    """Loads the classifier weights and runs CPU inference on PIL images."""
+    def __init__(self):
+        self.device = torch.device("cpu")  # Force CPU usage
+        # Stays None until load_model() succeeds; predict() checks for this.
+        self.model = None
+        self.config = Config()
+        self.idx_to_label = self.config.get_idx_to_label()
+    def load_model(self):
+        """Load the trained weights from Config.MODEL_PATH into a KhmerModel.
+        Raises:
+            FileNotFoundError: If the weights file does not exist.
+            Exception: Any error raised while reading or applying the weights
+                is logged with its traceback and re-raised.
+        """
+        model_path = Path(self.config.MODEL_PATH)
+        try:
+            if not model_path.exists():
+                raise FileNotFoundError(
+                    f"Model file not found: {model_path}\n"
+                    f"Please ensure '{self.config.MODEL_PATH}' is in the same directory as this script."
+                )
+            model = KhmerModel(num_classes=self.config.NUM_CLASSES)
+            # weights_only=True limits unpickling to tensors and plain
+            # containers, so a tampered checkpoint cannot run arbitrary code.
+            state_dict = torch.load(
+                model_path, map_location=self.device, weights_only=True
+            )
+            model.load_state_dict(state_dict)
+            model.eval()
+            model.to(self.device)
+        except Exception as e:
+            logger.exception("Error loading model: %s", e)
+            raise
+        # Publish the model only after the weights loaded, so a failed load
+        # never leaves a randomly initialised network behind for predict().
+        self.model = model
+        logger.info("Model loaded successfully from %s", model_path)
+    def preprocess_image(self, img: Image.Image) -> torch.Tensor:
+        """Convert a PIL image into the (1, 2304) float32 tensor the model takes.
+        The image is converted to grayscale, resized to Config.IMAGE_SIZE,
+        flattened, and scaled from [0, 255] to [0, 1].
+        """
+        gray = img.convert("L").resize(self.config.IMAGE_SIZE)
+        img_array = np.array(gray, dtype=np.float32)
+        img_array = img_array.reshape(1, -1)  # Flatten to (1, 2304)
+        img_array /= 255.0  # Normalize to [0, 1]
+        return torch.tensor(img_array, dtype=torch.float32).to(self.device)
+    def predict(self, img: Image.Image) -> dict:
+        """Classify one image.
+        Returns:
+            A dict with 'predicted_char', 'predicted_label', 'confidence'
+            (softmax probability of the top class) and 'top3', a list of up to
+            three {'char', 'label', 'confidence'} dicts in descending order.
+        Raises:
+            RuntimeError: If load_model() has not completed successfully.
+            Exception: Any inference error is logged with its traceback and
+                re-raised.
+        """
+        if self.model is None:
+            raise RuntimeError("Model not loaded. Call load_model() first.")
+        try:
+            tensor = self.preprocess_image(img)
+            with torch.no_grad():
+                output = self.model(tensor)
+                probs = F.softmax(output, dim=1)
+                pred_idx = torch.argmax(probs, dim=1).item()
+                confidence = probs[0, pred_idx].item()
+                top3_probs, top3_indices = torch.topk(
+                    probs[0], k=min(3, self.config.NUM_CLASSES)
+                )
+            pred_label = self.idx_to_label[pred_idx]
+            pred_char = self.config.LABEL_TO_CHAR[pred_label]
+            top3_predictions = []
+            for prob, idx in zip(top3_probs, top3_indices):
+                label = self.idx_to_label[idx.item()]
+                top3_predictions.append({
+                    'char': self.config.LABEL_TO_CHAR[label],
+                    'label': label,
+                    'confidence': prob.item()
+                })
+            return {
+                'predicted_char': pred_char,
+                'predicted_label': pred_label,
+                'confidence': confidence,
+                'top3': top3_predictions
+            }
+        except Exception as e:
+            logger.exception("Prediction error: %s", e)
+            raise
+# -----------------------------
+# Gradio Interface Functions
+# -----------------------------
+# Module-level ModelManager shared by the Gradio callbacks below. Creating it
+# does not load any weights; main() calls load_model() before launching the UI.
+model_manager = ModelManager()
+def format_prediction_output(result: dict) -> str:
+    """Render a prediction dict as Markdown.
+    Expects the keys 'predicted_char', 'predicted_label', 'confidence' and
+    'top3' (a list of dicts with 'char', 'label' and 'confidence').
+    """
+    header = (
+        f"## Predicted Character: {result['predicted_char']}\n\n"
+        f"**Romanization:** {result['predicted_label']}\n\n"
+        f"**Confidence:** {result['confidence']*100:.2f}%\n\n"
+        "### Top 3 Predictions:\n"
+    )
+    # One numbered line per candidate, ranks starting at 1.
+    ranked = "".join(
+        f"{rank}. {entry['char']} ({entry['label']}) - {entry['confidence']*100:.2f}%\n"
+        for rank, entry in enumerate(result['top3'], 1)
+    )
+    return header + ranked
+def predict_uploaded_image(img):
+    """Gradio callback for the upload tab.
+    Returns the formatted prediction, or a user-facing message when no image
+    was supplied or prediction failed.
+    """
+    if img is None:
+        return "❌ Please upload an image first!"
+    try:
+        prediction = model_manager.predict(img)
+        rendered = format_prediction_output(prediction)
+    except Exception as exc:
+        # Report the failure in the UI instead of letting the callback raise.
+        return f"❌ Error during prediction: {exc!s}"
+    return rendered
+def predict_drawn_image(image_array):
+    """Gradio callback for the drawing tab.
+    Takes the drawing as a numpy array (2-D, or 3-D with a trailing channel
+    axis) and returns the formatted prediction, or a user-facing message when
+    nothing was drawn or prediction failed.
+    """
+    if image_array is None:
+        return "❌ Please draw a character first!"
+    try:
+        pixels = image_array
+        # Only RGBA input needs special handling: drop its alpha plane.
+        if len(pixels.shape) == 3 and pixels.shape[-1] == 4:
+            pixels = pixels[:, :, :3]
+        img = Image.fromarray(pixels.astype('uint8')).convert("L")
+        return format_prediction_output(model_manager.predict(img))
+    except Exception as exc:
+        # Report the failure in the UI instead of letting the callback raise.
+        return f"❌ Error during prediction: {exc!s}"
+def clear_canvas() -> None:
+    """Return None, the value sent to the drawing component to reset it."""
+    return None
+# -----------------------------
+# Gradio App
+# -----------------------------
+def _predict_sketch(editor_value):
+    """Adapt a Sketchpad payload to predict_drawn_image.
+    Gradio 4 sketch/editor components pass a dict whose 'composite' entry is
+    the flattened drawing. For an RGBA composite the alpha plane is used as the
+    stroke mask, which gives bright strokes on a black background whatever the
+    brush colour is.
+    NOTE(review): assumes undrawn pixels of the composite are fully
+    transparent -- confirm against the pinned Gradio version.
+    """
+    composite = editor_value
+    if isinstance(editor_value, dict):
+        composite = editor_value.get("composite")
+    if composite is None:
+        return predict_drawn_image(None)
+    if composite.ndim == 3 and composite.shape[-1] == 4:
+        composite = composite[:, :, 3]
+        # An all-zero mask means nothing was drawn; show the usual hint.
+        if not composite.any():
+            return predict_drawn_image(None)
+    return predict_drawn_image(composite)
+def create_app():
+    """Build the Gradio Blocks UI (upload, draw and about tabs).
+    Returns:
+        The configured gr.Blocks instance; the caller is responsible for
+        launching it.
+    """
+    # Custom CSS for better styling
+    custom_css = """
+    .gradio-container {
+        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    }
+    .character-display {
+        font-size: 72px;
+        text-align: center;
+        padding: 20px;
+    }
+    """
+    with gr.Blocks(css=custom_css, title="Khmer Character Recognition") as demo:
+        gr.Markdown(
+            """
+            # 🔤 Khmer Character Recognition
+            This app recognizes 10 Khmer consonants using a neural network model.
+            **Supported Characters:**
+            - ត (TA), ង (NGO), ច (CHA), ដ (DA), ក (KO)
+            - ណ (NA), ខ (KHA), ឆ (CHHA), ឈ (CHHO), ឃ (KHO)
+            """
+        )
+        with gr.Tab("📤 Upload Image"):
+            gr.Markdown("Upload an image of a Khmer character for recognition.")
+            with gr.Row():
+                with gr.Column():
+                    img_input = gr.Image(
+                        type="pil",
+                        label="Upload Image",
+                        height=300
+                    )
+                    img_btn = gr.Button("🔍 Predict", variant="primary", size="lg")
+                with gr.Column():
+                    img_output = gr.Markdown(label="Prediction Result")
+            img_btn.click(
+                fn=predict_uploaded_image,
+                inputs=img_input,
+                outputs=img_output
+            )
+        with gr.Tab("✏️ Draw Character"):
+            gr.Markdown(
+                """
+                Draw a Khmer character on the canvas below.
+                **Tips:**
+                - Use a thick brush stroke
+                - Draw the character as clearly as possible
+                - Try to center the character
+                """
+            )
+            with gr.Row():
+                with gr.Column():
+                    # Gradio 4 removed the source/tool/invert_colors keywords
+                    # from gr.Image; free-hand drawing is provided by
+                    # gr.Sketchpad, which is the component that accepts brush.
+                    canvas_input = gr.Sketchpad(
+                        type="numpy",
+                        image_mode="RGBA",
+                        label="Draw Here",
+                        height=400,
+                        width=400,
+                        brush=gr.Brush(
+                            default_size=8,
+                            colors=["#000000"],
+                            default_color="#000000",
+                            color_mode="fixed"
+                        )
+                    )
+                    with gr.Row():
+                        draw_btn = gr.Button("🔍 Predict", variant="primary", size="lg")
+                        clear_btn = gr.Button("🗑️ Clear", size="lg")
+                with gr.Column():
+                    draw_output = gr.Markdown(label="Prediction Result")
+            draw_btn.click(
+                fn=_predict_sketch,
+                inputs=canvas_input,
+                outputs=draw_output
+            )
+            clear_btn.click(
+                fn=clear_canvas,
+                outputs=canvas_input
+            )
+        with gr.Tab("ℹ️ About"):
+            gr.Markdown(
+                """
+                ## About This App
+                This application uses a neural network trained to recognize 10 Khmer consonants.
+                ### Model Architecture
+                - Input: 48x48 grayscale images
+                - 4-layer fully connected neural network
+                - Trained on handwritten Khmer characters
+                ### How to Use
+                1. **Upload Image Tab**: Upload a photo or screenshot of a Khmer character
+                2. **Draw Character Tab**: Draw a character directly on the canvas
+                3. Click "Predict" to see the results
+                ### Tips for Best Results
+                - Use clear, well-formed characters
+                - Ensure good contrast (dark character on light background or vice versa)
+                - Center the character in the image
+                - Avoid cluttered backgrounds
+                ### Technical Details
+                - Framework: PyTorch
+                - Interface: Gradio
+                - Inference: CPU-only (no GPU required)
+                """
+            )
+    return demo
+# -----------------------------
+# Main Execution
+# -----------------------------
+def main():
+    """Load the model weights, build the Gradio UI and serve it.
+    Any startup failure is logged and then re-raised.
+    """
+    launch_options = {
+        "share": True,
+        "server_name": "0.0.0.0",
+        "server_port": 7860,
+        "show_error": True,
+    }
+    try:
+        # Weights must be loaded before the UI can serve predictions.
+        logger.info("Loading model...")
+        model_manager.load_model()
+        logger.info("Model loaded successfully!")
+        logger.info("Starting Gradio interface...")
+        app = create_app()
+        app.launch(**launch_options)
+    except Exception as exc:
+        logger.error("Failed to start application: %s", exc)
+        raise
+if __name__ == "__main__":
+    main()

requirements.txt CHANGED Viewed

@@ -1,6 +1,11 @@
-torch
-torchvision
-numpy
-pillow
-gradio
-scikit-learn

+# Core dependencies
+torch==2.1.0
+torchvision==0.16.0
+gradio==4.44.0
+# Image processing
+Pillow==10.1.0
+numpy==1.24.3
+# Extra package index serving CPU-only PyTorch wheels (pip applies this option to the whole file)
+--extra-index-url https://download.pytorch.org/whl/cpu