Spaces:

cheesecz
/

ocr-ktp

Build error

App Files Files Community

cheesecz committed on May 22, 2025

Commit

bfb6745

verified ·

1 Parent(s): b198dc1

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -332

app.py CHANGED Viewed

@@ -1,356 +1,92 @@
 import gradio as gr
-import json
-import re
-from datetime import datetime
 import cv2
 import numpy as np
-from PIL import Image
-import easyocr
-import requests
-from typing import Dict, List, Optional, Tuple
-# Initialize EasyOCR reader with Indonesian and English
-reader = easyocr.Reader(['id', 'en'], gpu=False)
-class IndonesianDocumentProcessor:
-    def __init__(self):
-        self.document_patterns = {
-            'ktp': {
-                'nik': r'(\d{16})',
-                'name': r'(?:nama|name)[:\s]*([A-Za-z\s]+)',
-                'birth_place': r'(?:tempat.*lahir|place.*birth)[:\s]*([A-Za-z\s]+)',
-                'birth_date': r'(\d{2}[-/]\d{2}[-/]\d{4})',
-                'gender': r'(?:jenis.*kelamin|gender)[:\s]*(laki-laki|perempuan|male|female)',
-                'address': r'(?:alamat|address)[:\s]*([A-Za-z0-9\s,./]+)',
-                'rt_rw': r'rt[/\s]*(\d+)[/\s]*rw[/\s]*(\d+)',
-                'religion': r'(?:agama|religion)[:\s]*([A-Za-z\s]+)',
-                'marital_status': r'(?:status.*perkawinan|marital)[:\s]*([A-Za-z\s]+)',
-                'occupation': r'(?:pekerjaan|occupation)[:\s]*([A-Za-z\s]+)'
-            },
-            'bpjs': {
-                'card_number': r'(\d{13})',
-                'name': r'(?:nama|name)[:\s]*([A-Za-z\s]+)',
-                'birth_date': r'(\d{2}[-/]\d{2}[-/]\d{4})',
-                'valid_until': r'(?:berlaku.*hingga|valid.*until)[:\s]*(\d{2}[-/]\d{2}[-/]\d{4})',
-                'class': r'(?:kelas|class)[:\s]*([I-III]|[1-3])'
-            },
-            'kk': {
-                'kk_number': r'(\d{16})',
-                'head_name': r'(?:kepala.*keluarga|head)[:\s]*([A-Za-z\s]+)',
-                'address': r'(?:alamat|address)[:\s]*([A-Za-z0-9\s,./]+)',
-                'rt_rw': r'rt[/\s]*(\d+)[/\s]*rw[/\s]*(\d+)',
-                'kelurahan': r'(?:kelurahan|village)[:\s]*([A-Za-z\s]+)',
-                'kecamatan': r'(?:kecamatan|district)[:\s]*([A-Za-z\s]+)'
-            },
-            'medical_bill': {
-                'bill_number': r'(?:no.*invoice|bill.*no|nota)[:\s]*([A-Za-z0-9/-]+)',
-                'date': r'(\d{2}[-/]\d{2}[-/]\d{4})',
-                'patient_name': r'(?:nama.*pasien|patient.*name)[:\s]*([A-Za-z\s]+)',
-                'total_amount': r'(?:total|jumlah)[:\s]*(?:rp\.?\s*)?(\d{1,3}(?:[.,]\d{3})*)',
-                'hospital_name': r'(?:rumah.*sakit|hospital|klinik|clinic)[:\s]*([A-Za-z\s]+)'
-            }
-        }
-    def preprocess_image(self, image: np.ndarray) -> np.ndarray:
-        """Preprocess image for better OCR results"""
-        # Convert PIL to numpy if needed
-        if isinstance(image, Image.Image):
-            image = np.array(image)
-        # Convert to grayscale
-        if len(image.shape) == 3:
-            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-        else:
-            gray = image
-        # Apply adaptive threshold
-        thresh = cv2.adaptiveThreshold(
-            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
-        )
-        # Denoise
-        denoised = cv2.fastNlMeansDenoising(thresh)
-        return denoised
-    def extract_text_with_positions(self, image) -> List[Tuple[str, List]]:
-        """Extract text with bounding box positions"""
-        processed_img = self.preprocess_image(image)
-        results = reader.readtext(processed_img)
-        text_data = []
-        for (bbox, text, confidence) in results:
-            if confidence > 0.5:  # Filter low confidence text
-                text_data.append((text.strip(), bbox))
-        return text_data
-    def classify_document(self, text_content: str) -> str:
-        """Classify document type based on text content"""
-        text_lower = text_content.lower()
-        # Check for specific keywords
-        if any(keyword in text_lower for keyword in ['kartu tanda penduduk', 'ktp', 'republik indonesia']):
-            return 'ktp'
-        elif any(keyword in text_lower for keyword in ['bpjs', 'kesehatan', 'jaminan kesehatan']):
-            return 'bpjs'
-        elif any(keyword in text_lower for keyword in ['kartu keluarga', 'kepala keluarga']):
-            return 'kk'
-        elif any(keyword in text_lower for keyword in ['invoice', 'bill', 'tagihan', 'rumah sakit', 'klinik']):
-            return 'medical_bill'
-        else:
-            return 'unknown'
-    def extract_fields(self, text_content: str, doc_type: str) -> Dict:
-        """Extract specific fields based on document type"""
-        if doc_type not in self.document_patterns:
-            return {}
-        patterns = self.document_patterns[doc_type]
-        extracted_fields = {}
-        confidence_scores = {}
-        text_lower = text_content.lower()
-        for field, pattern in patterns.items():
-            matches = re.findall(pattern, text_lower, re.IGNORECASE | re.MULTILINE)
-            if matches:
-                if field == 'rt_rw' and len(matches[0]) == 2:
-                    extracted_fields[field] = f"{matches[0][0]}/{matches[0][1]}"
-                else:
-                    extracted_fields[field] = matches[0].strip() if isinstance(matches[0], str) else matches[0]
-                confidence_scores[field] = 0.8  # Base confidence for regex matches
-        return {
-            'extracted_fields': extracted_fields,
-            'confidence_scores': confidence_scores
         }
-    def process_document(self, image) -> Dict:
-        """Main processing function"""
-        start_time = datetime.now()
-        try:
-            # Extract text with positions
-            text_data = self.extract_text_with_positions(image)
-            # Combine all text for classification and extraction
-            full_text = ' '.join([text for text, _ in text_data])
-            # Classify document
-            doc_type = self.classify_document(full_text)
-            # Extract fields
-            field_data = self.extract_fields(full_text, doc_type)
-            processing_time = (datetime.now() - start_time).total_seconds()
-            result = {
-                'success': True,
-                'document_type': doc_type,
-                'extracted_fields': field_data.get('extracted_fields', {}),
-                'confidence_scores': field_data.get('confidence_scores', {}),
-                'raw_text': full_text,
-                'processing_time_seconds': processing_time,
-                'timestamp': datetime.now().isoformat()
-            }
-            return result
-        except Exception as e:
-            return {
-                'success': False,
-                'error': str(e),
-                'timestamp': datetime.now().isoformat()
-            }
-# Initialize processor
-processor = IndonesianDocumentProcessor()
-def process_uploaded_image(image):
-    """Process uploaded image and return formatted results"""
-    if image is None:
-        return "Please upload an image first.", "{}"
-    result = processor.process_document(image)
-    # Format for display
-    if result['success']:
-        display_text = f"""
-📄 **Document Type:** {result['document_type'].upper()}
-⏱️ **Processing Time:** {result['processing_time_seconds']:.2f} seconds
-🔍 **Extracted Fields:**
-"""
-        for field, value in result['extracted_fields'].items():
-            confidence = result['confidence_scores'].get(field, 0)
-            display_text += f"• **{field.replace('_', ' ').title()}:** {value} (confidence: {confidence:.2f})\n"
-        if not result['extracted_fields']:
-            display_text += "• No structured fields detected\n"
-        display_text += f"\n📝 **Raw Text:**\n{result['raw_text'][:500]}..."
-    else:
-        display_text = f"❌ **Error:** {result['error']}"
-    # Return both display text and JSON
-    json_output = json.dumps(result, indent=2, ensure_ascii=False)
-    return display_text, json_output
-# API endpoint function
-def api_process_image(image):
-    """API endpoint that returns only JSON"""
-    if image is None:
-        return {
-            'success': False,
-            'error': 'No image provided',
-            'timestamp': datetime.now().isoformat()
-        }
-    return processor.process_document(image)
 # Create Gradio interface
-def create_interface():
-    with gr.Blocks(
-        title="TakeCare - Indonesian Document OCR",
-        theme=gr.themes.Soft(),
-        css="""
-        .main-header {
-            text-align: center;
-            background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-            color: white;
-            padding: 2rem;
-            border-radius: 1rem;
-            margin-bottom: 2rem;
-        }
-        .upload-section {
-            border: 2px dashed #667eea;
-            border-radius: 1rem;
-            padding: 2rem;
-            background: #f8f9ff;
-        }
-        """
-    ) as demo:
-        # Header
-        gr.HTML("""
-        <div class="main-header">
-            <h1>🏥 TakeCare - Indonesian Document OCR</h1>
-            <p>Extract data from KTP, BPJS, Kartu Keluarga, and Medical Bills</p>
-        </div>
-        """)
         with gr.Row():
-            with gr.Column(scale=1):
-                gr.HTML('<div class="upload-section">')
-                image_input = gr.Image(
-                    label="📷 Upload Document Image",
-                    type="pil",
-                    sources=["upload", "webcam"],
-                    height=400
                 )
-                gr.HTML('</div>')
-                process_btn = gr.Button(
-                    "🔍 Process Document",
-                    variant="primary",
-                    size="lg"
                 )
-                gr.HTML("""
-                <div style="margin-top: 1rem; padding: 1rem; background: #e3f2fd; border-radius: 0.5rem;">
-                    <h4>📋 Supported Documents:</h4>
-                    <ul>
-                        <li><strong>KTP</strong> - Kartu Tanda Penduduk</li>
-                        <li><strong>BPJS</strong> - Kartu BPJS Kesehatan</li>
-                        <li><strong>KK</strong> - Kartu Keluarga</li>
-                        <li><strong>Medical Bills</strong> - Hospital/Clinic invoices</li>
-                    </ul>
-                </div>
-                """)
-            with gr.Column(scale=1):
-                result_display = gr.Markdown(
-                    label="📊 Processing Results",
-                    value="Upload an image to see results here..."
-                )
-                json_output = gr.Code(
-                    label="📄 JSON Output",
-                    language="json",
-                    value="{}",
-                    interactive=False
-                )
-        # Event handlers
         process_btn.click(
-            fn=process_uploaded_image,
-            inputs=[image_input],
-            outputs=[result_display, json_output]
-        )
-        # Auto-process when image is uploaded
-        image_input.change(
-            fn=process_uploaded_image,
-            inputs=[image_input],
-            outputs=[result_display, json_output]
         )
-        # API section
-        gr.HTML("""
-        <div style="margin-top: 2rem; padding: 1.5rem; background: #f5f5f5; border-radius: 1rem;">
-            <h3>🔌 API Usage</h3>
-            <p><strong>Endpoint:</strong> <code>/api/process</code></p>
-            <p><strong>Method:</strong> POST</p>
-            <p><strong>Content-Type:</strong> multipart/form-data</p>
-            <p><strong>Parameter:</strong> <code>image</code> (file upload)</p>
-            <h4>Example cURL:</h4>
-            <pre><code>curl -X POST -F "image=@document.jpg" https://YOUR_SPACE_URL/api/process</code></pre>
-            <h4>Example Python:</h4>
-            <pre><code>import requests
-files = {'image': open('document.jpg', 'rb')}
-response = requests.post('https://YOUR_SPACE_URL/api/process', files=files)
-result = response.json()</code></pre>
-        </div>
-        """)
-    return demo
-# Create the interface
-demo = create_interface()
-# Add API endpoint
-@demo.api(route="/api/process", method="POST")
-def api_endpoint(image: gr.File):
-    """API endpoint for document processing"""
-    try:
-        if image is None:
-            return {
-                'success': False,
-                'error': 'No image file provided',
-                'timestamp': datetime.now().isoformat()
-            }
-        # Load image
-        pil_image = Image.open(image.name)
-        result = processor.process_document(pil_image)
-        return result
-    except Exception as e:
-        return {
-            'success': False,
-            'error': f'Processing failed: {str(e)}',
-            'timestamp': datetime.now().isoformat()
-        }
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=True,
-        show_api=True
-    )

 import gradio as gr
 import cv2
 import numpy as np
+from src.ocr_service import DocumentOCRService
+from src.document_config import HealthcareProcess, DocumentType
+import json
+# Initialize OCR service
+ocr_service = DocumentOCRService()
+def process_document(image, document_type, process_type):
+    """Process document and return results."""
+    try:
+        # Convert image to numpy array
+        image_np = np.array(image)
+        # Convert to BGR for OpenCV
+        if len(image_np.shape) == 3 and image_np.shape[2] == 3:
+            image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
+        # Process document
+        result = ocr_service.process_document(image_np, document_type, process_type)
+        # Format the output
+        output = {
+            "Extracted Data": result["extracted_data"],
+            "Gemini Analysis": result["gemini_analysis"]["analysis"],
+            "Validation Result": result["validation_result"]
         }
+        return json.dumps(output, indent=2)
+    except Exception as e:
+        return f"Error processing document: {str(e)}"
+def get_requirements(process_type):
+    """Get document requirements for a process."""
+    try:
+        process = HealthcareProcess(process_type)
+        requirements = ocr_service.get_process_requirements(process)
+        return json.dumps(requirements, indent=2)
+    except ValueError as e:
+        return f"Error: {str(e)}"
 # Create Gradio interface
+with gr.Blocks(title="TakeCare OCR Service") as demo:
+    gr.Markdown("# TakeCare OCR Service")
+    gr.Markdown("Upload your healthcare documents for processing and validation.")
+    with gr.Tab("Process Document"):
         with gr.Row():
+            with gr.Column():
+                image_input = gr.Image(type="pil", label="Upload Document")
+                document_type = gr.Dropdown(
+                    choices=[dt.value for dt in DocumentType],
+                    label="Document Type"
                 )
+                process_type = gr.Dropdown(
+                    choices=[pt.value for pt in HealthcareProcess],
+                    label="Process Type (Optional)"
                 )
+                process_btn = gr.Button("Process Document")
+            with gr.Column():
+                output = gr.Textbox(label="Results", lines=20)
         process_btn.click(
+            fn=process_document,
+            inputs=[image_input, document_type, process_type],
+            outputs=output
         )
+    with gr.Tab("View Requirements"):
+        with gr.Row():
+            with gr.Column():
+                req_process_type = gr.Dropdown(
+                    choices=[pt.value for pt in HealthcareProcess],
+                    label="Select Process Type"
+                )
+                view_req_btn = gr.Button("View Requirements")
+            with gr.Column():
+                requirements_output = gr.Textbox(label="Document Requirements", lines=20)
+        view_req_btn.click(
+            fn=get_requirements,
+            inputs=[req_process_type],
+            outputs=requirements_output
+        )
+# Launch the app
 if __name__ == "__main__":
+    demo.launch()