Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitkeep +0 -0
- README.md +3 -9
- calculate_ocr_accuracy.py +729 -0
- data.sql +140 -0
- ocr_accuracy_dashboard.py +238 -0
- results.json +514 -0
- results.md +290 -0
- unused/label.sql +0 -0
- unused/page1_label.sql +29 -0
- unused/page1_prediction.sql +36 -0
- unused/page2_label.sql +40 -0
- unused/page2_prediction.sql +47 -0
- unused/page3_label.sql +0 -0
- unused/page3_prediction.sql +90 -0
- unused/prediction.sql +68 -0
- unused/sample_accuracy_output.json +719 -0
.gitkeep
ADDED
|
File without changes
|
README.md
CHANGED
|
@@ -1,12 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
colorFrom: yellow
|
| 5 |
-
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 6.0.
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: OCR_Dashboard
|
| 3 |
+
app_file: ocr_accuracy_dashboard.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
+
sdk_version: 6.0.1
|
|
|
|
|
|
|
| 6 |
---
|
|
|
|
|
|
calculate_ocr_accuracy.py
ADDED
|
@@ -0,0 +1,729 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Calculate OCR accuracy using edit distance (Levenshtein distance)
|
| 4 |
+
between label and predict data from JSON files or BigQuery SQL queries.
|
| 5 |
+
|
| 6 |
+
Supports two modes:
|
| 7 |
+
1. Separate files: Provide --label and --predict files (records matched by keys)
|
| 8 |
+
2. Combined data: Provide --data file with label fields having a suffix (e.g., _label)
|
| 9 |
+
|
| 10 |
+
Example with combined data (data.sql):
|
| 11 |
+
python calculate_ocr_accuracy.py --data scripts/data.sql --normalize
|
| 12 |
+
|
| 13 |
+
Example with separate files:
|
| 14 |
+
python calculate_ocr_accuracy.py --label labels.json --predict predicts.json --normalize
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import json
|
| 18 |
+
import sys
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
from typing import Dict, List, Tuple, Optional, Any
|
| 21 |
+
from collections import defaultdict
|
| 22 |
+
|
| 23 |
+
try:
|
| 24 |
+
from google.cloud import bigquery
|
| 25 |
+
BIGQUERY_AVAILABLE = True
|
| 26 |
+
except ImportError:
|
| 27 |
+
BIGQUERY_AVAILABLE = False
|
| 28 |
+
print("Warning: google-cloud-bigquery not installed. SQL query support disabled.")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def levenshtein_distance(s1: str, s2: str) -> int:
    """
    Compute the Levenshtein (edit) distance between two strings.

    Args:
        s1: First string
        s2: Second string

    Returns:
        Minimum number of single-character insertions, deletions and
        substitutions needed to turn one string into the other
    """
    # Put the longer string on the outer loop so every DP row has the
    # length of the shorter string.
    longer, shorter = (s1, s2) if len(s1) >= len(s2) else (s2, s1)

    # Turning a string into the empty string costs one deletion per character.
    if len(shorter) == 0:
        return len(longer)

    # above[k] is the distance between the processed prefix of `longer`
    # and the first k characters of `shorter`.
    above = list(range(len(shorter) + 1))
    for row_number, long_char in enumerate(longer, start=1):
        row = [row_number]
        for col, short_char in enumerate(shorter):
            via_insert = above[col + 1] + 1
            via_delete = row[col] + 1
            via_substitute = above[col] + (0 if long_char == short_char else 1)
            row.append(min(via_insert, via_delete, via_substitute))
        above = row

    return above[-1]
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def normalize_string(s: str) -> str:
    """
    Prepare a string for case- and padding-insensitive comparison.

    Args:
        s: Input string

    Returns:
        The input lowercased, with leading and trailing whitespace removed
    """
    lowered = s.lower()
    return lowered.strip()
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def calculate_field_accuracy(
    label_value: str,
    predict_value: str,
    normalize: bool = False
) -> Tuple[int, float, bool]:
    """
    Calculate accuracy metrics for a single field.

    Args:
        label_value: Ground truth value (None is treated as an empty string)
        predict_value: Predicted value (None is treated as an empty string)
        normalize: Whether to normalize strings (lowercase, strip) before comparison

    Returns:
        Tuple of (edit_distance, normalized_accuracy, exact_match), where
        normalized_accuracy = 1 - edit_distance / max(len(label), len(predict), 1)
    """
    # A missing value is compared as empty text instead of crashing in
    # .lower() / len().
    label_norm = '' if label_value is None else label_value
    predict_norm = '' if predict_value is None else predict_value

    if normalize:
        label_norm = normalize_string(label_norm)
        predict_norm = normalize_string(predict_norm)

    exact_match = label_norm == predict_norm
    edit_dist = levenshtein_distance(label_norm, predict_norm)

    # The floor of 1 keeps the denominator non-zero when both strings are
    # empty; in that case edit_dist is 0 and the accuracy is 1.0.
    max_len = max(len(label_norm), len(predict_norm), 1)
    normalized_accuracy = 1.0 - (edit_dist / max_len)

    return edit_dist, normalized_accuracy, exact_match
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def execute_bigquery_query(query_file: str, project_id: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Run the SQL stored in a file against BigQuery and return the rows.

    Args:
        query_file: Path to SQL query file
        project_id: GCP project ID; when falsy the client is created without
            an explicit project

    Returns:
        List of dictionaries, one per row. Every value is converted to a
        string, with None becoming an empty string.

    Raises:
        ImportError: If google-cloud-bigquery could not be imported
        FileNotFoundError: If query_file does not exist
    """
    if not BIGQUERY_AVAILABLE:
        raise ImportError("google-cloud-bigquery is required for SQL query support. "
                          "Install it with: poetry add google-cloud-bigquery")

    sql_path = Path(query_file)
    if not sql_path.exists():
        raise FileNotFoundError(f"Query file not found: {query_file}")

    sql_text = sql_path.read_text(encoding='utf-8')

    client = bigquery.Client(project=project_id) if project_id else bigquery.Client()

    print(f"Executing BigQuery query from: {query_file}")
    rows = client.query(sql_text).result()

    # Stringify every cell so downstream comparison always sees text;
    # NULL cells become '' rather than the text 'None'.
    records = [
        {column: ('' if cell is None else str(cell)) for column, cell in row.items()}
        for row in rows
    ]

    print(f"Retrieved {len(records)} records from BigQuery")
    return records
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def load_data_from_file(file_path: str, project_id: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Load records from a JSON file or from a BigQuery SQL query file.

    Files whose extension is '.sql' (case-insensitive) are executed through
    execute_bigquery_query; anything else is parsed as JSON.

    Args:
        file_path: Path to JSON file or SQL query file
        project_id: GCP project ID for BigQuery queries (optional)

    Returns:
        The query rows, or whatever the JSON document decodes to

    Raises:
        FileNotFoundError: If file_path does not exist
    """
    source = Path(file_path)

    if not source.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    is_sql = source.suffix.lower() == '.sql'
    if is_sql:
        return execute_bigquery_query(file_path, project_id)

    # Anything that is not .sql is assumed to be JSON.
    with source.open('r', encoding='utf-8') as handle:
        return json.load(handle)
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def match_records(
    labels: List[Dict[str, Any]],
    predicts: List[Dict[str, Any]],
    match_keys: List[str] = None
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Pair label records with predict records through a shared key field.

    Only ONE key is used: the first entry of match_keys that is present in
    the first record of both datasets. If no such key exists (or either
    dataset is empty) the records are paired by position instead.

    Args:
        labels: List of label records
        predicts: List of predict records
        match_keys: Candidate field names, in priority order
            (default: ['filename', 'project_id_mother'])

    Returns:
        Tuple of (matched_labels, matched_predicts) with same length and order
    """
    if match_keys is None:
        match_keys = ['filename', 'project_id_mother']

    # Candidate keys are checked against the first record of each side only.
    usable_keys = []
    if labels and predicts:
        shared_fields = set(labels[0].keys()) & set(predicts[0].keys())
        usable_keys = [candidate for candidate in match_keys if candidate in shared_fields][:1]

    if not usable_keys:
        print("Warning: No matching keys found. Matching by index order.")
        common_length = min(len(labels), len(predicts))
        return labels[:common_length], predicts[:common_length]

    match_key = usable_keys[0]
    print(f"Matching records by key: {match_key}")

    # Index predictions by their stringified key. Records with an empty key
    # are left out, and a later duplicate key replaces an earlier one.
    predict_index = {}
    for candidate_record in predicts:
        lookup = str(candidate_record.get(match_key, ''))
        if not lookup:
            continue
        predict_index[lookup] = candidate_record

    # Walk the labels in order and keep those that have a prediction.
    label_lookups = (str(record.get(match_key, '')) for record in labels)
    pairs = [
        (record, predict_index[lookup])
        for record, lookup in zip(labels, label_lookups)
        if lookup in predict_index
    ]
    matched_labels = [label for label, _ in pairs]
    matched_predicts = [predict for _, predict in pairs]

    print(f"Matched {len(matched_labels)} records out of {len(labels)} labels and {len(predicts)} predicts")
    return matched_labels, matched_predicts
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def calculate_ocr_accuracy_from_combined_data(
    data_file: str,
    normalize: bool = False,
    project_id: Optional[str] = None,
    label_suffix: str = '_label'
) -> Dict:
    """
    Calculate OCR accuracy from a single data source where label and predict
    fields are in the same records (labels have a suffix like '_label').

    A field is compared when both '<name>' and '<name><label_suffix>' appear
    in the first record. The metadata columns 'filename', 'project_id_mother',
    'id' and 'rn' are never compared. Records where both values are blank are
    skipped for that field.

    Args:
        data_file: Path to JSON file or SQL query file containing combined data
        normalize: Whether to normalize strings before comparison
        project_id: GCP project ID for BigQuery queries (optional)
        label_suffix: Suffix used to identify label fields (default: '_label')

    Returns:
        Dictionary mapping each compared field name to its metrics
        (exact_match_rate, average_edit_distance, average_normalized_accuracy,
        min_accuracy, max_edit_distance, exact_matches, total_records,
        examples). Empty when there are no records or no field pairs.
    """
    records = load_data_from_file(data_file, project_id)

    if not records:
        print("Warning: No records found in data file")
        return {}

    # Field discovery looks at the first record only.
    all_fields = set(records[0].keys())

    # Metadata columns that must not be scored.
    exclude_fields = {'filename', 'project_id_mother', 'id', 'rn'}

    # predict_field_name -> label_field_name
    field_pairs = {}
    for field in all_fields:
        if not field.endswith(label_suffix):
            continue
        predict_field = field[:-len(label_suffix)]
        if predict_field in all_fields and predict_field not in exclude_fields:
            field_pairs[predict_field] = field

    if not field_pairs:
        print("Warning: No matching field pairs found. "
              f"Looking for fields with '{label_suffix}' suffix.")
        return {}

    predict_fields = sorted(field_pairs)
    print(f"Found {len(predict_fields)} field pairs to compare")
    print(f"Fields: {', '.join(predict_fields)}")

    field_stats = {
        name: {
            'total_records': 0,
            'exact_matches': 0,
            'total_edit_distance': 0,
            'total_normalized_accuracy': 0.0,
            'min_accuracy': 1.0,
            'max_edit_distance': 0,
            'examples': []  # Store examples of mismatches
        }
        for name in predict_fields
    }

    for record_idx, record in enumerate(records):
        for predict_field in predict_fields:
            raw_label = record.get(field_pairs[predict_field])
            raw_predict = record.get(predict_field)

            # JSON null arrives as None; str(None) would become the text
            # 'None' and be scored as real content. Treat it as empty, the
            # same way BigQuery NULLs are loaded.
            label_value = '' if raw_label is None else str(raw_label)
            predict_value = '' if raw_predict is None else str(raw_predict)

            # Skip if both label and predict are blank
            if not label_value.strip() and not predict_value.strip():
                continue

            edit_dist, normalized_acc, exact_match = calculate_field_accuracy(
                label_value, predict_value, normalize
            )

            stats = field_stats[predict_field]
            stats['total_records'] += 1
            stats['total_edit_distance'] += edit_dist
            stats['total_normalized_accuracy'] += normalized_acc
            stats['max_edit_distance'] = max(stats['max_edit_distance'], edit_dist)
            stats['min_accuracy'] = min(stats['min_accuracy'], normalized_acc)

            if exact_match:
                stats['exact_matches'] += 1
            elif len(stats['examples']) < 3:
                # Keep at most the first three mismatches per field.
                stats['examples'].append({
                    'record_idx': record_idx,
                    'label': label_value,
                    'predict': predict_value,
                    'edit_distance': edit_dist,
                    'accuracy': normalized_acc
                })

    results = {}
    for predict_field in predict_fields:
        stats = field_stats[predict_field]
        total = stats['total_records']

        # Fields that were blank in every record produce no entry.
        if total == 0:
            continue

        results[predict_field] = {
            'exact_match_rate': stats['exact_matches'] / total,
            'average_edit_distance': stats['total_edit_distance'] / total,
            'average_normalized_accuracy': stats['total_normalized_accuracy'] / total,
            'min_accuracy': stats['min_accuracy'],
            'max_edit_distance': stats['max_edit_distance'],
            'exact_matches': stats['exact_matches'],
            'total_records': total,
            'examples': stats['examples']
        }

    return results
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def calculate_ocr_accuracy(
    label_file: str,
    predict_file: str,
    normalize: bool = False,
    match_keys: Optional[List[str]] = None,
    project_id: Optional[str] = None
) -> Dict:
    """
    Calculate OCR accuracy per field between label and predict data.
    Supports both JSON files and BigQuery SQL query files.

    The fields compared are the keys of the first matched label record,
    minus the metadata columns 'filename' and 'project_id_mother'. Records
    where both values are blank are skipped for that field.

    Args:
        label_file: Path to label JSON file or SQL query file
        predict_file: Path to predict JSON file or SQL query file
        normalize: Whether to normalize strings before comparison
        match_keys: List of field names to use for matching records
        project_id: GCP project ID for BigQuery queries (optional)

    Returns:
        Dictionary mapping each compared field name to its metrics
        (exact_match_rate, average_edit_distance, average_normalized_accuracy,
        min_accuracy, max_edit_distance, exact_matches, total_records,
        examples). Empty when nothing could be compared.
    """
    labels = load_data_from_file(label_file, project_id)
    predicts = load_data_from_file(predict_file, project_id)

    labels, predicts = match_records(labels, predicts, match_keys)

    # Defensive guard: the pairing below needs both lists to line up.
    if len(labels) != len(predicts):
        print(f"Warning: Label file has {len(labels)} records, "
              f"predict file has {len(predicts)} records")
        min_records = min(len(labels), len(predicts))
        labels = labels[:min_records]
        predicts = predicts[:min_records]

    if not labels:
        return {}

    # Exclude metadata columns from accuracy calculation
    exclude_fields = {'filename', 'project_id_mother'}
    field_names = [f for f in labels[0].keys() if f not in exclude_fields]

    if not field_names:
        print("Warning: No fields to calculate accuracy for (all fields excluded)")
        return {}

    print(f"Calculating accuracy for {len(field_names)} fields (excluding: {', '.join(exclude_fields)})")

    field_stats = {
        name: {
            'total_records': 0,
            'exact_matches': 0,
            'total_edit_distance': 0,
            'total_normalized_accuracy': 0.0,
            'min_accuracy': 1.0,
            'max_edit_distance': 0,
            'examples': []  # Store examples of mismatches
        }
        for name in field_names
    }

    for record_idx, (label_record, predict_record) in enumerate(zip(labels, predicts)):
        for field_name in field_names:
            raw_label = label_record.get(field_name)
            raw_predict = predict_record.get(field_name)

            # JSON null arrives as None; str(None) would become the text
            # 'None' and be scored as real content. Treat it as empty, the
            # same way BigQuery NULLs are loaded.
            label_value = '' if raw_label is None else str(raw_label)
            predict_value = '' if raw_predict is None else str(raw_predict)

            # Skip if both label and predict are blank
            if not label_value.strip() and not predict_value.strip():
                continue

            edit_dist, normalized_acc, exact_match = calculate_field_accuracy(
                label_value, predict_value, normalize
            )

            stats = field_stats[field_name]
            stats['total_records'] += 1
            stats['total_edit_distance'] += edit_dist
            stats['total_normalized_accuracy'] += normalized_acc
            stats['max_edit_distance'] = max(stats['max_edit_distance'], edit_dist)
            stats['min_accuracy'] = min(stats['min_accuracy'], normalized_acc)

            if exact_match:
                stats['exact_matches'] += 1
            elif len(stats['examples']) < 3:
                # Keep at most the first three mismatches per field.
                stats['examples'].append({
                    'record_idx': record_idx,
                    'label': label_value,
                    'predict': predict_value,
                    'edit_distance': edit_dist,
                    'accuracy': normalized_acc
                })

    results = {}
    for field_name in field_names:
        stats = field_stats[field_name]
        total = stats['total_records']

        # Fields that were blank in every record produce no entry.
        if total == 0:
            continue

        results[field_name] = {
            'exact_match_rate': stats['exact_matches'] / total,
            'average_edit_distance': stats['total_edit_distance'] / total,
            'average_normalized_accuracy': stats['total_normalized_accuracy'] / total,
            'min_accuracy': stats['min_accuracy'],
            'max_edit_distance': stats['max_edit_distance'],
            'exact_matches': stats['exact_matches'],
            'total_records': total,
            'examples': stats['examples']
        }

    return results
|
| 493 |
+
|
| 494 |
+
|
| 495 |
+
def print_results(results: Dict, output_file: Optional[str] = None) -> None:
    """
    Print accuracy results in a formatted way.

    The report has a summary (unweighted means over fields), a per-field
    table sorted by average normalized accuracy (best first), and up to
    three stored mismatch examples for every field that has any.

    Args:
        results: Dictionary containing accuracy metrics per field, as
            returned by the calculate_ocr_accuracy* functions. May be empty.
        output_file: Optional file path to save the same report text
    """
    rule = "=" * 100
    divider = "-" * 100
    output_lines = [
        rule,
        "OCR ACCURACY REPORT (Edit Distance Analysis)",
        rule,
        "",
    ]

    if not results:
        # The calculators return {} when nothing could be compared.
        # Averaging over zero fields would divide by zero, so report the
        # situation instead.
        output_lines.append("No fields were analyzed; nothing to report.")
    else:
        # Sort fields by average normalized accuracy (descending)
        sorted_fields = sorted(
            results.items(),
            key=lambda item: item[1]['average_normalized_accuracy'],
            reverse=True
        )

        # Summary statistics: plain means over fields, not weighted by
        # each field's record count.
        total_fields = len(results)
        avg_exact_match = sum(r['exact_match_rate'] for r in results.values()) / total_fields
        avg_normalized_acc = sum(r['average_normalized_accuracy'] for r in results.values()) / total_fields

        output_lines.append("SUMMARY STATISTICS")
        output_lines.append(divider)
        output_lines.append(f"Total Fields Analyzed: {total_fields}")
        output_lines.append(f"Overall Exact Match Rate: {avg_exact_match:.2%}")
        output_lines.append(f"Overall Average Normalized Accuracy: {avg_normalized_acc:.2%}")
        output_lines.append("")

        # Per-field statistics
        output_lines.append("PER-FIELD STATISTICS")
        output_lines.append(divider)
        output_lines.append(f"{'Field Name':<50} {'Exact Match':<15} {'Avg Accuracy':<15} {'Avg Edit Dist':<15}")
        output_lines.append(divider)

        for field_name, stats in sorted_fields:
            exact_match_pct = stats['exact_match_rate'] * 100
            avg_acc = stats['average_normalized_accuracy'] * 100
            avg_edit = stats['average_edit_distance']

            output_lines.append(
                f"{field_name:<50} {exact_match_pct:>6.2f}% ({stats['exact_matches']}/{stats['total_records']}) "
                f"{avg_acc:>6.2f}% {avg_edit:>6.2f}"
            )

        output_lines.append("")
        output_lines.append("")

        # Detailed examples for fields with errors
        output_lines.append("EXAMPLES OF MISMATCHES (Top 3 per field)")
        output_lines.append(divider)

        for field_name, stats in sorted_fields:
            if stats['exact_matches'] < stats['total_records'] and stats['examples']:
                output_lines.append(f"\nField: {field_name}")
                output_lines.append(f" Exact Match Rate: {stats['exact_match_rate']:.2%}")
                output_lines.append(f" Average Accuracy: {stats['average_normalized_accuracy']:.2%}")

                for example in stats['examples']:
                    output_lines.append(f" Record {example['record_idx']}:")
                    output_lines.append(f" Label: '{example['label']}'")
                    output_lines.append(f" Predict: '{example['predict']}'")
                    output_lines.append(f" Edit Distance: {example['edit_distance']}, "
                                        f"Accuracy: {example['accuracy']:.2%}")

    # Print to console
    output_text = "\n".join(output_lines)
    print(output_text)

    # Save to file if specified
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(output_text)
        print(f"\nResults saved to: {output_file}")
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
def _build_arg_parser():
    """Build the command-line parser for the OCR accuracy tool."""
    import argparse  # local import: only needed when running as a CLI

    parser = argparse.ArgumentParser(
        description='Calculate OCR accuracy using edit distance between label and predict data. '
                    'Supports JSON files and BigQuery SQL query files (.sql). '
                    'Can use either two separate files (--label and --predict) or a single '
                    'combined file (--data) with label fields having a suffix (e.g., _label).'
    )

    # Mode selection: either combined data or separate label/predict files
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument(
        '--data',
        type=str,
        help='Path to combined data file (JSON or SQL) with label and predict fields in same records. '
             'Label fields should have a suffix (default: _label). Use this for queries like data.sql'
    )
    input_group.add_argument(
        '--label',
        type=str,
        help='Path to label JSON file or SQL query file (use with --predict)'
    )

    parser.add_argument(
        '--predict',
        type=str,
        help='Path to predict JSON file or SQL query file (use with --label)'
    )
    parser.add_argument(
        '--output',
        type=str,
        default=None,
        help='Path to output file for results (optional)'
    )
    parser.add_argument(
        '--normalize',
        action='store_true',
        help='Normalize strings (lowercase, strip) before comparison'
    )
    parser.add_argument(
        '--json-output',
        type=str,
        default=None,
        help='Path to save results as JSON (optional)'
    )
    parser.add_argument(
        '--match-keys',
        type=str,
        nargs='+',
        default=['filename', 'project_id_mother'],
        help='Field names to use for matching records (default: filename project_id_mother). '
             'Only used with --label/--predict mode.'
    )
    parser.add_argument(
        '--project-id',
        type=str,
        default=None,
        help='GCP project ID for BigQuery (default: uses default from environment)'
    )
    parser.add_argument(
        '--label-suffix',
        type=str,
        default='_label',
        help='Suffix used to identify label fields in combined data mode (default: _label)'
    )
    return parser


def _require_file(path, description):
    """Exit with status 1, reporting on stderr, when *path* does not exist."""
    if not path.exists():
        print(f"Error: {description} file not found: {path}", file=sys.stderr)
        sys.exit(1)


def _report_results(compute_results, args):
    """Run *compute_results*, print the report and optionally save it as JSON.

    Any exception raised while computing or writing is reported on stderr
    and turned into exit status 1, so the CLI never shows a raw traceback.
    """
    try:
        results = compute_results()

        # Print results (and write the text report when --output is given)
        print_results(results, args.output)

        # Save JSON output if requested
        if args.json_output:
            with open(args.json_output, 'w', encoding='utf-8') as f:
                json.dump(results, f, indent=2, ensure_ascii=False)
            print(f"\nJSON results saved to: {args.json_output}")
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


def main():
    """Run the OCR accuracy calculation from the command line.

    Arguments are read from ``sys.argv``. Exactly one input mode is used:

    * ``--data``: a single combined file whose label fields carry
      ``--label-suffix``;
    * ``--label`` together with ``--predict``: two separate files whose
      records are matched on ``--match-keys``.

    Exits with status 1 when an input file is missing or the calculation
    fails, and with status 2 (argparse usage error) when ``--label`` is given
    without ``--predict``.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()

    if args.data:
        # Combined data mode
        data_path = Path(args.data)
        _require_file(data_path, "Data")

        print("Calculating OCR accuracy from combined data...")
        print(f"Data file: {data_path}")
        print(f"Label suffix: {args.label_suffix}")
        print(f"Normalize: {args.normalize}")
        print()

        def compute_results():
            return calculate_ocr_accuracy_from_combined_data(
                str(data_path),
                normalize=args.normalize,
                project_id=args.project_id,
                label_suffix=args.label_suffix
            )
    else:
        # Separate label/predict files mode
        if not args.label or not args.predict:
            parser.error("--label and --predict are required when not using --data mode")

        label_path = Path(args.label)
        predict_path = Path(args.predict)
        _require_file(label_path, "Label")
        _require_file(predict_path, "Predict")

        print("Calculating OCR accuracy...")
        print(f"Label file: {label_path}")
        print(f"Predict file: {predict_path}")
        print(f"Normalize: {args.normalize}")
        print(f"Match keys: {args.match_keys}")
        print()

        def compute_results():
            return calculate_ocr_accuracy(
                str(label_path),
                str(predict_path),
                normalize=args.normalize,
                match_keys=args.match_keys,
                project_id=args.project_id
            )

    _report_results(compute_results, args)
|
| 726 |
+
|
| 727 |
+
|
| 728 |
+
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
data.sql
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Combined OCR evaluation query.
-- Produces one row per cleaned file name holding the OCR predictions (plain
-- column names) next to the reference values (same name + `_label` suffix).

-- Latest master_files row per original file name. `cleaned_filename` is the
-- lower-cased name with any leading "Copy of " prefixes removed.
WITH dedup_master_files AS (
  SELECT *
  FROM (
    SELECT
      *,
      LOWER(
        REGEXP_REPLACE(original_filename, r'^(Copy of\s*)+', '')
      ) AS cleaned_filename,
      ROW_NUMBER() OVER (
        PARTITION BY original_filename
        ORDER BY created_at DESC
      ) AS rn
    FROM `stellar-orb-451904-d9.raw_data_ocr_kia.master_files`
  )
  WHERE rn = 1
),
-- Latest page-1 (cover) OCR row per file_id.
dedup_page1_cover AS (
  SELECT *
  FROM (
    SELECT
      *,
      ROW_NUMBER() OVER (
        PARTITION BY file_id
        ORDER BY created_at DESC
      ) AS rn
    FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page1_cover`
  )
  WHERE rn = 1
),

-- Latest page-2 (identity) OCR row per file_id (ordered by inserted_at here).
dedup_page2_identitas AS (
  SELECT *
  FROM (
    SELECT
      *,
      ROW_NUMBER() OVER (
        PARTITION BY file_id
        ORDER BY inserted_at DESC
      ) AS rn
    FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page2_identitas`
  )
  WHERE rn = 1
),

-- Reference values: the most recent entry (by `end`) per filename, with every
-- column renamed to "<prediction column>_label".
label AS (
  SELECT
    filename AS filename_label,
    project_id_mother AS project_id_mother_label,
    -- Full name: when first and last name are identical use it once;
    -- otherwise "first last", where a NULL part contributes nothing.
    CASE
      WHEN first_name IS NOT NULL
        AND last_name IS NOT NULL
        AND first_name = last_name
      THEN first_name
      ELSE CONCAT(
        IFNULL(first_name, ''),
        IFNULL(
          CONCAT(' ', last_name),
          ''
        )
      )
    END AS nama_ibu_cover_label,
    puskesmas_name AS dikeluarkan_oleh_fasilitas_kesehatan_label,
    regency_name AS kabupaten_kota_label,
    -- Same full-name expression, paired with the page-2 `nama_ibu` prediction.
    -- The `_label` suffix is required: without it this column collides with
    -- P2.nama_ibu in `main` and the field has no label counterpart.
    CASE
      WHEN first_name IS NOT NULL
        AND last_name IS NOT NULL
        AND first_name = last_name
      THEN first_name
      ELSE CONCAT(
        IFNULL(first_name, ''),
        IFNULL(
          CONCAT(' ', last_name),
          ''
        )
      )
    END AS nama_ibu_label,
    nik_mother AS nik_ibu_label,
    birth_date AS tempat_tanggal_lahir_ibu_label,
    address_street AS alamat_rumah_ibu_label,
    contact_number AS telepon_ibu_label,
    bpjs_mother AS no_jkn_ibu_label,
    education_level AS pendidikan_ibu_label,
    occupation AS pekerjaan_ibu_label,
    blood_type_result AS golongan_darah_ibu_label,
    age AS usia_ibu_label,
    pregnancy_number AS kehamilan_ke_label,
    number_live_birth AS jumlah_anak_lahir_hidup_label,
    number_birth_lost AS riwayat_keguguran_label,
    previous_preg_issue AS riwayat_penyakit_ibu_label
  FROM (
    SELECT
      *,
      ROW_NUMBER() OVER (
        PARTITION BY `filename`
        ORDER BY `end` DESC
      ) AS rn
    FROM `stellar-orb-451904-d9.ocr_bukukia_ocr_sid.List Correct Entry 2025`
  )
  WHERE
    rn = 1
)
,
-- Predictions joined to labels. The label is matched on the project id
-- (the "7000" + 4-or-more-digit number embedded in the original file name).
main AS (
  SELECT
    MF.id,
    MF.cleaned_filename AS `filename`,
    REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS project_id_mother,
    P1.nama_ibu_cover,
    P1.dikeluarkan_oleh_fasilitas_kesehatan,
    P1.kabupaten_kota,
    P2.nama_ibu,
    P2.nik_ibu,
    P2.tempat_tanggal_lahir_ibu,
    P2.alamat_rumah_ibu,
    P2.telepon_ibu,
    P2.no_jkn_ibu,
    P2.pendidikan_ibu,
    P2.pekerjaan_ibu,
    P2.golongan_darah_ibu,
    P2.usia_ibu,
    P2.kehamilan_ke,
    P2.jumlah_anak_lahir_hidup,
    P2.riwayat_keguguran,
    P2.riwayat_penyakit_ibu,
    L.*,
    -- NOTE(review): ordering by the partition key itself makes the surviving
    -- row arbitrary when several files share a cleaned_filename.
    ROW_NUMBER() OVER (
      PARTITION BY MF.`cleaned_filename`
      ORDER BY MF.`cleaned_filename` DESC
    ) AS rn
  FROM dedup_master_files MF
  -- NOTE(review): the page CTEs are de-duplicated per file_id but joined on
  -- id; confirm that P1.id / P2.id hold the master file id.
  LEFT JOIN dedup_page1_cover P1 ON MF.id = P1.id
  LEFT JOIN dedup_page2_identitas P2 ON MF.id = P2.id
  LEFT JOIN label L ON CAST(REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS INTEGER) = L.project_id_mother_label
)

-- Keep one row per cleaned file name and drop one hand-picked duplicate file.
SELECT *
FROM main
WHERE
  rn = 1 and
  id != "9a21ef00-6b02-4b04-81c3-68e25e2c8b7f" --duplicated
|
ocr_accuracy_dashboard.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gradio dashboard for visualizing OCR accuracy results."""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
import gradio as gr
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import plotly.express as px
|
| 10 |
+
import plotly.graph_objects as go
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def load_accuracy_data(json_path: str | Path) -> dict[str, Any]:
|
| 14 |
+
"""Load accuracy data from JSON file."""
|
| 15 |
+
with open(json_path, "r", encoding="utf-8") as f:
|
| 16 |
+
return json.load(f)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def create_summary_stats(data: dict[str, Any]) -> pd.DataFrame:
    """Build the per-field summary table shown on the dashboard.

    Args:
        data: Mapping of raw field name to its metrics dict. Each metrics dict
            must provide ``exact_match_rate``, ``average_normalized_accuracy``,
            ``min_accuracy``, ``average_edit_distance``, ``total_records`` and
            ``exact_matches``.

    Returns:
        One row per field, sorted by average accuracy (best first). The three
        rate columns are formatted as percent *strings* with two decimals
        (e.g. ``"56.00%"``); the remaining columns keep their numeric values.
        An empty ``data`` yields an empty frame that still has all columns.
    """
    columns = [
        "Field",
        "Exact Match Rate",
        "Average Accuracy",
        "Min Accuracy",
        "Average Edit Distance",
        "Total Records",
        "Exact Matches",
    ]
    summary_data = [
        {
            "Field": field_name.replace("_", " ").title(),
            "Exact Match Rate": metrics["exact_match_rate"],
            "Average Accuracy": metrics["average_normalized_accuracy"],
            "Min Accuracy": metrics["min_accuracy"],
            "Average Edit Distance": metrics["average_edit_distance"],
            "Total Records": metrics["total_records"],
            "Exact Matches": metrics["exact_matches"],
        }
        for field_name, metrics in data.items()
    ]
    # Passing the columns explicitly keeps the schema when there are no rows;
    # otherwise the sort below would fail with a KeyError on empty input.
    df = pd.DataFrame(summary_data, columns=columns)

    # Sort on the numeric values first; formatting turns them into strings.
    df = df.sort_values("Average Accuracy", ascending=False)
    for rate_column in ("Average Accuracy", "Exact Match Rate", "Min Accuracy"):
        df[rate_column] = df[rate_column].apply(lambda x: f"{x:.2%}")
    return df
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def create_accuracy_chart(data: dict[str, Any]) -> go.Figure:
    """Return a grouped bar chart comparing two accuracy metrics per field.

    Fields are ordered by average normalized accuracy, best first. Each field
    gets one bar for its average normalized accuracy and one for its exact
    match rate.
    """
    ranked = sorted(
        data.items(),
        key=lambda item: item[1]["average_normalized_accuracy"],
        reverse=True,
    )
    labels = [name.replace("_", " ").title() for name, _ in ranked]
    mean_accuracy = [stats["average_normalized_accuracy"] for _, stats in ranked]
    exact_rates = [stats["exact_match_rate"] for _, stats in ranked]

    figure = go.Figure()
    accuracy_bars = go.Bar(
        name="Average Normalized Accuracy",
        x=labels,
        y=mean_accuracy,
        marker_color="lightblue",
    )
    exact_match_bars = go.Bar(
        name="Exact Match Rate",
        x=labels,
        y=exact_rates,
        marker_color="lightcoral",
    )
    for bars in (accuracy_bars, exact_match_bars):
        figure.add_trace(bars)

    figure.update_layout(
        title="OCR Accuracy Metrics by Field",
        xaxis_title="Field",
        yaxis_title="Accuracy Rate",
        barmode="group",
        height=600,
        xaxis={"tickangle": -45},
    )
    return figure
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def create_edit_distance_chart(data: dict[str, Any]) -> go.Figure:
    """Return a bar chart of the average edit distance for every field.

    Fields are ordered by ascending average edit distance (lower is better),
    and each bar is coloured by its own value on the "Reds" scale.
    """
    ranked = sorted(data.items(), key=lambda item: item[1]["average_edit_distance"])
    labels = [name.replace("_", " ").title() for name, _ in ranked]
    distances = [stats["average_edit_distance"] for _, stats in ranked]

    figure = px.bar(
        x=labels,
        y=distances,
        title="Average Edit Distance by Field",
        labels={"x": "Field", "y": "Average Edit Distance"},
        color=distances,
        color_continuous_scale="Reds",
    )
    figure.update_layout(height=600, xaxis={"tickangle": -45})
    return figure
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def get_field_examples(data: dict[str, Any], field_name: str) -> pd.DataFrame:
    """Return the stored mismatch examples of one field as a table.

    Args:
        data: Mapping of raw field name to its metrics dict; examples are read
            from the optional ``"examples"`` list of that dict.
        field_name: Raw (un-prettified) field key to look up.

    Returns:
        A frame with the columns ``Record Index``, ``Label``, ``Prediction``,
        ``Edit Distance`` and ``Accuracy`` (accuracy formatted as a percent
        string). When the field is unknown or has no examples the frame is
        empty but keeps those columns, so the table headers stay stable.
    """
    columns = ["Record Index", "Label", "Prediction", "Edit Distance", "Accuracy"]

    # A missing field and a field without examples are treated alike.
    examples = data.get(field_name, {}).get("examples", [])
    if not examples:
        return pd.DataFrame(columns=columns)

    example_data = [
        {
            "Record Index": ex["record_idx"],
            "Label": ex["label"],
            "Prediction": ex["predict"],
            "Edit Distance": ex["edit_distance"],
            "Accuracy": f"{ex['accuracy']:.2%}",
        }
        for ex in examples
    ]
    return pd.DataFrame(example_data, columns=columns)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def create_dashboard(json_path: str | Path):
    """Build the Gradio dashboard for the accuracy report at ``json_path``.

    The report is loaded once, when the dashboard is built. The app has a
    "Summary" tab (summary table plus the two charts) and a "Field Details"
    tab where a dropdown selects the field whose metrics and mismatch
    examples are shown.

    Returns:
        The ``gr.Blocks`` app. It is NOT launched here; the caller is
        responsible for calling ``.launch()``.
    """
    # Load data
    data = load_accuracy_data(json_path)

    # Map each display label shown in the dropdown back to its raw field key,
    # built once instead of rescanning every field on each change. When two
    # keys prettify to the same label the first one wins.
    display_to_field: dict[str, str] = {}
    for name in data:
        display_to_field.setdefault(name.replace("_", " ").title(), name)

    # None when the report contains no fields at all.
    first_field = next(iter(display_to_field), None)

    def update_field_details(field_display_name: str):
        """Return (metrics markdown, examples table) for the selected label."""
        field_name = display_to_field.get(field_display_name)
        if field_name is None:
            return "", pd.DataFrame()

        metrics = data[field_name]
        metrics_text = f"""
                ### {field_display_name}

                - **Exact Match Rate**: {metrics['exact_match_rate']:.2%}
                - **Average Normalized Accuracy**: {metrics['average_normalized_accuracy']:.2%}
                - **Min Accuracy**: {metrics['min_accuracy']:.2%}
                - **Average Edit Distance**: {metrics['average_edit_distance']:.2f}
                - **Max Edit Distance**: {metrics['max_edit_distance']}
                - **Exact Matches**: {metrics['exact_matches']} / {metrics['total_records']}
                """
        examples_df = get_field_examples(data, field_name)
        return metrics_text, examples_df

    # Create components
    with gr.Blocks(title="OCR Accuracy Dashboard") as demo:
        gr.Markdown(
            """
            # 📊 OCR Accuracy Dashboard
            Visualize and analyze OCR accuracy results from KIA document processing.
            """
        )

        with gr.Tabs():
            # Summary Tab
            with gr.Tab("📈 Summary"):
                gr.Markdown("### Overall Statistics")
                gr.Dataframe(
                    value=create_summary_stats(data),
                    interactive=False,
                    wrap=True,
                )

                gr.Markdown("### Accuracy Metrics Comparison")
                gr.Plot(
                    value=create_accuracy_chart(data),
                    label="Accuracy Metrics",
                )

                gr.Markdown("### Edit Distance Analysis")
                gr.Plot(
                    value=create_edit_distance_chart(data),
                    label="Edit Distance",
                )

            # Field Details Tab
            with gr.Tab("🔍 Field Details"):
                gr.Markdown("### Select a field to view detailed examples")
                field_dropdown = gr.Dropdown(
                    choices=list(display_to_field),
                    label="Select Field",
                    value=first_field,
                )

                # Initialize with first field
                initial_metrics, initial_examples = update_field_details(first_field)

                field_metrics = gr.Markdown(value=initial_metrics)
                examples_table = gr.Dataframe(
                    value=initial_examples,
                    interactive=False,
                    wrap=True,
                )

                field_dropdown.change(
                    fn=update_field_details,
                    inputs=field_dropdown,
                    outputs=[field_metrics, examples_table],
                )

    return demo
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
if __name__ == "__main__":
    # Default path to the JSON file
    json_file = Path(__file__).parent / "results.json"

    # Create and launch dashboard
    demo = create_dashboard(json_file)
    # No share=True: the report contains personal data (names, ID numbers),
    # so do not open a public tunnel by default. Sharing is also not
    # supported on Hugging Face Spaces. Set GRADIO_SHARE=True to opt in
    # when running locally.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
results.json
ADDED
|
@@ -0,0 +1,514 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alamat_rumah_ibu": {
|
| 3 |
+
"exact_match_rate": 0.0,
|
| 4 |
+
"average_edit_distance": 11.180851063829786,
|
| 5 |
+
"average_normalized_accuracy": 0.13064494103170882,
|
| 6 |
+
"min_accuracy": 0.0,
|
| 7 |
+
"max_edit_distance": 33,
|
| 8 |
+
"exact_matches": 0,
|
| 9 |
+
"total_records": 94,
|
| 10 |
+
"examples": [
|
| 11 |
+
{
|
| 12 |
+
"record_idx": 0,
|
| 13 |
+
"label": "",
|
| 14 |
+
"predict": "Tebon Selatan",
|
| 15 |
+
"edit_distance": 13,
|
| 16 |
+
"accuracy": 0.0
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"record_idx": 1,
|
| 20 |
+
"label": "",
|
| 21 |
+
"predict": "PANGKALAN BUNTE.",
|
| 22 |
+
"edit_distance": 16,
|
| 23 |
+
"accuracy": 0.0
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"record_idx": 2,
|
| 27 |
+
"label": "",
|
| 28 |
+
"predict": "BENDUNG SELATAN, KILANG",
|
| 29 |
+
"edit_distance": 23,
|
| 30 |
+
"accuracy": 0.0
|
| 31 |
+
}
|
| 32 |
+
]
|
| 33 |
+
},
|
| 34 |
+
"dikeluarkan_oleh_fasilitas_kesehatan": {
|
| 35 |
+
"exact_match_rate": 0.05,
|
| 36 |
+
"average_edit_distance": 7.98,
|
| 37 |
+
"average_normalized_accuracy": 0.34242116726433275,
|
| 38 |
+
"min_accuracy": 0.0,
|
| 39 |
+
"max_edit_distance": 20,
|
| 40 |
+
"exact_matches": 5,
|
| 41 |
+
"total_records": 100,
|
| 42 |
+
"examples": [
|
| 43 |
+
{
|
| 44 |
+
"record_idx": 0,
|
| 45 |
+
"label": "Keruak",
|
| 46 |
+
"predict": "kruak",
|
| 47 |
+
"edit_distance": 1,
|
| 48 |
+
"accuracy": 0.8333333333333334
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"record_idx": 1,
|
| 52 |
+
"label": "Lenek",
|
| 53 |
+
"predict": "LETIEK",
|
| 54 |
+
"edit_distance": 2,
|
| 55 |
+
"accuracy": 0.6666666666666667
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"record_idx": 2,
|
| 59 |
+
"label": "Montong Betok",
|
| 60 |
+
"predict": "PUSKESDES KILANG",
|
| 61 |
+
"edit_distance": 15,
|
| 62 |
+
"accuracy": 0.0625
|
| 63 |
+
}
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
"golongan_darah_ibu": {
|
| 67 |
+
"exact_match_rate": 0.2839506172839506,
|
| 68 |
+
"average_edit_distance": 0.9506172839506173,
|
| 69 |
+
"average_normalized_accuracy": 0.35020576131687237,
|
| 70 |
+
"min_accuracy": 0.0,
|
| 71 |
+
"max_edit_distance": 4,
|
| 72 |
+
"exact_matches": 23,
|
| 73 |
+
"total_records": 81,
|
| 74 |
+
"examples": [
|
| 75 |
+
{
|
| 76 |
+
"record_idx": 0,
|
| 77 |
+
"label": "o",
|
| 78 |
+
"predict": "",
|
| 79 |
+
"edit_distance": 1,
|
| 80 |
+
"accuracy": 0.0
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"record_idx": 1,
|
| 84 |
+
"label": "b",
|
| 85 |
+
"predict": "B +",
|
| 86 |
+
"edit_distance": 2,
|
| 87 |
+
"accuracy": 0.33333333333333337
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"record_idx": 4,
|
| 91 |
+
"label": "a",
|
| 92 |
+
"predict": "B A",
|
| 93 |
+
"edit_distance": 2,
|
| 94 |
+
"accuracy": 0.33333333333333337
|
| 95 |
+
}
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
"jumlah_anak_lahir_hidup": {
|
| 99 |
+
"exact_match_rate": 0.40860215053763443,
|
| 100 |
+
"average_edit_distance": 0.6559139784946236,
|
| 101 |
+
"average_normalized_accuracy": 0.4155145929339477,
|
| 102 |
+
"min_accuracy": 0.0,
|
| 103 |
+
"max_edit_distance": 6,
|
| 104 |
+
"exact_matches": 38,
|
| 105 |
+
"total_records": 93,
|
| 106 |
+
"examples": [
|
| 107 |
+
{
|
| 108 |
+
"record_idx": 3,
|
| 109 |
+
"label": "",
|
| 110 |
+
"predict": "-",
|
| 111 |
+
"edit_distance": 1,
|
| 112 |
+
"accuracy": 0.0
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"record_idx": 4,
|
| 116 |
+
"label": "2",
|
| 117 |
+
"predict": "",
|
| 118 |
+
"edit_distance": 1,
|
| 119 |
+
"accuracy": 0.0
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"record_idx": 7,
|
| 123 |
+
"label": "1",
|
| 124 |
+
"predict": "",
|
| 125 |
+
"edit_distance": 1,
|
| 126 |
+
"accuracy": 0.0
|
| 127 |
+
}
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
"kabupaten_kota": {
|
| 131 |
+
"exact_match_rate": 0.18,
|
| 132 |
+
"average_edit_distance": 6.73,
|
| 133 |
+
"average_normalized_accuracy": 0.44309218559218544,
|
| 134 |
+
"min_accuracy": 0.0,
|
| 135 |
+
"max_edit_distance": 14,
|
| 136 |
+
"exact_matches": 18,
|
| 137 |
+
"total_records": 100,
|
| 138 |
+
"examples": [
|
| 139 |
+
{
|
| 140 |
+
"record_idx": 0,
|
| 141 |
+
"label": "Lombok Timur",
|
| 142 |
+
"predict": "LOTIM",
|
| 143 |
+
"edit_distance": 7,
|
| 144 |
+
"accuracy": 0.41666666666666663
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"record_idx": 1,
|
| 148 |
+
"label": "Lombok Timur",
|
| 149 |
+
"predict": "LOTIM",
|
| 150 |
+
"edit_distance": 7,
|
| 151 |
+
"accuracy": 0.41666666666666663
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
"record_idx": 3,
|
| 155 |
+
"label": "Lombok Timur",
|
| 156 |
+
"predict": "LOTIM",
|
| 157 |
+
"edit_distance": 7,
|
| 158 |
+
"accuracy": 0.41666666666666663
|
| 159 |
+
}
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
"kehamilan_ke": {
|
| 163 |
+
"exact_match_rate": 0.3978494623655914,
|
| 164 |
+
"average_edit_distance": 0.946236559139785,
|
| 165 |
+
"average_normalized_accuracy": 0.44871138419525514,
|
| 166 |
+
"min_accuracy": 0.0,
|
| 167 |
+
"max_edit_distance": 12,
|
| 168 |
+
"exact_matches": 37,
|
| 169 |
+
"total_records": 93,
|
| 170 |
+
"examples": [
|
| 171 |
+
{
|
| 172 |
+
"record_idx": 2,
|
| 173 |
+
"label": "2",
|
| 174 |
+
"predict": "6219041",
|
| 175 |
+
"edit_distance": 6,
|
| 176 |
+
"accuracy": 0.1428571428571429
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"record_idx": 3,
|
| 180 |
+
"label": "",
|
| 181 |
+
"predict": "1",
|
| 182 |
+
"edit_distance": 1,
|
| 183 |
+
"accuracy": 0.0
|
| 184 |
+
},
|
| 185 |
+
{
|
| 186 |
+
"record_idx": 4,
|
| 187 |
+
"label": "3",
|
| 188 |
+
"predict": "",
|
| 189 |
+
"edit_distance": 1,
|
| 190 |
+
"accuracy": 0.0
|
| 191 |
+
}
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
"nama_ibu_cover": {
|
| 195 |
+
"exact_match_rate": 0.56,
|
| 196 |
+
"average_edit_distance": 1.68,
|
| 197 |
+
"average_normalized_accuracy": 0.8923876665746872,
|
| 198 |
+
"min_accuracy": 0.0,
|
| 199 |
+
"max_edit_distance": 15,
|
| 200 |
+
"exact_matches": 56,
|
| 201 |
+
"total_records": 100,
|
| 202 |
+
"examples": [
|
| 203 |
+
{
|
| 204 |
+
"record_idx": 1,
|
| 205 |
+
"label": "Asmini Wati",
|
| 206 |
+
"predict": "ASMIHI WATI",
|
| 207 |
+
"edit_distance": 1,
|
| 208 |
+
"accuracy": 0.9090909090909091
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"record_idx": 3,
|
| 212 |
+
"label": "Depa Apriani",
|
| 213 |
+
"predict": "PEPA APRIANI",
|
| 214 |
+
"edit_distance": 1,
|
| 215 |
+
"accuracy": 0.9166666666666666
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"record_idx": 4,
|
| 219 |
+
"label": "ENI NURIANA FATHURRAHMAN",
|
| 220 |
+
"predict": "ERI HURIANA P",
|
| 221 |
+
"edit_distance": 14,
|
| 222 |
+
"accuracy": 0.41666666666666663
|
| 223 |
+
}
|
| 224 |
+
]
|
| 225 |
+
},
|
| 226 |
+
"nik_ibu": {
|
| 227 |
+
"exact_match_rate": 0.15053763440860216,
|
| 228 |
+
"average_edit_distance": 3.7204301075268815,
|
| 229 |
+
"average_normalized_accuracy": 0.7661248784584818,
|
| 230 |
+
"min_accuracy": 0.0,
|
| 231 |
+
"max_edit_distance": 18,
|
| 232 |
+
"exact_matches": 14,
|
| 233 |
+
"total_records": 93,
|
| 234 |
+
"examples": [
|
| 235 |
+
{
|
| 236 |
+
"record_idx": 0,
|
| 237 |
+
"label": "5203014210030001",
|
| 238 |
+
"predict": "S 003019216830001",
|
| 239 |
+
"edit_distance": 6,
|
| 240 |
+
"accuracy": 0.6470588235294117
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"record_idx": 2,
|
| 244 |
+
"label": "5203035202980001",
|
| 245 |
+
"predict": "S20303S20298 0001",
|
| 246 |
+
"edit_distance": 3,
|
| 247 |
+
"accuracy": 0.8235294117647058
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"record_idx": 3,
|
| 251 |
+
"label": "5203015901070003",
|
| 252 |
+
"predict": "S203015901090003",
|
| 253 |
+
"edit_distance": 2,
|
| 254 |
+
"accuracy": 0.875
|
| 255 |
+
}
|
| 256 |
+
]
|
| 257 |
+
},
|
| 258 |
+
"no_jkn_ibu": {
|
| 259 |
+
"exact_match_rate": 0.0,
|
| 260 |
+
"average_edit_distance": 7.730769230769231,
|
| 261 |
+
"average_normalized_accuracy": 0.19083130293537534,
|
| 262 |
+
"min_accuracy": 0.0,
|
| 263 |
+
"max_edit_distance": 16,
|
| 264 |
+
"exact_matches": 0,
|
| 265 |
+
"total_records": 26,
|
| 266 |
+
"examples": [
|
| 267 |
+
{
|
| 268 |
+
"record_idx": 1,
|
| 269 |
+
"label": "",
|
| 270 |
+
"predict": "- 17 1 568 5 NA",
|
| 271 |
+
"edit_distance": 15,
|
| 272 |
+
"accuracy": 0.0
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"record_idx": 4,
|
| 276 |
+
"label": "2190703127",
|
| 277 |
+
"predict": "0002190703127",
|
| 278 |
+
"edit_distance": 3,
|
| 279 |
+
"accuracy": 0.7692307692307692
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"record_idx": 5,
|
| 283 |
+
"label": "",
|
| 284 |
+
"predict": "Q - ",
|
| 285 |
+
"edit_distance": 3,
|
| 286 |
+
"accuracy": 0.0
|
| 287 |
+
}
|
| 288 |
+
]
|
| 289 |
+
},
|
| 290 |
+
"pekerjaan_ibu": {
|
| 291 |
+
"exact_match_rate": 0.0,
|
| 292 |
+
"average_edit_distance": 12.646464646464647,
|
| 293 |
+
"average_normalized_accuracy": 0.17792207792207793,
|
| 294 |
+
"min_accuracy": 0.0,
|
| 295 |
+
"max_edit_distance": 16,
|
| 296 |
+
"exact_matches": 0,
|
| 297 |
+
"total_records": 99,
|
| 298 |
+
"examples": [
|
| 299 |
+
{
|
| 300 |
+
"record_idx": 0,
|
| 301 |
+
"label": "ibu_rumah_tangga",
|
| 302 |
+
"predict": "IRT",
|
| 303 |
+
"edit_distance": 13,
|
| 304 |
+
"accuracy": 0.1875
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"record_idx": 1,
|
| 308 |
+
"label": "ibu_rumah_tangga",
|
| 309 |
+
"predict": "Irt.",
|
| 310 |
+
"edit_distance": 13,
|
| 311 |
+
"accuracy": 0.1875
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"record_idx": 2,
|
| 315 |
+
"label": "ibu_rumah_tangga",
|
| 316 |
+
"predict": "IIT",
|
| 317 |
+
"edit_distance": 14,
|
| 318 |
+
"accuracy": 0.125
|
| 319 |
+
}
|
| 320 |
+
]
|
| 321 |
+
},
|
| 322 |
+
"pendidikan_ibu": {
|
| 323 |
+
"exact_match_rate": 0.58,
|
| 324 |
+
"average_edit_distance": 2.32,
|
| 325 |
+
"average_normalized_accuracy": 0.7334166666666667,
|
| 326 |
+
"min_accuracy": 0.0,
|
| 327 |
+
"max_edit_distance": 16,
|
| 328 |
+
"exact_matches": 58,
|
| 329 |
+
"total_records": 100,
|
| 330 |
+
"examples": [
|
| 331 |
+
{
|
| 332 |
+
"record_idx": 1,
|
| 333 |
+
"label": "sd",
|
| 334 |
+
"predict": "SD.",
|
| 335 |
+
"edit_distance": 1,
|
| 336 |
+
"accuracy": 0.6666666666666667
|
| 337 |
+
},
|
| 338 |
+
{
|
| 339 |
+
"record_idx": 5,
|
| 340 |
+
"label": "smp",
|
| 341 |
+
"predict": "SMP.",
|
| 342 |
+
"edit_distance": 1,
|
| 343 |
+
"accuracy": 0.75
|
| 344 |
+
},
|
| 345 |
+
{
|
| 346 |
+
"record_idx": 8,
|
| 347 |
+
"label": "sma",
|
| 348 |
+
"predict": "SLTA",
|
| 349 |
+
"edit_distance": 2,
|
| 350 |
+
"accuracy": 0.5
|
| 351 |
+
}
|
| 352 |
+
]
|
| 353 |
+
},
|
| 354 |
+
"riwayat_keguguran": {
|
| 355 |
+
"exact_match_rate": 0.3118279569892473,
|
| 356 |
+
"average_edit_distance": 0.6989247311827957,
|
| 357 |
+
"average_normalized_accuracy": 0.3118279569892473,
|
| 358 |
+
"min_accuracy": 0.0,
|
| 359 |
+
"max_edit_distance": 2,
|
| 360 |
+
"exact_matches": 29,
|
| 361 |
+
"total_records": 93,
|
| 362 |
+
"examples": [
|
| 363 |
+
{
|
| 364 |
+
"record_idx": 2,
|
| 365 |
+
"label": "0",
|
| 366 |
+
"predict": "-",
|
| 367 |
+
"edit_distance": 1,
|
| 368 |
+
"accuracy": 0.0
|
| 369 |
+
},
|
| 370 |
+
{
|
| 371 |
+
"record_idx": 3,
|
| 372 |
+
"label": "",
|
| 373 |
+
"predict": "-",
|
| 374 |
+
"edit_distance": 1,
|
| 375 |
+
"accuracy": 0.0
|
| 376 |
+
},
|
| 377 |
+
{
|
| 378 |
+
"record_idx": 4,
|
| 379 |
+
"label": "0",
|
| 380 |
+
"predict": "",
|
| 381 |
+
"edit_distance": 1,
|
| 382 |
+
"accuracy": 0.0
|
| 383 |
+
}
|
| 384 |
+
]
|
| 385 |
+
},
|
| 386 |
+
"riwayat_penyakit_ibu": {
|
| 387 |
+
"exact_match_rate": 0.0,
|
| 388 |
+
"average_edit_distance": 18.66,
|
| 389 |
+
"average_normalized_accuracy": 0.01044047619047619,
|
| 390 |
+
"min_accuracy": 0.0,
|
| 391 |
+
"max_edit_distance": 30,
|
| 392 |
+
"exact_matches": 0,
|
| 393 |
+
"total_records": 100,
|
| 394 |
+
"examples": [
|
| 395 |
+
{
|
| 396 |
+
"record_idx": 0,
|
| 397 |
+
"label": "other",
|
| 398 |
+
"predict": "-",
|
| 399 |
+
"edit_distance": 5,
|
| 400 |
+
"accuracy": 0.0
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"record_idx": 1,
|
| 404 |
+
"label": "no_past_complications",
|
| 405 |
+
"predict": "-",
|
| 406 |
+
"edit_distance": 21,
|
| 407 |
+
"accuracy": 0.0
|
| 408 |
+
},
|
| 409 |
+
{
|
| 410 |
+
"record_idx": 2,
|
| 411 |
+
"label": "do_not_know_past_complications",
|
| 412 |
+
"predict": "-",
|
| 413 |
+
"edit_distance": 30,
|
| 414 |
+
"accuracy": 0.0
|
| 415 |
+
}
|
| 416 |
+
]
|
| 417 |
+
},
|
| 418 |
+
"telepon_ibu": {
|
| 419 |
+
"exact_match_rate": 0.0,
|
| 420 |
+
"average_edit_distance": 5.385714285714286,
|
| 421 |
+
"average_normalized_accuracy": 0.6077884615384617,
|
| 422 |
+
"min_accuracy": 0.0,
|
| 423 |
+
"max_edit_distance": 16,
|
| 424 |
+
"exact_matches": 0,
|
| 425 |
+
"total_records": 70,
|
| 426 |
+
"examples": [
|
| 427 |
+
{
|
| 428 |
+
"record_idx": 0,
|
| 429 |
+
"label": "6281929813493",
|
| 430 |
+
"predict": "081 929 813 493.",
|
| 431 |
+
"edit_distance": 6,
|
| 432 |
+
"accuracy": 0.625
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"record_idx": 1,
|
| 436 |
+
"label": "",
|
| 437 |
+
"predict": "087 894 63 251",
|
| 438 |
+
"edit_distance": 14,
|
| 439 |
+
"accuracy": 0.0
|
| 440 |
+
},
|
| 441 |
+
{
|
| 442 |
+
"record_idx": 3,
|
| 443 |
+
"label": "6282340578115",
|
| 444 |
+
"predict": "082340578115",
|
| 445 |
+
"edit_distance": 2,
|
| 446 |
+
"accuracy": 0.8461538461538461
|
| 447 |
+
}
|
| 448 |
+
]
|
| 449 |
+
},
|
| 450 |
+
"tempat_tanggal_lahir_ibu": {
|
| 451 |
+
"exact_match_rate": 0.0,
|
| 452 |
+
"average_edit_distance": 10.747474747474747,
|
| 453 |
+
"average_normalized_accuracy": 0.23632201183326582,
|
| 454 |
+
"min_accuracy": 0.0,
|
| 455 |
+
"max_edit_distance": 17,
|
| 456 |
+
"exact_matches": 0,
|
| 457 |
+
"total_records": 99,
|
| 458 |
+
"examples": [
|
| 459 |
+
{
|
| 460 |
+
"record_idx": 0,
|
| 461 |
+
"label": "Oct 2, 03",
|
| 462 |
+
"predict": "TJ. Luar 0/10 2003",
|
| 463 |
+
"edit_distance": 14,
|
| 464 |
+
"accuracy": 0.2222222222222222
|
| 465 |
+
},
|
| 466 |
+
{
|
| 467 |
+
"record_idx": 1,
|
| 468 |
+
"label": "Feb 27, 91",
|
| 469 |
+
"predict": "27 -2 -1991",
|
| 470 |
+
"edit_distance": 8,
|
| 471 |
+
"accuracy": 0.2727272727272727
|
| 472 |
+
},
|
| 473 |
+
{
|
| 474 |
+
"record_idx": 2,
|
| 475 |
+
"label": "Feb 12, 98",
|
| 476 |
+
"predict": "sukadana 12-02-1998",
|
| 477 |
+
"edit_distance": 14,
|
| 478 |
+
"accuracy": 0.26315789473684215
|
| 479 |
+
}
|
| 480 |
+
]
|
| 481 |
+
},
|
| 482 |
+
"usia_ibu": {
|
| 483 |
+
"exact_match_rate": 0.0,
|
| 484 |
+
"average_edit_distance": 5.172413793103448,
|
| 485 |
+
"average_normalized_accuracy": 0.0,
|
| 486 |
+
"min_accuracy": 0.0,
|
| 487 |
+
"max_edit_distance": 8,
|
| 488 |
+
"exact_matches": 0,
|
| 489 |
+
"total_records": 58,
|
| 490 |
+
"examples": [
|
| 491 |
+
{
|
| 492 |
+
"record_idx": 0,
|
| 493 |
+
"label": "",
|
| 494 |
+
"predict": "31 Th",
|
| 495 |
+
"edit_distance": 5,
|
| 496 |
+
"accuracy": 0.0
|
| 497 |
+
},
|
| 498 |
+
{
|
| 499 |
+
"record_idx": 1,
|
| 500 |
+
"label": "",
|
| 501 |
+
"predict": "34 thn",
|
| 502 |
+
"edit_distance": 6,
|
| 503 |
+
"accuracy": 0.0
|
| 504 |
+
},
|
| 505 |
+
{
|
| 506 |
+
"record_idx": 2,
|
| 507 |
+
"label": "",
|
| 508 |
+
"predict": "27th",
|
| 509 |
+
"edit_distance": 4,
|
| 510 |
+
"accuracy": 0.0
|
| 511 |
+
}
|
| 512 |
+
]
|
| 513 |
+
}
|
| 514 |
+
}
|
results.md
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
====================================================================================================
|
| 2 |
+
OCR ACCURACY REPORT (Edit Distance Analysis)
|
| 3 |
+
====================================================================================================
|
| 4 |
+
|
| 5 |
+
SUMMARY STATISTICS
|
| 6 |
+
----------------------------------------------------------------------------------------------------
|
| 7 |
+
Total Fields Analyzed: 16
|
| 8 |
+
Overall Exact Match Rate: 18.27%
|
| 9 |
+
Overall Average Normalized Accuracy: 37.86%
|
| 10 |
+
|
| 11 |
+
PER-FIELD STATISTICS
|
| 12 |
+
----------------------------------------------------------------------------------------------------
|
| 13 |
+
Field Name Exact Match Avg Accuracy Avg Edit Dist
|
| 14 |
+
----------------------------------------------------------------------------------------------------
|
| 15 |
+
nama_ibu_cover 56.00% (56/100) 89.24% 1.68
|
| 16 |
+
nik_ibu 15.05% (14/93) 76.61% 3.72
|
| 17 |
+
pendidikan_ibu 58.00% (58/100) 73.34% 2.32
|
| 18 |
+
telepon_ibu 0.00% (0/70) 60.78% 5.39
|
| 19 |
+
kehamilan_ke 39.78% (37/93) 44.87% 0.95
|
| 20 |
+
kabupaten_kota 18.00% (18/100) 44.31% 6.73
|
| 21 |
+
jumlah_anak_lahir_hidup 40.86% (38/93) 41.55% 0.66
|
| 22 |
+
golongan_darah_ibu 28.40% (23/81) 35.02% 0.95
|
| 23 |
+
dikeluarkan_oleh_fasilitas_kesehatan 5.00% (5/100) 34.24% 7.98
|
| 24 |
+
riwayat_keguguran 31.18% (29/93) 31.18% 0.70
|
| 25 |
+
tempat_tanggal_lahir_ibu 0.00% (0/99) 23.63% 10.75
|
| 26 |
+
no_jkn_ibu 0.00% (0/26) 19.08% 7.73
|
| 27 |
+
pekerjaan_ibu 0.00% (0/99) 17.79% 12.65
|
| 28 |
+
alamat_rumah_ibu 0.00% (0/94) 13.06% 11.18
|
| 29 |
+
riwayat_penyakit_ibu 0.00% (0/100) 1.04% 18.66
|
| 30 |
+
usia_ibu 0.00% (0/58) 0.00% 5.17
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
EXAMPLES OF MISMATCHES (Top 3 per field)
|
| 34 |
+
----------------------------------------------------------------------------------------------------
|
| 35 |
+
|
| 36 |
+
Field: nama_ibu_cover
|
| 37 |
+
Exact Match Rate: 56.00%
|
| 38 |
+
Average Accuracy: 89.24%
|
| 39 |
+
Record 1:
|
| 40 |
+
Label: 'Asmini Wati'
|
| 41 |
+
Predict: 'ASMIHI WATI'
|
| 42 |
+
Edit Distance: 1, Accuracy: 90.91%
|
| 43 |
+
Record 3:
|
| 44 |
+
Label: 'Depa Apriani'
|
| 45 |
+
Predict: 'PEPA APRIANI'
|
| 46 |
+
Edit Distance: 1, Accuracy: 91.67%
|
| 47 |
+
Record 4:
|
| 48 |
+
Label: 'ENI NURIANA FATHURRAHMAN'
|
| 49 |
+
Predict: 'ERI HURIANA P'
|
| 50 |
+
Edit Distance: 14, Accuracy: 41.67%
|
| 51 |
+
|
| 52 |
+
Field: nik_ibu
|
| 53 |
+
Exact Match Rate: 15.05%
|
| 54 |
+
Average Accuracy: 76.61%
|
| 55 |
+
Record 0:
|
| 56 |
+
Label: '5203014210030001'
|
| 57 |
+
Predict: 'S 003019216830001'
|
| 58 |
+
Edit Distance: 6, Accuracy: 64.71%
|
| 59 |
+
Record 2:
|
| 60 |
+
Label: '5203035202980001'
|
| 61 |
+
Predict: 'S20303S20298 0001'
|
| 62 |
+
Edit Distance: 3, Accuracy: 82.35%
|
| 63 |
+
Record 3:
|
| 64 |
+
Label: '5203015901070003'
|
| 65 |
+
Predict: 'S203015901090003'
|
| 66 |
+
Edit Distance: 2, Accuracy: 87.50%
|
| 67 |
+
|
| 68 |
+
Field: pendidikan_ibu
|
| 69 |
+
Exact Match Rate: 58.00%
|
| 70 |
+
Average Accuracy: 73.34%
|
| 71 |
+
Record 1:
|
| 72 |
+
Label: 'sd'
|
| 73 |
+
Predict: 'SD.'
|
| 74 |
+
Edit Distance: 1, Accuracy: 66.67%
|
| 75 |
+
Record 5:
|
| 76 |
+
Label: 'smp'
|
| 77 |
+
Predict: 'SMP.'
|
| 78 |
+
Edit Distance: 1, Accuracy: 75.00%
|
| 79 |
+
Record 8:
|
| 80 |
+
Label: 'sma'
|
| 81 |
+
Predict: 'SLTA'
|
| 82 |
+
Edit Distance: 2, Accuracy: 50.00%
|
| 83 |
+
|
| 84 |
+
Field: telepon_ibu
|
| 85 |
+
Exact Match Rate: 0.00%
|
| 86 |
+
Average Accuracy: 60.78%
|
| 87 |
+
Record 0:
|
| 88 |
+
Label: '6281929813493'
|
| 89 |
+
Predict: '081 929 813 493.'
|
| 90 |
+
Edit Distance: 6, Accuracy: 62.50%
|
| 91 |
+
Record 1:
|
| 92 |
+
Label: ''
|
| 93 |
+
Predict: '087 894 63 251'
|
| 94 |
+
Edit Distance: 14, Accuracy: 0.00%
|
| 95 |
+
Record 3:
|
| 96 |
+
Label: '6282340578115'
|
| 97 |
+
Predict: '082340578115'
|
| 98 |
+
Edit Distance: 2, Accuracy: 84.62%
|
| 99 |
+
|
| 100 |
+
Field: kehamilan_ke
|
| 101 |
+
Exact Match Rate: 39.78%
|
| 102 |
+
Average Accuracy: 44.87%
|
| 103 |
+
Record 2:
|
| 104 |
+
Label: '2'
|
| 105 |
+
Predict: '6219041'
|
| 106 |
+
Edit Distance: 6, Accuracy: 14.29%
|
| 107 |
+
Record 3:
|
| 108 |
+
Label: ''
|
| 109 |
+
Predict: '1'
|
| 110 |
+
Edit Distance: 1, Accuracy: 0.00%
|
| 111 |
+
Record 4:
|
| 112 |
+
Label: '3'
|
| 113 |
+
Predict: ''
|
| 114 |
+
Edit Distance: 1, Accuracy: 0.00%
|
| 115 |
+
|
| 116 |
+
Field: kabupaten_kota
|
| 117 |
+
Exact Match Rate: 18.00%
|
| 118 |
+
Average Accuracy: 44.31%
|
| 119 |
+
Record 0:
|
| 120 |
+
Label: 'Lombok Timur'
|
| 121 |
+
Predict: 'LOTIM'
|
| 122 |
+
Edit Distance: 7, Accuracy: 41.67%
|
| 123 |
+
Record 1:
|
| 124 |
+
Label: 'Lombok Timur'
|
| 125 |
+
Predict: 'LOTIM'
|
| 126 |
+
Edit Distance: 7, Accuracy: 41.67%
|
| 127 |
+
Record 3:
|
| 128 |
+
Label: 'Lombok Timur'
|
| 129 |
+
Predict: 'LOTIM'
|
| 130 |
+
Edit Distance: 7, Accuracy: 41.67%
|
| 131 |
+
|
| 132 |
+
Field: jumlah_anak_lahir_hidup
|
| 133 |
+
Exact Match Rate: 40.86%
|
| 134 |
+
Average Accuracy: 41.55%
|
| 135 |
+
Record 3:
|
| 136 |
+
Label: ''
|
| 137 |
+
Predict: '-'
|
| 138 |
+
Edit Distance: 1, Accuracy: 0.00%
|
| 139 |
+
Record 4:
|
| 140 |
+
Label: '2'
|
| 141 |
+
Predict: ''
|
| 142 |
+
Edit Distance: 1, Accuracy: 0.00%
|
| 143 |
+
Record 7:
|
| 144 |
+
Label: '1'
|
| 145 |
+
Predict: ''
|
| 146 |
+
Edit Distance: 1, Accuracy: 0.00%
|
| 147 |
+
|
| 148 |
+
Field: golongan_darah_ibu
|
| 149 |
+
Exact Match Rate: 28.40%
|
| 150 |
+
Average Accuracy: 35.02%
|
| 151 |
+
Record 0:
|
| 152 |
+
Label: 'o'
|
| 153 |
+
Predict: ''
|
| 154 |
+
Edit Distance: 1, Accuracy: 0.00%
|
| 155 |
+
Record 1:
|
| 156 |
+
Label: 'b'
|
| 157 |
+
Predict: 'B +'
|
| 158 |
+
Edit Distance: 2, Accuracy: 33.33%
|
| 159 |
+
Record 4:
|
| 160 |
+
Label: 'a'
|
| 161 |
+
Predict: 'B A'
|
| 162 |
+
Edit Distance: 2, Accuracy: 33.33%
|
| 163 |
+
|
| 164 |
+
Field: dikeluarkan_oleh_fasilitas_kesehatan
|
| 165 |
+
Exact Match Rate: 5.00%
|
| 166 |
+
Average Accuracy: 34.24%
|
| 167 |
+
Record 0:
|
| 168 |
+
Label: 'Keruak'
|
| 169 |
+
Predict: 'kruak'
|
| 170 |
+
Edit Distance: 1, Accuracy: 83.33%
|
| 171 |
+
Record 1:
|
| 172 |
+
Label: 'Lenek'
|
| 173 |
+
Predict: 'LETIEK'
|
| 174 |
+
Edit Distance: 2, Accuracy: 66.67%
|
| 175 |
+
Record 2:
|
| 176 |
+
Label: 'Montong Betok'
|
| 177 |
+
Predict: 'PUSKESDES KILANG'
|
| 178 |
+
Edit Distance: 15, Accuracy: 6.25%
|
| 179 |
+
|
| 180 |
+
Field: riwayat_keguguran
|
| 181 |
+
Exact Match Rate: 31.18%
|
| 182 |
+
Average Accuracy: 31.18%
|
| 183 |
+
Record 2:
|
| 184 |
+
Label: '0'
|
| 185 |
+
Predict: '-'
|
| 186 |
+
Edit Distance: 1, Accuracy: 0.00%
|
| 187 |
+
Record 3:
|
| 188 |
+
Label: ''
|
| 189 |
+
Predict: '-'
|
| 190 |
+
Edit Distance: 1, Accuracy: 0.00%
|
| 191 |
+
Record 4:
|
| 192 |
+
Label: '0'
|
| 193 |
+
Predict: ''
|
| 194 |
+
Edit Distance: 1, Accuracy: 0.00%
|
| 195 |
+
|
| 196 |
+
Field: tempat_tanggal_lahir_ibu
|
| 197 |
+
Exact Match Rate: 0.00%
|
| 198 |
+
Average Accuracy: 23.63%
|
| 199 |
+
Record 0:
|
| 200 |
+
Label: 'Oct 2, 03'
|
| 201 |
+
Predict: 'TJ. Luar 0/10 2003'
|
| 202 |
+
Edit Distance: 14, Accuracy: 22.22%
|
| 203 |
+
Record 1:
|
| 204 |
+
Label: 'Feb 27, 91'
|
| 205 |
+
Predict: '27 -2 -1991'
|
| 206 |
+
Edit Distance: 8, Accuracy: 27.27%
|
| 207 |
+
Record 2:
|
| 208 |
+
Label: 'Feb 12, 98'
|
| 209 |
+
Predict: 'sukadana 12-02-1998'
|
| 210 |
+
Edit Distance: 14, Accuracy: 26.32%
|
| 211 |
+
|
| 212 |
+
Field: no_jkn_ibu
|
| 213 |
+
Exact Match Rate: 0.00%
|
| 214 |
+
Average Accuracy: 19.08%
|
| 215 |
+
Record 1:
|
| 216 |
+
Label: ''
|
| 217 |
+
Predict: '- 17 1 568 5 NA'
|
| 218 |
+
Edit Distance: 15, Accuracy: 0.00%
|
| 219 |
+
Record 4:
|
| 220 |
+
Label: '2190703127'
|
| 221 |
+
Predict: '0002190703127'
|
| 222 |
+
Edit Distance: 3, Accuracy: 76.92%
|
| 223 |
+
Record 5:
|
| 224 |
+
Label: ''
|
| 225 |
+
Predict: 'Q - '
|
| 226 |
+
Edit Distance: 3, Accuracy: 0.00%
|
| 227 |
+
|
| 228 |
+
Field: pekerjaan_ibu
|
| 229 |
+
Exact Match Rate: 0.00%
|
| 230 |
+
Average Accuracy: 17.79%
|
| 231 |
+
Record 0:
|
| 232 |
+
Label: 'ibu_rumah_tangga'
|
| 233 |
+
Predict: 'IRT'
|
| 234 |
+
Edit Distance: 13, Accuracy: 18.75%
|
| 235 |
+
Record 1:
|
| 236 |
+
Label: 'ibu_rumah_tangga'
|
| 237 |
+
Predict: 'Irt.'
|
| 238 |
+
Edit Distance: 13, Accuracy: 18.75%
|
| 239 |
+
Record 2:
|
| 240 |
+
Label: 'ibu_rumah_tangga'
|
| 241 |
+
Predict: 'IIT'
|
| 242 |
+
Edit Distance: 14, Accuracy: 12.50%
|
| 243 |
+
|
| 244 |
+
Field: alamat_rumah_ibu
|
| 245 |
+
Exact Match Rate: 0.00%
|
| 246 |
+
Average Accuracy: 13.06%
|
| 247 |
+
Record 0:
|
| 248 |
+
Label: ''
|
| 249 |
+
Predict: 'Tebon Selatan'
|
| 250 |
+
Edit Distance: 13, Accuracy: 0.00%
|
| 251 |
+
Record 1:
|
| 252 |
+
Label: ''
|
| 253 |
+
Predict: 'PANGKALAN BUNTE.'
|
| 254 |
+
Edit Distance: 16, Accuracy: 0.00%
|
| 255 |
+
Record 2:
|
| 256 |
+
Label: ''
|
| 257 |
+
Predict: 'BENDUNG SELATAN, KILANG'
|
| 258 |
+
Edit Distance: 23, Accuracy: 0.00%
|
| 259 |
+
|
| 260 |
+
Field: riwayat_penyakit_ibu
|
| 261 |
+
Exact Match Rate: 0.00%
|
| 262 |
+
Average Accuracy: 1.04%
|
| 263 |
+
Record 0:
|
| 264 |
+
Label: 'other'
|
| 265 |
+
Predict: '-'
|
| 266 |
+
Edit Distance: 5, Accuracy: 0.00%
|
| 267 |
+
Record 1:
|
| 268 |
+
Label: 'no_past_complications'
|
| 269 |
+
Predict: '-'
|
| 270 |
+
Edit Distance: 21, Accuracy: 0.00%
|
| 271 |
+
Record 2:
|
| 272 |
+
Label: 'do_not_know_past_complications'
|
| 273 |
+
Predict: '-'
|
| 274 |
+
Edit Distance: 30, Accuracy: 0.00%
|
| 275 |
+
|
| 276 |
+
Field: usia_ibu
|
| 277 |
+
Exact Match Rate: 0.00%
|
| 278 |
+
Average Accuracy: 0.00%
|
| 279 |
+
Record 0:
|
| 280 |
+
Label: ''
|
| 281 |
+
Predict: '31 Th'
|
| 282 |
+
Edit Distance: 5, Accuracy: 0.00%
|
| 283 |
+
Record 1:
|
| 284 |
+
Label: ''
|
| 285 |
+
Predict: '34 thn'
|
| 286 |
+
Edit Distance: 6, Accuracy: 0.00%
|
| 287 |
+
Record 2:
|
| 288 |
+
Label: ''
|
| 289 |
+
Predict: '27th'
|
| 290 |
+
Edit Distance: 4, Accuracy: 0.00%
|
unused/label.sql
ADDED
|
File without changes
|
unused/page1_label.sql
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Label-side query for the page-1 (cover) fields.
-- Returns one row per project_id_mother: the entry with the latest `end` value.
SELECT
  filename,
  project_id_mother,
  CASE
    -- Both name parts present and identical: emit the value once.
    WHEN first_name IS NOT NULL
      AND last_name IS NOT NULL
      AND first_name = last_name
      THEN first_name
    -- Otherwise join the available parts with a single space.
    -- TRIM removes the stray separator that was left at the front of the
    -- label when first_name is NULL (previously " last_name").
    ELSE TRIM(
      CONCAT(
        IFNULL(first_name, ''),
        IFNULL(
          CONCAT(' ', last_name),  -- NULL when last_name is NULL
          ''
        )
      )
    )
  END AS nama_ibu_cover,
  puskesmas_name AS dikeluarkan_oleh_fasilitas_kesehatan,
  regency_name AS kabupaten_kota
FROM (
  SELECT
    *,
    -- rn = 1 marks the most recent entry of each project_id_mother.
    ROW_NUMBER() OVER (
      PARTITION BY project_id_mother
      ORDER BY `end` DESC
    ) AS rn
  FROM `stellar-orb-451904-d9.ocr_bukukia_ocr_sid.List Correct Entry 2025`
)
WHERE
  rn = 1
|
unused/page1_prediction.sql
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Prediction-side query for the page-1 (cover) OCR fields.
WITH dedup_master_files AS (
  -- Latest master_files row per original_filename.
  SELECT *
  FROM (
    SELECT
      *,
      ROW_NUMBER() OVER (
        PARTITION BY original_filename
        ORDER BY inserted_at DESC
      ) AS rn
    FROM `stellar-orb-451904-d9.raw_data_ocr_kia.master_files`
  )
  WHERE rn = 1
),
dedup_page1_cover AS (
  -- Latest page1_cover row per file_id.
  -- The window previously had no PARTITION BY, so it ranked the whole table
  -- and `rn = 1` kept exactly one row in total instead of one row per file.
  -- NOTE(review): file_id is the key unused/prediction.sql partitions this
  -- table by; confirm it is the intended dedup key, given the join below is on id.
  SELECT *
  FROM (
    SELECT
      *,
      ROW_NUMBER() OVER (
        PARTITION BY file_id
        ORDER BY inserted_at DESC
      ) AS rn
    FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page1_cover`
  )
  WHERE rn = 1
)

SELECT
  -- MF.id,
  -- MF.file_id,
  MF.original_filename AS filename,
  -- First run of digits in the filename that starts with 7000 followed by 4+ digits.
  REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS project_id_mother,
  P1.nama_ibu_cover,
  P1.dikeluarkan_oleh_fasilitas_kesehatan,
  P1.kabupaten_kota
FROM dedup_page1_cover P1
LEFT JOIN dedup_master_files MF ON P1.id = MF.id
|
unused/page2_label.sql
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Label-side query for the page-2 (mother identity) fields.
-- Returns one row per project_id_mother: the entry with the latest `end` value.
SELECT
  filename,
  project_id_mother,
  CASE
    -- Both name parts present and identical: emit the value once.
    WHEN first_name IS NOT NULL
      AND last_name IS NOT NULL
      AND first_name = last_name
      THEN first_name
    -- Otherwise join the available parts with a single space.
    -- TRIM removes the stray separator that was left at the front of the
    -- label when first_name is NULL (previously " last_name").
    ELSE TRIM(
      CONCAT(
        IFNULL(first_name, ''),
        IFNULL(
          CONCAT(' ', last_name),  -- NULL when last_name is NULL
          ''
        )
      )
    )
  END AS nama_ibu,
  -- Source columns renamed to the OCR field names they are compared against.
  nik_mother AS nik_ibu,
  birth_date AS tempat_tanggal_lahir_ibu,
  address_street AS alamat_rumah_ibu,
  contact_number AS telepon_ibu,
  bpjs_mother AS no_jkn_ibu,
  education_level AS pendidikan_ibu,
  occupation AS pekerjaan_ibu,
  blood_type_result AS golongan_darah_ibu,
  age AS usia_ibu,
  pregnancy_number AS kehamilan_ke,
  number_live_birth AS jumlah_anak_lahir_hidup,
  number_birth_lost AS riwayat_keguguran,
  previous_preg_issue AS riwayat_penyakit_ibu
FROM (
  SELECT
    *,
    -- rn = 1 marks the most recent entry of each project_id_mother.
    ROW_NUMBER() OVER (
      PARTITION BY project_id_mother
      ORDER BY `end` DESC
    ) AS rn
  FROM `stellar-orb-451904-d9.ocr_bukukia_ocr_sid.List Correct Entry 2025`
)
WHERE
  rn = 1
|
unused/page2_prediction.sql
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Prediction-side query for the page-2 (mother identity) OCR fields.
WITH dedup_master_files AS (
  -- Latest master_files row per original_filename.
  SELECT *
  FROM (
    SELECT
      *,
      ROW_NUMBER() OVER (
        PARTITION BY original_filename
        ORDER BY inserted_at DESC
      ) AS rn
    FROM `stellar-orb-451904-d9.raw_data_ocr_kia.master_files`
  )
  WHERE rn = 1
),
dedup_page2_identitas AS (
  -- Latest page2_identitas row per file_id.
  -- The window previously had no PARTITION BY, so it ranked the whole table
  -- and `rn = 1` kept exactly one row in total instead of one row per file.
  -- NOTE(review): file_id is the key unused/prediction.sql partitions this
  -- table by; confirm it is the intended dedup key, given the join below is on id.
  SELECT *
  FROM (
    SELECT
      *,
      ROW_NUMBER() OVER (
        PARTITION BY file_id
        ORDER BY inserted_at DESC
      ) AS rn
    FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page2_identitas`
  )
  WHERE rn = 1
)

SELECT
  -- MF.id,
  -- MF.file_id,
  MF.original_filename AS filename,
  -- First run of digits in the filename that starts with 7000 followed by 4+ digits.
  REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS project_id_mother,
  P2.nama_ibu,
  P2.nik_ibu,
  P2.tempat_tanggal_lahir_ibu,
  P2.alamat_rumah_ibu,
  P2.telepon_ibu,
  P2.no_jkn_ibu,
  P2.pendidikan_ibu,
  P2.pekerjaan_ibu,
  P2.golongan_darah_ibu,
  P2.usia_ibu,
  P2.kehamilan_ke,
  P2.jumlah_anak_lahir_hidup,
  P2.riwayat_keguguran,
  P2.riwayat_penyakit_ibu
FROM dedup_page2_identitas P2
LEFT JOIN dedup_master_files MF ON P2.id = MF.id
|
unused/page3_label.sql
ADDED
|
File without changes
|
unused/page3_prediction.sql
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Prediction-side query for the page-3 visit-measurement OCR fields
-- (columns suffixed _kunjungan_ke_1 .. _kunjungan_ke_3).
WITH dedup_master_files AS (
  -- Latest master_files row per original_filename.
  SELECT *
  FROM (
    SELECT
      *,
      ROW_NUMBER() OVER (
        PARTITION BY original_filename
        ORDER BY inserted_at DESC
      ) AS rn
    FROM `stellar-orb-451904-d9.raw_data_ocr_kia.master_files`
  )
  WHERE rn = 1
),
dedup_page3_pengukuran_kunjungan AS (
  -- Latest page3_pengukuran_kunjungan row per file_id.
  -- The window previously had no PARTITION BY, so it ranked the whole table
  -- and `rn = 1` kept exactly one row in total instead of one row per file.
  -- NOTE(review): assumes this table has a file_id column like the page1/page2
  -- tables partitioned in unused/prediction.sql -- confirm against the schema.
  SELECT *
  FROM (
    SELECT
      *,
      ROW_NUMBER() OVER (
        PARTITION BY file_id
        ORDER BY inserted_at DESC
      ) AS rn
    FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page3_pengukuran_kunjungan`
  )
  WHERE rn = 1
)

SELECT
  MF.id,
  MF.file_id,
  MF.original_filename,
  -- First run of digits in the filename that starts with 7000 followed by 4+ digits.
  REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS project_id_mother,
  -- Visit 1
  tanggal_periksa_kunjungan_ke_1,
  tempat_periksa_kunjungan_ke_1,
  berat_badan_kunjungan_ke_1,
  tinggi_badan_kunjungan_ke_1,
  lingkar_lengan_atas_kunjungan_ke_1,
  tekanan_darah_kunjungan_ke_1,
  tinggi_rahim_kunjungan_ke_1,
  letak_dan_denyut_jantung_bayi_kunjungan_ke_1,
  status_dan_imunisasi_tetanus_kunjungan_ke_1,
  konseling_kunjungan_ke_1,
  skrining_dokter_kunjungan_ke_1,
  tablet_tambah_darah_kunjungan_ke_1,
  tes_lab_hemoglobin_hb_kunjungan_ke_1,
  tes_golongan_darah_kunjungan_ke_1,
  tes_lab_protein_urine_kunjungan_ke_1,
  tes_lab_gula_darah_kunjungan_ke_1,
  usg_kunjungan_ke_1,
  tripel_eliminasi_h_s_hep_b_kunjungan_ke_1,
  tata_laksana_kasus_kunjungan_ke_1,
  -- Visit 2
  tanggal_periksa_kunjungan_ke_2,
  tempat_periksa_kunjungan_ke_2,
  berat_badan_kunjungan_ke_2,
  tinggi_badan_kunjungan_ke_2,
  lingkar_lengan_atas_kunjungan_ke_2,
  tekanan_darah_kunjungan_ke_2,
  tinggi_rahim_kunjungan_ke_2,
  letak_dan_denyut_jantung_bayi_kunjungan_ke_2,
  status_dan_imunisasi_tetanus_kunjungan_ke_2,
  konseling_kunjungan_ke_2,
  skrining_dokter_kunjungan_ke_2,
  tablet_tambah_darah_kunjungan_ke_2,
  tes_lab_hemoglobin_hb_kunjungan_ke_2,
  tes_golongan_darah_kunjungan_ke_2,
  tes_lab_protein_urine_kunjungan_ke_2,
  tes_lab_gula_darah_kunjungan_ke_2,
  usg_kunjungan_ke_2,
  tripel_eliminasi_h_s_hep_b_kunjungan_ke_2,
  tata_laksana_kasus_kunjungan_ke_2,
  -- Visit 3
  tanggal_periksa_kunjungan_ke_3,
  tempat_periksa_kunjungan_ke_3,
  berat_badan_kunjungan_ke_3,
  tinggi_badan_kunjungan_ke_3,
  lingkar_lengan_atas_kunjungan_ke_3,
  tekanan_darah_kunjungan_ke_3,
  tinggi_rahim_kunjungan_ke_3,
  letak_dan_denyut_jantung_bayi_kunjungan_ke_3,
  status_dan_imunisasi_tetanus_kunjungan_ke_3,
  konseling_kunjungan_ke_3,
  skrining_dokter_kunjungan_ke_3,
  tablet_tambah_darah_kunjungan_ke_3,
  tes_lab_hemoglobin_hb_kunjungan_ke_3,
  tes_golongan_darah_kunjungan_ke_3,
  tes_lab_protein_urine_kunjungan_ke_3,
  tes_lab_gula_darah_kunjungan_ke_3,
  usg_kunjungan_ke_3,
  tripel_eliminasi_h_s_hep_b_kunjungan_ke_3,
  tata_laksana_kasus_kunjungan_ke_3
FROM dedup_page3_pengukuran_kunjungan P3
LEFT JOIN dedup_master_files MF ON P3.id = MF.id
|
unused/prediction.sql
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Combined prediction query: cover (page 1) and identity (page 2) OCR fields
-- joined onto the deduplicated master file list.
WITH ranked_master_files AS (
  SELECT
    *,
    -- Lower-cased filename with any leading "Copy of " prefixes removed.
    -- NOTE(review): computed but not referenced elsewhere in this query; the
    -- dedup below still partitions by original_filename -- confirm intent.
    LOWER(
      REGEXP_REPLACE(original_filename, r'^(Copy of\s*)+', '')
    ) AS cleaned_filename,
    ROW_NUMBER() OVER (
      PARTITION BY original_filename
      ORDER BY created_at DESC
    ) AS rn
  FROM `stellar-orb-451904-d9.raw_data_ocr_kia.master_files`
),
dedup_master_files AS (
  -- Newest master_files row per original_filename.
  SELECT *
  FROM ranked_master_files
  WHERE rn = 1
),
ranked_page1_cover AS (
  SELECT
    *,
    ROW_NUMBER() OVER (
      PARTITION BY file_id
      ORDER BY created_at DESC
    ) AS rn
  FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page1_cover`
),
dedup_page1_cover AS (
  -- Newest page1_cover row per file_id.
  SELECT *
  FROM ranked_page1_cover
  WHERE rn = 1
),
ranked_page2_identitas AS (
  SELECT
    *,
    -- NOTE(review): ordered by inserted_at while the two CTEs above order by
    -- created_at -- confirm which timestamp is intended.
    ROW_NUMBER() OVER (
      PARTITION BY file_id
      ORDER BY inserted_at DESC
    ) AS rn
  FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page2_identitas`
),
dedup_page2_identitas AS (
  -- Newest page2_identitas row per file_id.
  SELECT *
  FROM ranked_page2_identitas
  WHERE rn = 1
)

SELECT
  files.original_filename AS `filename`,
  -- First run of digits in the filename that starts with 7000 followed by 4+ digits.
  REGEXP_EXTRACT(files.original_filename, r'(7000\d{4,})') AS project_id_mother,
  cover.nama_ibu_cover,
  cover.dikeluarkan_oleh_fasilitas_kesehatan,
  cover.kabupaten_kota,
  identitas.nama_ibu,
  identitas.nik_ibu,
  identitas.tempat_tanggal_lahir_ibu,
  identitas.alamat_rumah_ibu,
  identitas.telepon_ibu,
  identitas.no_jkn_ibu,
  identitas.pendidikan_ibu,
  identitas.pekerjaan_ibu,
  identitas.golongan_darah_ibu,
  identitas.usia_ibu,
  identitas.kehamilan_ke,
  identitas.jumlah_anak_lahir_hidup,
  identitas.riwayat_keguguran,
  identitas.riwayat_penyakit_ibu
FROM dedup_master_files AS files
-- NOTE(review): page tables are deduplicated per file_id but joined on id --
-- confirm the two keys line up.
LEFT JOIN dedup_page1_cover AS cover ON files.id = cover.id
LEFT JOIN dedup_page2_identitas AS identitas ON files.id = identitas.id
|
unused/sample_accuracy_output.json
ADDED
|
@@ -0,0 +1,719 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nama_ibu_cover": {
|
| 3 |
+
"exact_match_rate": 0.6666666666666666,
|
| 4 |
+
"average_edit_distance": 1.6666666666666667,
|
| 5 |
+
"average_normalized_accuracy": 0.9333333333333332,
|
| 6 |
+
"min_accuracy": 0.8,
|
| 7 |
+
"max_edit_distance": 5,
|
| 8 |
+
"exact_matches": 2,
|
| 9 |
+
"total_records": 3,
|
| 10 |
+
"examples": [
|
| 11 |
+
{
|
| 12 |
+
"record_idx": 0,
|
| 13 |
+
"label": "Sonika Laksono Putri, Ny",
|
| 14 |
+
"predict": "SoniKa Laksana Putri, NY.",
|
| 15 |
+
"edit_distance": 5,
|
| 16 |
+
"accuracy": 0.8
|
| 17 |
+
}
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
"dikeluarkan_oleh_fasilitas_kesehatan": {
|
| 21 |
+
"exact_match_rate": 0.3333333333333333,
|
| 22 |
+
"average_edit_distance": 1.3333333333333333,
|
| 23 |
+
"average_normalized_accuracy": 0.6388888888888888,
|
| 24 |
+
"min_accuracy": 0.0,
|
| 25 |
+
"max_edit_distance": 3,
|
| 26 |
+
"exact_matches": 1,
|
| 27 |
+
"total_records": 3,
|
| 28 |
+
"examples": [
|
| 29 |
+
{
|
| 30 |
+
"record_idx": 1,
|
| 31 |
+
"label": "DWB",
|
| 32 |
+
"predict": "PUS",
|
| 33 |
+
"edit_distance": 3,
|
| 34 |
+
"accuracy": 0.0
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"record_idx": 2,
|
| 38 |
+
"label": "UTD PKM KMK",
|
| 39 |
+
"predict": "UPTD PKM KMK",
|
| 40 |
+
"edit_distance": 1,
|
| 41 |
+
"accuracy": 0.9166666666666666
|
| 42 |
+
}
|
| 43 |
+
]
|
| 44 |
+
},
|
| 45 |
+
"kabupaten_kota": {
|
| 46 |
+
"exact_match_rate": 1.0,
|
| 47 |
+
"average_edit_distance": 0.0,
|
| 48 |
+
"average_normalized_accuracy": 1.0,
|
| 49 |
+
"min_accuracy": 1.0,
|
| 50 |
+
"max_edit_distance": 0,
|
| 51 |
+
"exact_matches": 3,
|
| 52 |
+
"total_records": 3,
|
| 53 |
+
"examples": []
|
| 54 |
+
},
|
| 55 |
+
"tanggal_dikeluarkannya_buku": {
|
| 56 |
+
"exact_match_rate": 0.3333333333333333,
|
| 57 |
+
"average_edit_distance": 1.6666666666666667,
|
| 58 |
+
"average_normalized_accuracy": 0.8250000000000001,
|
| 59 |
+
"min_accuracy": 0.6,
|
| 60 |
+
"max_edit_distance": 4,
|
| 61 |
+
"exact_matches": 1,
|
| 62 |
+
"total_records": 3,
|
| 63 |
+
"examples": [
|
| 64 |
+
{
|
| 65 |
+
"record_idx": 1,
|
| 66 |
+
"label": "8-9-2025",
|
| 67 |
+
"predict": "8-9-2024",
|
| 68 |
+
"edit_distance": 1,
|
| 69 |
+
"accuracy": 0.875
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"record_idx": 2,
|
| 73 |
+
"label": "30-09-2025",
|
| 74 |
+
"predict": "30/09/25",
|
| 75 |
+
"edit_distance": 4,
|
| 76 |
+
"accuracy": 0.6
|
| 77 |
+
}
|
| 78 |
+
]
|
| 79 |
+
},
|
| 80 |
+
"provinsi": {
|
| 81 |
+
"exact_match_rate": 1.0,
|
| 82 |
+
"average_edit_distance": 0.0,
|
| 83 |
+
"average_normalized_accuracy": 1.0,
|
| 84 |
+
"min_accuracy": 1.0,
|
| 85 |
+
"max_edit_distance": 0,
|
| 86 |
+
"exact_matches": 3,
|
| 87 |
+
"total_records": 3,
|
| 88 |
+
"examples": []
|
| 89 |
+
},
|
| 90 |
+
"nama_ibu": {
|
| 91 |
+
"exact_match_rate": 0.6666666666666666,
|
| 92 |
+
"average_edit_distance": 2.0,
|
| 93 |
+
"average_normalized_accuracy": 0.9,
|
| 94 |
+
"min_accuracy": 0.7,
|
| 95 |
+
"max_edit_distance": 6,
|
| 96 |
+
"exact_matches": 2,
|
| 97 |
+
"total_records": 3,
|
| 98 |
+
"examples": [
|
| 99 |
+
{
|
| 100 |
+
"record_idx": 0,
|
| 101 |
+
"label": "Sonika Laksono P",
|
| 102 |
+
"predict": "Sonika Laksana Putri",
|
| 103 |
+
"edit_distance": 6,
|
| 104 |
+
"accuracy": 0.7
|
| 105 |
+
}
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
"nik_ibu": {
|
| 109 |
+
"exact_match_rate": 0.6666666666666666,
|
| 110 |
+
"average_edit_distance": 0.3333333333333333,
|
| 111 |
+
"average_normalized_accuracy": 0.9791666666666666,
|
| 112 |
+
"min_accuracy": 0.9375,
|
| 113 |
+
"max_edit_distance": 1,
|
| 114 |
+
"exact_matches": 2,
|
| 115 |
+
"total_records": 3,
|
| 116 |
+
"examples": [
|
| 117 |
+
{
|
| 118 |
+
"record_idx": 0,
|
| 119 |
+
"label": "3303071004010001",
|
| 120 |
+
"predict": "3303271004010001",
|
| 121 |
+
"edit_distance": 1,
|
| 122 |
+
"accuracy": 0.9375
|
| 123 |
+
}
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
"no_jkn_ibu": {
|
| 127 |
+
"exact_match_rate": 0.6666666666666666,
|
| 128 |
+
"average_edit_distance": 0.6666666666666666,
|
| 129 |
+
"average_normalized_accuracy": 0.9333333333333332,
|
| 130 |
+
"min_accuracy": 0.8,
|
| 131 |
+
"max_edit_distance": 2,
|
| 132 |
+
"exact_matches": 2,
|
| 133 |
+
"total_records": 3,
|
| 134 |
+
"examples": [
|
| 135 |
+
{
|
| 136 |
+
"record_idx": 1,
|
| 137 |
+
"label": "0001389281",
|
| 138 |
+
"predict": "8801389281",
|
| 139 |
+
"edit_distance": 2,
|
| 140 |
+
"accuracy": 0.8
|
| 141 |
+
}
|
| 142 |
+
]
|
| 143 |
+
},
|
| 144 |
+
"fasilitas_kesehatan_tk1_ibu": {
|
| 145 |
+
"exact_match_rate": 0.3333333333333333,
|
| 146 |
+
"average_edit_distance": 0.6666666666666666,
|
| 147 |
+
"average_normalized_accuracy": 0.9388888888888888,
|
| 148 |
+
"min_accuracy": 0.9,
|
| 149 |
+
"max_edit_distance": 1,
|
| 150 |
+
"exact_matches": 1,
|
| 151 |
+
"total_records": 3,
|
| 152 |
+
"examples": [
|
| 153 |
+
{
|
| 154 |
+
"record_idx": 0,
|
| 155 |
+
"label": "Klinik PMI",
|
| 156 |
+
"predict": "Klinik PMU",
|
| 157 |
+
"edit_distance": 1,
|
| 158 |
+
"accuracy": 0.9
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"record_idx": 1,
|
| 162 |
+
"label": "Pusk Bojong",
|
| 163 |
+
"predict": "Pusk. Bojong",
|
| 164 |
+
"edit_distance": 1,
|
| 165 |
+
"accuracy": 0.9166666666666666
|
| 166 |
+
}
|
| 167 |
+
]
|
| 168 |
+
},
|
| 169 |
+
"fasilitas_kesehatan_rujukan_ibu": {
|
| 170 |
+
"exact_match_rate": 0.6666666666666666,
|
| 171 |
+
"average_edit_distance": 0.3333333333333333,
|
| 172 |
+
"average_normalized_accuracy": 0.6666666666666666,
|
| 173 |
+
"min_accuracy": 0.0,
|
| 174 |
+
"max_edit_distance": 1,
|
| 175 |
+
"exact_matches": 2,
|
| 176 |
+
"total_records": 3,
|
| 177 |
+
"examples": [
|
| 178 |
+
{
|
| 179 |
+
"record_idx": 0,
|
| 180 |
+
"label": "-",
|
| 181 |
+
"predict": "",
|
| 182 |
+
"edit_distance": 1,
|
| 183 |
+
"accuracy": 0.0
|
| 184 |
+
}
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
"tempat_tanggal_lahir_ibu": {
|
| 188 |
+
"exact_match_rate": 0.6666666666666666,
|
| 189 |
+
"average_edit_distance": 0.3333333333333333,
|
| 190 |
+
"average_normalized_accuracy": 0.9848484848484849,
|
| 191 |
+
"min_accuracy": 0.9545454545454546,
|
| 192 |
+
"max_edit_distance": 1,
|
| 193 |
+
"exact_matches": 2,
|
| 194 |
+
"total_records": 3,
|
| 195 |
+
"examples": [
|
| 196 |
+
{
|
| 197 |
+
"record_idx": 0,
|
| 198 |
+
"label": "Purbalingga 17/6/2000",
|
| 199 |
+
"predict": "Purbalingga, 17/6/2000",
|
| 200 |
+
"edit_distance": 1,
|
| 201 |
+
"accuracy": 0.9545454545454546
|
| 202 |
+
}
|
| 203 |
+
]
|
| 204 |
+
},
|
| 205 |
+
"pendidikan_ibu": {
|
| 206 |
+
"exact_match_rate": 1.0,
|
| 207 |
+
"average_edit_distance": 0.0,
|
| 208 |
+
"average_normalized_accuracy": 1.0,
|
| 209 |
+
"min_accuracy": 1.0,
|
| 210 |
+
"max_edit_distance": 0,
|
| 211 |
+
"exact_matches": 3,
|
| 212 |
+
"total_records": 3,
|
| 213 |
+
"examples": []
|
| 214 |
+
},
|
| 215 |
+
"pekerjaan_ibu": {
|
| 216 |
+
"exact_match_rate": 0.6666666666666666,
|
| 217 |
+
"average_edit_distance": 0.3333333333333333,
|
| 218 |
+
"average_normalized_accuracy": 0.9333333333333332,
|
| 219 |
+
"min_accuracy": 0.8,
|
| 220 |
+
"max_edit_distance": 1,
|
| 221 |
+
"exact_matches": 2,
|
| 222 |
+
"total_records": 3,
|
| 223 |
+
"examples": [
|
| 224 |
+
{
|
| 225 |
+
"record_idx": 1,
|
| 226 |
+
"label": "Buruh",
|
| 227 |
+
"predict": "Bunuh",
|
| 228 |
+
"edit_distance": 1,
|
| 229 |
+
"accuracy": 0.8
|
| 230 |
+
}
|
| 231 |
+
]
|
| 232 |
+
},
|
| 233 |
+
"alamat_rumah_ibu": {
|
| 234 |
+
"exact_match_rate": 0.3333333333333333,
|
| 235 |
+
"average_edit_distance": 4.666666666666667,
|
| 236 |
+
"average_normalized_accuracy": 0.7333333333333334,
|
| 237 |
+
"min_accuracy": 0.44999999999999996,
|
| 238 |
+
"max_edit_distance": 11,
|
| 239 |
+
"exact_matches": 1,
|
| 240 |
+
"total_records": 3,
|
| 241 |
+
"examples": [
|
| 242 |
+
{
|
| 243 |
+
"record_idx": 0,
|
| 244 |
+
"label": "Kd. mjgn 2/1",
|
| 245 |
+
"predict": "Kd. Myun 2/1",
|
| 246 |
+
"edit_distance": 3,
|
| 247 |
+
"accuracy": 0.75
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"record_idx": 1,
|
| 251 |
+
"label": "Kedung Menjangan 1/1",
|
| 252 |
+
"predict": "Wedy Wijayars 1/1",
|
| 253 |
+
"edit_distance": 11,
|
| 254 |
+
"accuracy": 0.44999999999999996
|
| 255 |
+
}
|
| 256 |
+
]
|
| 257 |
+
},
|
| 258 |
+
"telepon_ibu": {
|
| 259 |
+
"exact_match_rate": 0.6666666666666666,
|
| 260 |
+
"average_edit_distance": 0.3333333333333333,
|
| 261 |
+
"average_normalized_accuracy": 0.9722222222222222,
|
| 262 |
+
"min_accuracy": 0.9166666666666666,
|
| 263 |
+
"max_edit_distance": 1,
|
| 264 |
+
"exact_matches": 2,
|
| 265 |
+
"total_records": 3,
|
| 266 |
+
"examples": [
|
| 267 |
+
{
|
| 268 |
+
"record_idx": 0,
|
| 269 |
+
"label": "089682122648",
|
| 270 |
+
"predict": "089682122698",
|
| 271 |
+
"edit_distance": 1,
|
| 272 |
+
"accuracy": 0.9166666666666666
|
| 273 |
+
}
|
| 274 |
+
]
|
| 275 |
+
},
|
| 276 |
+
"anak_ke_ibu": {
|
| 277 |
+
"exact_match_rate": 1.0,
|
| 278 |
+
"average_edit_distance": 0.0,
|
| 279 |
+
"average_normalized_accuracy": 1.0,
|
| 280 |
+
"min_accuracy": 1.0,
|
| 281 |
+
"max_edit_distance": 0,
|
| 282 |
+
"exact_matches": 3,
|
| 283 |
+
"total_records": 3,
|
| 284 |
+
"examples": []
|
| 285 |
+
},
|
| 286 |
+
"nomor_akta_kelahiran_ibu": {
|
| 287 |
+
"exact_match_rate": 1.0,
|
| 288 |
+
"average_edit_distance": 0.0,
|
| 289 |
+
"average_normalized_accuracy": 1.0,
|
| 290 |
+
"min_accuracy": 1.0,
|
| 291 |
+
"max_edit_distance": 0,
|
| 292 |
+
"exact_matches": 3,
|
| 293 |
+
"total_records": 3,
|
| 294 |
+
"examples": []
|
| 295 |
+
},
|
| 296 |
+
"golongan_darah_ibu": {
|
| 297 |
+
"exact_match_rate": 1.0,
|
| 298 |
+
"average_edit_distance": 0.0,
|
| 299 |
+
"average_normalized_accuracy": 1.0,
|
| 300 |
+
"min_accuracy": 1.0,
|
| 301 |
+
"max_edit_distance": 0,
|
| 302 |
+
"exact_matches": 3,
|
| 303 |
+
"total_records": 3,
|
| 304 |
+
"examples": []
|
| 305 |
+
},
|
| 306 |
+
"nama_suami_keluarga": {
|
| 307 |
+
"exact_match_rate": 0.3333333333333333,
|
| 308 |
+
"average_edit_distance": 3.0,
|
| 309 |
+
"average_normalized_accuracy": 0.7678571428571429,
|
| 310 |
+
"min_accuracy": 0.4285714285714286,
|
| 311 |
+
"max_edit_distance": 8,
|
| 312 |
+
"exact_matches": 1,
|
| 313 |
+
"total_records": 3,
|
| 314 |
+
"examples": [
|
| 315 |
+
{
|
| 316 |
+
"record_idx": 0,
|
| 317 |
+
"label": "Safrudin",
|
| 318 |
+
"predict": "Saprudin",
|
| 319 |
+
"edit_distance": 1,
|
| 320 |
+
"accuracy": 0.875
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"record_idx": 1,
|
| 324 |
+
"label": "Sugiato Sujono",
|
| 325 |
+
"predict": "Sugrah Fuzmi",
|
| 326 |
+
"edit_distance": 8,
|
| 327 |
+
"accuracy": 0.4285714285714286
|
| 328 |
+
}
|
| 329 |
+
]
|
| 330 |
+
},
|
| 331 |
+
"nik_suami_keluarga": {
|
| 332 |
+
"exact_match_rate": 0.6666666666666666,
|
| 333 |
+
"average_edit_distance": 0.3333333333333333,
|
| 334 |
+
"average_normalized_accuracy": 0.9791666666666666,
|
| 335 |
+
"min_accuracy": 0.9375,
|
| 336 |
+
"max_edit_distance": 1,
|
| 337 |
+
"exact_matches": 2,
|
| 338 |
+
"total_records": 3,
|
| 339 |
+
"examples": [
|
| 340 |
+
{
|
| 341 |
+
"record_idx": 1,
|
| 342 |
+
"label": "3303050711800001",
|
| 343 |
+
"predict": "3363050711800001",
|
| 344 |
+
"edit_distance": 1,
|
| 345 |
+
"accuracy": 0.9375
|
| 346 |
+
}
|
| 347 |
+
]
|
| 348 |
+
},
|
| 349 |
+
"no_jkn_suami_keluarga": {
|
| 350 |
+
"exact_match_rate": 1.0,
|
| 351 |
+
"average_edit_distance": 0.0,
|
| 352 |
+
"average_normalized_accuracy": 1.0,
|
| 353 |
+
"min_accuracy": 1.0,
|
| 354 |
+
"max_edit_distance": 0,
|
| 355 |
+
"exact_matches": 3,
|
| 356 |
+
"total_records": 3,
|
| 357 |
+
"examples": []
|
| 358 |
+
},
|
| 359 |
+
"fasilitas_kesehatan_tk1_suami_keluarga": {
|
| 360 |
+
"exact_match_rate": 0.6666666666666666,
|
| 361 |
+
"average_edit_distance": 3.0,
|
| 362 |
+
"average_normalized_accuracy": 0.7692307692307692,
|
| 363 |
+
"min_accuracy": 0.3076923076923077,
|
| 364 |
+
"max_edit_distance": 9,
|
| 365 |
+
"exact_matches": 2,
|
| 366 |
+
"total_records": 3,
|
| 367 |
+
"examples": [
|
| 368 |
+
{
|
| 369 |
+
"record_idx": 0,
|
| 370 |
+
"label": "Pusk. Bojong",
|
| 371 |
+
"predict": "PUSK. BOJONG.",
|
| 372 |
+
"edit_distance": 9,
|
| 373 |
+
"accuracy": 0.3076923076923077
|
| 374 |
+
}
|
| 375 |
+
]
|
| 376 |
+
},
|
| 377 |
+
"fasilitas_kesehatan_rujukan_suami_keluarga": {
|
| 378 |
+
"exact_match_rate": 0.6666666666666666,
|
| 379 |
+
"average_edit_distance": 0.3333333333333333,
|
| 380 |
+
"average_normalized_accuracy": 0.6666666666666666,
|
| 381 |
+
"min_accuracy": 0.0,
|
| 382 |
+
"max_edit_distance": 1,
|
| 383 |
+
"exact_matches": 2,
|
| 384 |
+
"total_records": 3,
|
| 385 |
+
"examples": [
|
| 386 |
+
{
|
| 387 |
+
"record_idx": 0,
|
| 388 |
+
"label": "-",
|
| 389 |
+
"predict": "",
|
| 390 |
+
"edit_distance": 1,
|
| 391 |
+
"accuracy": 0.0
|
| 392 |
+
}
|
| 393 |
+
]
|
| 394 |
+
},
|
| 395 |
+
"tempat_tanggal_lahir_suami_keluarga": {
|
| 396 |
+
"exact_match_rate": 0.6666666666666666,
|
| 397 |
+
"average_edit_distance": 0.3333333333333333,
|
| 398 |
+
"average_normalized_accuracy": 0.9777777777777779,
|
| 399 |
+
"min_accuracy": 0.9333333333333333,
|
| 400 |
+
"max_edit_distance": 1,
|
| 401 |
+
"exact_matches": 2,
|
| 402 |
+
"total_records": 3,
|
| 403 |
+
"examples": [
|
| 404 |
+
{
|
| 405 |
+
"record_idx": 0,
|
| 406 |
+
"label": "Pbgg, 10/4/2001",
|
| 407 |
+
"predict": "Pbg, 10/4/2001",
|
| 408 |
+
"edit_distance": 1,
|
| 409 |
+
"accuracy": 0.9333333333333333
|
| 410 |
+
}
|
| 411 |
+
]
|
| 412 |
+
},
|
| 413 |
+
"pendidikan_suami_keluarga": {
|
| 414 |
+
"exact_match_rate": 1.0,
|
| 415 |
+
"average_edit_distance": 0.0,
|
| 416 |
+
"average_normalized_accuracy": 1.0,
|
| 417 |
+
"min_accuracy": 1.0,
|
| 418 |
+
"max_edit_distance": 0,
|
| 419 |
+
"exact_matches": 3,
|
| 420 |
+
"total_records": 3,
|
| 421 |
+
"examples": []
|
| 422 |
+
},
|
| 423 |
+
"pekerjaan_suami_keluarga": {
|
| 424 |
+
"exact_match_rate": 0.6666666666666666,
|
| 425 |
+
"average_edit_distance": 2.3333333333333335,
|
| 426 |
+
"average_normalized_accuracy": 0.7407407407407408,
|
| 427 |
+
"min_accuracy": 0.2222222222222222,
|
| 428 |
+
"max_edit_distance": 7,
|
| 429 |
+
"exact_matches": 2,
|
| 430 |
+
"total_records": 3,
|
| 431 |
+
"examples": [
|
| 432 |
+
{
|
| 433 |
+
"record_idx": 1,
|
| 434 |
+
"label": "Buruh",
|
| 435 |
+
"predict": "Kary Bumi",
|
| 436 |
+
"edit_distance": 7,
|
| 437 |
+
"accuracy": 0.2222222222222222
|
| 438 |
+
}
|
| 439 |
+
]
|
| 440 |
+
},
|
| 441 |
+
"alamat_rumah_suami_keluarga": {
|
| 442 |
+
"exact_match_rate": 0.6666666666666666,
|
| 443 |
+
"average_edit_distance": 4.0,
|
| 444 |
+
"average_normalized_accuracy": 0.6666666666666666,
|
| 445 |
+
"min_accuracy": 0.0,
|
| 446 |
+
"max_edit_distance": 12,
|
| 447 |
+
"exact_matches": 2,
|
| 448 |
+
"total_records": 3,
|
| 449 |
+
"examples": [
|
| 450 |
+
{
|
| 451 |
+
"record_idx": 0,
|
| 452 |
+
"label": "",
|
| 453 |
+
"predict": "Kd. Myun 2/1",
|
| 454 |
+
"edit_distance": 12,
|
| 455 |
+
"accuracy": 0.0
|
| 456 |
+
}
|
| 457 |
+
]
|
| 458 |
+
},
|
| 459 |
+
"telepon_suami_keluarga": {
|
| 460 |
+
"exact_match_rate": 0.6666666666666666,
|
| 461 |
+
"average_edit_distance": 4.0,
|
| 462 |
+
"average_normalized_accuracy": 0.6666666666666666,
|
| 463 |
+
"min_accuracy": 0.0,
|
| 464 |
+
"max_edit_distance": 12,
|
| 465 |
+
"exact_matches": 2,
|
| 466 |
+
"total_records": 3,
|
| 467 |
+
"examples": [
|
| 468 |
+
{
|
| 469 |
+
"record_idx": 0,
|
| 470 |
+
"label": "",
|
| 471 |
+
"predict": "085776323896",
|
| 472 |
+
"edit_distance": 12,
|
| 473 |
+
"accuracy": 0.0
|
| 474 |
+
}
|
| 475 |
+
]
|
| 476 |
+
},
|
| 477 |
+
"anak_ke_suami_keluarga": {
|
| 478 |
+
"exact_match_rate": 1.0,
|
| 479 |
+
"average_edit_distance": 0.0,
|
| 480 |
+
"average_normalized_accuracy": 1.0,
|
| 481 |
+
"min_accuracy": 1.0,
|
| 482 |
+
"max_edit_distance": 0,
|
| 483 |
+
"exact_matches": 3,
|
| 484 |
+
"total_records": 3,
|
| 485 |
+
"examples": []
|
| 486 |
+
},
|
| 487 |
+
"nomor_akta_kelahiran_suami_keluarga": {
|
| 488 |
+
"exact_match_rate": 1.0,
|
| 489 |
+
"average_edit_distance": 0.0,
|
| 490 |
+
"average_normalized_accuracy": 1.0,
|
| 491 |
+
"min_accuracy": 1.0,
|
| 492 |
+
"max_edit_distance": 0,
|
| 493 |
+
"exact_matches": 3,
|
| 494 |
+
"total_records": 3,
|
| 495 |
+
"examples": []
|
| 496 |
+
},
|
| 497 |
+
"golongan_darah_suami_keluarga": {
|
| 498 |
+
"exact_match_rate": 1.0,
|
| 499 |
+
"average_edit_distance": 0.0,
|
| 500 |
+
"average_normalized_accuracy": 1.0,
|
| 501 |
+
"min_accuracy": 1.0,
|
| 502 |
+
"max_edit_distance": 0,
|
| 503 |
+
"exact_matches": 3,
|
| 504 |
+
"total_records": 3,
|
| 505 |
+
"examples": []
|
| 506 |
+
},
|
| 507 |
+
"nama_anak": {
|
| 508 |
+
"exact_match_rate": 1.0,
|
| 509 |
+
"average_edit_distance": 0.0,
|
| 510 |
+
"average_normalized_accuracy": 1.0,
|
| 511 |
+
"min_accuracy": 1.0,
|
| 512 |
+
"max_edit_distance": 0,
|
| 513 |
+
"exact_matches": 3,
|
| 514 |
+
"total_records": 3,
|
| 515 |
+
"examples": []
|
| 516 |
+
},
|
| 517 |
+
"nik_anak": {
|
| 518 |
+
"exact_match_rate": 1.0,
|
| 519 |
+
"average_edit_distance": 0.0,
|
| 520 |
+
"average_normalized_accuracy": 1.0,
|
| 521 |
+
"min_accuracy": 1.0,
|
| 522 |
+
"max_edit_distance": 0,
|
| 523 |
+
"exact_matches": 3,
|
| 524 |
+
"total_records": 3,
|
| 525 |
+
"examples": []
|
| 526 |
+
},
|
| 527 |
+
"no_jkn_anak": {
|
| 528 |
+
"exact_match_rate": 1.0,
|
| 529 |
+
"average_edit_distance": 0.0,
|
| 530 |
+
"average_normalized_accuracy": 1.0,
|
| 531 |
+
"min_accuracy": 1.0,
|
| 532 |
+
"max_edit_distance": 0,
|
| 533 |
+
"exact_matches": 3,
|
| 534 |
+
"total_records": 3,
|
| 535 |
+
"examples": []
|
| 536 |
+
},
|
| 537 |
+
"fasilitas_kesehatan_tk1_anak": {
|
| 538 |
+
"exact_match_rate": 1.0,
|
| 539 |
+
"average_edit_distance": 0.0,
|
| 540 |
+
"average_normalized_accuracy": 1.0,
|
| 541 |
+
"min_accuracy": 1.0,
|
| 542 |
+
"max_edit_distance": 0,
|
| 543 |
+
"exact_matches": 3,
|
| 544 |
+
"total_records": 3,
|
| 545 |
+
"examples": []
|
| 546 |
+
},
|
| 547 |
+
"fasilitas_kesehatan_rujukan_anak": {
|
| 548 |
+
"exact_match_rate": 1.0,
|
| 549 |
+
"average_edit_distance": 0.0,
|
| 550 |
+
"average_normalized_accuracy": 1.0,
|
| 551 |
+
"min_accuracy": 1.0,
|
| 552 |
+
"max_edit_distance": 0,
|
| 553 |
+
"exact_matches": 3,
|
| 554 |
+
"total_records": 3,
|
| 555 |
+
"examples": []
|
| 556 |
+
},
|
| 557 |
+
"tempat_tanggal_lahir_anak": {
|
| 558 |
+
"exact_match_rate": 1.0,
|
| 559 |
+
"average_edit_distance": 0.0,
|
| 560 |
+
"average_normalized_accuracy": 1.0,
|
| 561 |
+
"min_accuracy": 1.0,
|
| 562 |
+
"max_edit_distance": 0,
|
| 563 |
+
"exact_matches": 3,
|
| 564 |
+
"total_records": 3,
|
| 565 |
+
"examples": []
|
| 566 |
+
},
|
| 567 |
+
"pendidikan_anak": {
|
| 568 |
+
"exact_match_rate": 1.0,
|
| 569 |
+
"average_edit_distance": 0.0,
|
| 570 |
+
"average_normalized_accuracy": 1.0,
|
| 571 |
+
"min_accuracy": 1.0,
|
| 572 |
+
"max_edit_distance": 0,
|
| 573 |
+
"exact_matches": 3,
|
| 574 |
+
"total_records": 3,
|
| 575 |
+
"examples": []
|
| 576 |
+
},
|
| 577 |
+
"pekerjaan_anak": {
|
| 578 |
+
"exact_match_rate": 1.0,
|
| 579 |
+
"average_edit_distance": 0.0,
|
| 580 |
+
"average_normalized_accuracy": 1.0,
|
| 581 |
+
"min_accuracy": 1.0,
|
| 582 |
+
"max_edit_distance": 0,
|
| 583 |
+
"exact_matches": 3,
|
| 584 |
+
"total_records": 3,
|
| 585 |
+
"examples": []
|
| 586 |
+
},
|
| 587 |
+
"alamat_rumah_anak": {
|
| 588 |
+
"exact_match_rate": 1.0,
|
| 589 |
+
"average_edit_distance": 0.0,
|
| 590 |
+
"average_normalized_accuracy": 1.0,
|
| 591 |
+
"min_accuracy": 1.0,
|
| 592 |
+
"max_edit_distance": 0,
|
| 593 |
+
"exact_matches": 3,
|
| 594 |
+
"total_records": 3,
|
| 595 |
+
"examples": []
|
| 596 |
+
},
|
| 597 |
+
"telepon_anak": {
|
| 598 |
+
"exact_match_rate": 1.0,
|
| 599 |
+
"average_edit_distance": 0.0,
|
| 600 |
+
"average_normalized_accuracy": 1.0,
|
| 601 |
+
"min_accuracy": 1.0,
|
| 602 |
+
"max_edit_distance": 0,
|
| 603 |
+
"exact_matches": 3,
|
| 604 |
+
"total_records": 3,
|
| 605 |
+
"examples": []
|
| 606 |
+
},
|
| 607 |
+
"anak_ke_anak": {
|
| 608 |
+
"exact_match_rate": 1.0,
|
| 609 |
+
"average_edit_distance": 0.0,
|
| 610 |
+
"average_normalized_accuracy": 1.0,
|
| 611 |
+
"min_accuracy": 1.0,
|
| 612 |
+
"max_edit_distance": 0,
|
| 613 |
+
"exact_matches": 3,
|
| 614 |
+
"total_records": 3,
|
| 615 |
+
"examples": []
|
| 616 |
+
},
|
| 617 |
+
"nomor_akta_kelahiran_anak": {
|
| 618 |
+
"exact_match_rate": 1.0,
|
| 619 |
+
"average_edit_distance": 0.0,
|
| 620 |
+
"average_normalized_accuracy": 1.0,
|
| 621 |
+
"min_accuracy": 1.0,
|
| 622 |
+
"max_edit_distance": 0,
|
| 623 |
+
"exact_matches": 3,
|
| 624 |
+
"total_records": 3,
|
| 625 |
+
"examples": []
|
| 626 |
+
},
|
| 627 |
+
"golongan_darah_anak": {
|
| 628 |
+
"exact_match_rate": 1.0,
|
| 629 |
+
"average_edit_distance": 0.0,
|
| 630 |
+
"average_normalized_accuracy": 1.0,
|
| 631 |
+
"min_accuracy": 1.0,
|
| 632 |
+
"max_edit_distance": 0,
|
| 633 |
+
"exact_matches": 3,
|
| 634 |
+
"total_records": 3,
|
| 635 |
+
"examples": []
|
| 636 |
+
},
|
| 637 |
+
"usia_ibu": {
|
| 638 |
+
"exact_match_rate": 0.6666666666666666,
|
| 639 |
+
"average_edit_distance": 0.3333333333333333,
|
| 640 |
+
"average_normalized_accuracy": 0.9444444444444445,
|
| 641 |
+
"min_accuracy": 0.8333333333333334,
|
| 642 |
+
"max_edit_distance": 1,
|
| 643 |
+
"exact_matches": 2,
|
| 644 |
+
"total_records": 3,
|
| 645 |
+
"examples": [
|
| 646 |
+
{
|
| 647 |
+
"record_idx": 1,
|
| 648 |
+
"label": "35 th",
|
| 649 |
+
"predict": "35 th.",
|
| 650 |
+
"edit_distance": 1,
|
| 651 |
+
"accuracy": 0.8333333333333334
|
| 652 |
+
}
|
| 653 |
+
]
|
| 654 |
+
},
|
| 655 |
+
"kehamilan_ke": {
|
| 656 |
+
"exact_match_rate": 0.6666666666666666,
|
| 657 |
+
"average_edit_distance": 2.6666666666666665,
|
| 658 |
+
"average_normalized_accuracy": 0.8431372549019608,
|
| 659 |
+
"min_accuracy": 0.5294117647058824,
|
| 660 |
+
"max_edit_distance": 8,
|
| 661 |
+
"exact_matches": 2,
|
| 662 |
+
"total_records": 3,
|
| 663 |
+
"examples": [
|
| 664 |
+
{
|
| 665 |
+
"record_idx": 1,
|
| 666 |
+
"label": "GV P2 A2 (26 th)",
|
| 667 |
+
"predict": "6E PI A2Y (26ln).",
|
| 668 |
+
"edit_distance": 8,
|
| 669 |
+
"accuracy": 0.5294117647058824
|
| 670 |
+
}
|
| 671 |
+
]
|
| 672 |
+
},
|
| 673 |
+
"jumlah_anak_lahir_hidup": {
|
| 674 |
+
"exact_match_rate": 0.6666666666666666,
|
| 675 |
+
"average_edit_distance": 2.6666666666666665,
|
| 676 |
+
"average_normalized_accuracy": 0.8333333333333334,
|
| 677 |
+
"min_accuracy": 0.5,
|
| 678 |
+
"max_edit_distance": 8,
|
| 679 |
+
"exact_matches": 2,
|
| 680 |
+
"total_records": 3,
|
| 681 |
+
"examples": [
|
| 682 |
+
{
|
| 683 |
+
"record_idx": 1,
|
| 684 |
+
"label": "2 (17 th, 11 th)",
|
| 685 |
+
"predict": "2. (17R, 11R).",
|
| 686 |
+
"edit_distance": 8,
|
| 687 |
+
"accuracy": 0.5
|
| 688 |
+
}
|
| 689 |
+
]
|
| 690 |
+
},
|
| 691 |
+
"riwayat_keguguran": {
|
| 692 |
+
"exact_match_rate": 1.0,
|
| 693 |
+
"average_edit_distance": 0.0,
|
| 694 |
+
"average_normalized_accuracy": 1.0,
|
| 695 |
+
"min_accuracy": 1.0,
|
| 696 |
+
"max_edit_distance": 0,
|
| 697 |
+
"exact_matches": 3,
|
| 698 |
+
"total_records": 3,
|
| 699 |
+
"examples": []
|
| 700 |
+
},
|
| 701 |
+
"riwayat_penyakit_ibu": {
|
| 702 |
+
"exact_match_rate": 0.6666666666666666,
|
| 703 |
+
"average_edit_distance": 0.3333333333333333,
|
| 704 |
+
"average_normalized_accuracy": 0.9166666666666666,
|
| 705 |
+
"min_accuracy": 0.75,
|
| 706 |
+
"max_edit_distance": 1,
|
| 707 |
+
"exact_matches": 2,
|
| 708 |
+
"total_records": 3,
|
| 709 |
+
"examples": [
|
| 710 |
+
{
|
| 711 |
+
"record_idx": 1,
|
| 712 |
+
"label": "Maag",
|
| 713 |
+
"predict": "Maas",
|
| 714 |
+
"edit_distance": 1,
|
| 715 |
+
"accuracy": 0.75
|
| 716 |
+
}
|
| 717 |
+
]
|
| 718 |
+
}
|
| 719 |
+
}
|