Spaces:
Runtime error
Runtime error
| import math | |
| from collections import Counter | |
def flatten_ocr_json(payload: dict[str, str]) -> str:
    """Serialize an OCR payload to one "key<TAB>value" line per entry.

    Keys are ordered by ``_sort_key`` so numeric "page-line" identifiers
    come out in document order rather than lexicographic order.
    """
    ordered_keys = sorted(payload, key=_sort_key)
    return "\n".join(f"{key}\t{payload[key]}" for key in ordered_keys)
| def _sort_key(value: str): | |
| head, _, tail = value.partition("-") | |
| try: | |
| return (int(head), int(tail) if tail else -1, value) | |
| except ValueError: | |
| return (math.inf, math.inf, value) | |
def levenshtein_similarity(reference: str, prediction: str) -> float:
    """Return a 0..100 similarity based on Levenshtein edit distance.

    100.0 means the strings are identical; 0.0 means one side is empty
    while the other is not. Otherwise the score is
    ``(1 - distance / max(len)) * 100``.

    Fix over the previous version: the ``not reference and not prediction``
    branch was unreachable dead code (two empty strings already satisfy
    ``reference == prediction``) and has been removed; behavior is unchanged.
    """
    if reference == prediction:
        # Also covers the both-empty case.
        return 100.0
    if not reference or not prediction:
        return 0.0
    # Classic two-row dynamic program: prev holds the previous row of the
    # edit-distance matrix, current is built left to right.
    prev = list(range(len(prediction) + 1))
    for i, ref_char in enumerate(reference, start=1):
        current = [i]
        for j, pred_char in enumerate(prediction, start=1):
            substitution_cost = 0 if ref_char == pred_char else 1
            current.append(
                min(
                    prev[j] + 1,              # deletion
                    current[j - 1] + 1,       # insertion
                    prev[j - 1] + substitution_cost,  # substitution/match
                )
            )
        prev = current
    distance = prev[-1]
    # max(0.0, ...) is a guard; distance never exceeds the longer length.
    return max(0.0, (1 - (distance / max(len(reference), len(prediction)))) * 100.0)
def bleu_score(reference: str, prediction: str, max_order: int = 4) -> float:
    """Single-pair BLEU with add-one smoothing, scaled to 0..100.

    Tokenization is whitespace ``split``. The geometric mean is taken over
    n-gram orders 1..max_order, and a brevity penalty is applied when the
    prediction is shorter than the reference.

    Fix over the previous version: orders longer than the prediction used
    to contribute a phantom smoothed precision of 1/2 (empty counts with a
    denominator clamped to 1), so identical texts shorter than ``max_order``
    tokens could never reach 100. Such orders are now excluded from the
    average.
    """
    ref_tokens = reference.split()
    pred_tokens = prediction.split()
    if not ref_tokens and not pred_tokens:
        return 100.0
    if not ref_tokens or not pred_tokens:
        return 0.0
    precisions = []
    for order in range(1, max_order + 1):
        pred_counts = _ngram_counts(pred_tokens, order)
        if not pred_counts:
            # Prediction is shorter than this order; higher orders are
            # empty too, so stop rather than penalize with a fake 1/2.
            break
        ref_counts = _ngram_counts(ref_tokens, order)
        overlap = sum(min(count, ref_counts[ngram]) for ngram, count in pred_counts.items())
        total = sum(pred_counts.values())
        # Add-one smoothing keeps math.log defined when overlap is 0.
        precisions.append((overlap + 1) / (total + 1))
    # pred_tokens is non-empty here, so order 1 always yields counts and
    # precisions is never empty.
    geo_mean = math.exp(sum(math.log(p) for p in precisions) / len(precisions))
    # Brevity penalty: 1.0 when the prediction is at least as long as the
    # reference (exp(0) == 1.0 at equal length, matching the old behavior).
    if len(pred_tokens) >= len(ref_tokens):
        bp = 1.0
    else:
        bp = math.exp(1 - (len(ref_tokens) / len(pred_tokens)))
    return geo_mean * bp * 100.0
def paired_ocr_metrics(reference: dict[str, str], prediction: dict[str, str]) -> dict[str, float]:
    """Score a predicted OCR payload against a reference payload.

    Both payloads are flattened to deterministic tab-separated text first,
    then compared with Levenshtein similarity and BLEU (each 0..100).
    """
    ref_text = flatten_ocr_json(reference)
    pred_text = flatten_ocr_json(prediction)
    metrics: dict[str, float] = {}
    metrics["levenshtein"] = levenshtein_similarity(ref_text, pred_text)
    metrics["bleu"] = bleu_score(ref_text, pred_text)
    return metrics
| def _ngram_counts(tokens: list[str], order: int) -> Counter: | |
| if len(tokens) < order: | |
| return Counter() | |
| return Counter(tuple(tokens[i : i + order]) for i in range(len(tokens) - order + 1)) | |