import math
from collections import Counter


def flatten_ocr_json(payload: dict[str, str]) -> str:
    """Serialise an OCR payload as one "key<TAB>value" line per entry, ordered by _sort_key."""
    parts = [f"{key}\t{payload[key]}" for key in sorted(payload, key=_sort_key)]
    return "\n".join(parts)


def _sort_key(value: str):
    # Numeric "head-tail" keys sort numerically (head first, then tail); keys
    # that do not parse sort after them, in alphabetical order.
    head, _, tail = value.partition("-")
    try:
        return (int(head), int(tail) if tail else -1, value)
    except ValueError:
        return (math.inf, math.inf, value)
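
# Illustrative ordering (example keys, not from the original file):
# sorted(["10-2", "2-1", "notes"], key=_sort_key) -> ["2-1", "10-2", "notes"],
# whereas a plain lexicographic sort would put "10-2" before "2-1".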


def levenshtein_similarity(reference: str, prediction: str) -> float:
    """Character-level similarity in [0, 100] from the normalised edit distance."""
    if reference == prediction:
        return 100.0
    if not reference or not prediction:
        return 0.0
    # Classic dynamic programme over the edit-distance matrix, keeping only the
    # previous row: prev[j] holds the distance between the first i - 1 characters
    # of `reference` and the first j characters of `prediction`.
    prev = list(range(len(prediction) + 1))
    for i, ref_char in enumerate(reference, start=1):
        current = [i]
        for j, pred_char in enumerate(prediction, start=1):
            substitution_cost = 0 if ref_char == pred_char else 1
            current.append(
                min(
                    prev[j] + 1,  # deletion
                    current[j - 1] + 1,  # insertion
                    prev[j - 1] + substitution_cost,  # match or substitution
                )
            )
        prev = current
    distance = prev[-1]
    return max(0.0, (1 - (distance / max(len(reference), len(prediction)))) * 100.0)
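
# Illustrative check (not from the original file): turning "kitten" into
# "sitting" takes 3 edits, so the similarity is (1 - 3 / 7) * 100 ≈ 57.14.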


def bleu_score(reference: str, prediction: str, max_order: int = 4) -> float:
    """Whitespace-tokenised BLEU with add-one smoothing, scaled to [0, 100]."""
    ref_tokens = reference.split()
    pred_tokens = prediction.split()
    if not ref_tokens and not pred_tokens:
        return 100.0
    if not ref_tokens or not pred_tokens:
        return 0.0
    precisions = []
    for order in range(1, max_order + 1):
        ref_counts = _ngram_counts(ref_tokens, order)
        pred_counts = _ngram_counts(pred_tokens, order)
        # Clipped overlap: each predicted n-gram counts at most as often as it
        # appears in the reference.
        overlap = sum(min(count, ref_counts[ngram]) for ngram, count in pred_counts.items())
        total = max(sum(pred_counts.values()), 1)
        # Add-one smoothing keeps every precision positive, so the geometric
        # mean below is always defined.
        precisions.append((overlap + 1) / (total + 1))
    geo_mean = math.exp(sum(math.log(p) for p in precisions) / max_order)
    # Brevity penalty: predictions longer than the reference are not penalised.
    bp = 1.0 if len(pred_tokens) > len(ref_tokens) else math.exp(1 - (len(ref_tokens) / len(pred_tokens)))
    return geo_mean * bp * 100.0
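
# Illustrative check (not from the original file): for identical four-token
# strings every smoothed precision is (k + 1) / (k + 1) = 1.0 and the brevity
# penalty is exp(0) = 1.0, so bleu_score("a b c d", "a b c d") == 100.0.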


def paired_ocr_metrics(reference: dict[str, str], prediction: dict[str, str]) -> dict[str, float]:
    """Flatten both payloads with the same key ordering, then score the flattened texts."""
    reference_text = flatten_ocr_json(reference)
    prediction_text = flatten_ocr_json(prediction)
    return {
        "levenshtein": levenshtein_similarity(reference_text, prediction_text),
        "bleu": bleu_score(reference_text, prediction_text),
    }


def _ngram_counts(tokens: list[str], order: int) -> Counter:
    """Count n-grams of the given order; empty when the token list is too short."""
    if len(tokens) < order:
        return Counter()
    return Counter(tuple(tokens[i : i + order]) for i in range(len(tokens) - order + 1))
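

# Minimal usage sketch; the sample payloads are invented for illustration and
# are not part of the original file.
if __name__ == "__main__":
    reference = {"1-1": "Invoice 42", "1-2": "Total: 19.99"}
    prediction = {"1-1": "Invoice 42", "1-2": "Total: 19.9g"}
    print(paired_ocr_metrics(reference, prediction))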