{
"ground_truth_file": "/home/daniel/repos/github/Long-Form-Audio-Eval/data/ground-truth/truth_1.txt",
"total_runs_evaluated": 8,
"results": [
{
"run_id": "run-1",
"provider": "local",
"model": "whisper-base",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 292,
"difference": -396
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 6.17
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 42,
"count_accuracy": 15.97
},
"-": {
"reference_count": 33,
"hypothesis_count": 10,
"count_accuracy": 30.3
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 31,
"count_accuracy": 29.81
},
"'": {
"reference_count": 203,
"hypothesis_count": 202,
"count_accuracy": 99.51
},
"?": {
"reference_count": 19,
"hypothesis_count": 7,
"count_accuracy": 36.84
}
},
"context_match_accuracy": 13.02,
"overall_punctuation_score": 21.9
}
},
{
"run_id": "run-2",
"provider": "local",
"model": "whisper-tiny",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 288,
"difference": -400
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 6.16
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 45,
"count_accuracy": 17.11
},
"-": {
"reference_count": 33,
"hypothesis_count": 5,
"count_accuracy": 15.15
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 34,
"count_accuracy": 32.69
},
"'": {
"reference_count": 203,
"hypothesis_count": 199,
"count_accuracy": 98.03
},
"?": {
"reference_count": 19,
"hypothesis_count": 5,
"count_accuracy": 26.32
}
},
"context_match_accuracy": 8.6,
"overall_punctuation_score": 18.78
}
},
{
"run_id": "run-3",
"provider": "local",
"model": "whisper-base",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 292,
"difference": -396
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 6.17
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 42,
"count_accuracy": 15.97
},
"-": {
"reference_count": 33,
"hypothesis_count": 10,
"count_accuracy": 30.3
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 31,
"count_accuracy": 29.81
},
"'": {
"reference_count": 203,
"hypothesis_count": 202,
"count_accuracy": 99.51
},
"?": {
"reference_count": 19,
"hypothesis_count": 7,
"count_accuracy": 36.84
}
},
"context_match_accuracy": 13.02,
"overall_punctuation_score": 21.9
}
},
{
"run_id": "manual-1",
"provider": "gladia",
"model": "solaria-1",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 651,
"difference": -37
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 13.69
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 180,
"count_accuracy": 68.44
},
"-": {
"reference_count": 33,
"hypothesis_count": 9,
"count_accuracy": 27.27
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 251,
"count_accuracy": 0
},
"'": {
"reference_count": 203,
"hypothesis_count": 197,
"count_accuracy": 97.04
},
"?": {
"reference_count": 19,
"hypothesis_count": 14,
"count_accuracy": 73.68
}
},
"context_match_accuracy": 22.56,
"overall_punctuation_score": 44.13
}
},
{
"run_id": "manual-2",
"provider": "deepgram",
"model": "nova-3",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 698,
"difference": 10
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 15.19
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 222,
"count_accuracy": 84.41
},
"-": {
"reference_count": 33,
"hypothesis_count": 3,
"count_accuracy": 9.09
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 265,
"count_accuracy": 0
},
"'": {
"reference_count": 203,
"hypothesis_count": 189,
"count_accuracy": 93.1
},
"?": {
"reference_count": 19,
"hypothesis_count": 19,
"count_accuracy": 100.0
}
},
"context_match_accuracy": 32.33,
"overall_punctuation_score": 51.17
}
},
{
"run_id": "manual-3",
"provider": "assemblyai",
"model": "best",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 791,
"difference": 103
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 16.99
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 218,
"count_accuracy": 82.89
},
"-": {
"reference_count": 33,
"hypothesis_count": 7,
"count_accuracy": 21.21
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 356,
"count_accuracy": 0
},
"'": {
"reference_count": 203,
"hypothesis_count": 191,
"count_accuracy": 94.09
},
"?": {
"reference_count": 19,
"hypothesis_count": 19,
"count_accuracy": 100.0
}
},
"context_match_accuracy": 33.72,
"overall_punctuation_score": 48.43
}
},
{
"run_id": "manual-4",
"provider": "speechmatics",
"model": "slam-1-global-english",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 1003,
"difference": 315
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 20.66
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 238,
"count_accuracy": 90.49
},
"-": {
"reference_count": 33,
"hypothesis_count": 4,
"count_accuracy": 12.12
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 549,
"count_accuracy": 0
},
"'": {
"reference_count": 203,
"hypothesis_count": 195,
"count_accuracy": 96.06
},
"?": {
"reference_count": 19,
"hypothesis_count": 17,
"count_accuracy": 89.47
}
},
"context_match_accuracy": 30.0,
"overall_punctuation_score": 38.23
}
},
{
"run_id": "manual-5",
"provider": "openai",
"model": "whisper-1",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 911,
"difference": 223
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 19.15
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 221,
"count_accuracy": 84.03
},
"-": {
"reference_count": 33,
"hypothesis_count": 6,
"count_accuracy": 18.18
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 471,
"count_accuracy": 0
},
"'": {
"reference_count": 203,
"hypothesis_count": 197,
"count_accuracy": 97.04
},
"?": {
"reference_count": 19,
"hypothesis_count": 16,
"count_accuracy": 84.21
}
},
"context_match_accuracy": 34.42,
"overall_punctuation_score": 44.44
}
}
]
}