{
"ground_truth_file": "/home/daniel/repos/github/Long-Form-Audio-Eval/data/ground-truth/truth_1.txt",
"total_runs_evaluated": 8,
"results": [
{
"run_id": "run-1",
"provider": "local",
"model": "whisper-base",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 292,
"difference": -396
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 6.17
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 42,
"count_accuracy": 15.97
},
"-": {
"reference_count": 33,
"hypothesis_count": 10,
"count_accuracy": 30.3
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 31,
"count_accuracy": 29.81
},
"'": {
"reference_count": 203,
"hypothesis_count": 202,
"count_accuracy": 99.51
},
"?": {
"reference_count": 19,
"hypothesis_count": 7,
"count_accuracy": 36.84
}
},
"context_match_accuracy": 13.02,
"overall_punctuation_score": 21.9
}
},
{
"run_id": "run-2",
"provider": "local",
"model": "whisper-tiny",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 288,
"difference": -400
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 6.16
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 45,
"count_accuracy": 17.11
},
"-": {
"reference_count": 33,
"hypothesis_count": 5,
"count_accuracy": 15.15
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 34,
"count_accuracy": 32.69
},
"'": {
"reference_count": 203,
"hypothesis_count": 199,
"count_accuracy": 98.03
},
"?": {
"reference_count": 19,
"hypothesis_count": 5,
"count_accuracy": 26.32
}
},
"context_match_accuracy": 8.6,
"overall_punctuation_score": 18.78
}
},
{
"run_id": "run-3",
"provider": "local",
"model": "whisper-base",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 292,
"difference": -396
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 6.17
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 42,
"count_accuracy": 15.97
},
"-": {
"reference_count": 33,
"hypothesis_count": 10,
"count_accuracy": 30.3
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 31,
"count_accuracy": 29.81
},
"'": {
"reference_count": 203,
"hypothesis_count": 202,
"count_accuracy": 99.51
},
"?": {
"reference_count": 19,
"hypothesis_count": 7,
"count_accuracy": 36.84
}
},
"context_match_accuracy": 13.02,
"overall_punctuation_score": 21.9
}
},
{
"run_id": "manual-1",
"provider": "gladia",
"model": "solaria-1",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 651,
"difference": -37
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 13.69
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 180,
"count_accuracy": 68.44
},
"-": {
"reference_count": 33,
"hypothesis_count": 9,
"count_accuracy": 27.27
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 251,
"count_accuracy": 0
},
"'": {
"reference_count": 203,
"hypothesis_count": 197,
"count_accuracy": 97.04
},
"?": {
"reference_count": 19,
"hypothesis_count": 14,
"count_accuracy": 73.68
}
},
"context_match_accuracy": 22.56,
"overall_punctuation_score": 44.13
}
},
{
"run_id": "manual-2",
"provider": "deepgram",
"model": "nova-3",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 698,
"difference": 10
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 15.19
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 222,
"count_accuracy": 84.41
},
"-": {
"reference_count": 33,
"hypothesis_count": 3,
"count_accuracy": 9.09
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 265,
"count_accuracy": 0
},
"'": {
"reference_count": 203,
"hypothesis_count": 189,
"count_accuracy": 93.1
},
"?": {
"reference_count": 19,
"hypothesis_count": 19,
"count_accuracy": 100.0
}
},
"context_match_accuracy": 32.33,
"overall_punctuation_score": 51.17
}
},
{
"run_id": "manual-3",
"provider": "assemblyai",
"model": "best",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 791,
"difference": 103
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 16.99
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 218,
"count_accuracy": 82.89
},
"-": {
"reference_count": 33,
"hypothesis_count": 7,
"count_accuracy": 21.21
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 356,
"count_accuracy": 0
},
"'": {
"reference_count": 203,
"hypothesis_count": 191,
"count_accuracy": 94.09
},
"?": {
"reference_count": 19,
"hypothesis_count": 19,
"count_accuracy": 100.0
}
},
"context_match_accuracy": 33.72,
"overall_punctuation_score": 48.43
}
},
{
"run_id": "manual-4",
"provider": "speechmatics",
"model": "slam-1-global-english",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 1003,
"difference": 315
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 20.66
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 238,
"count_accuracy": 90.49
},
"-": {
"reference_count": 33,
"hypothesis_count": 4,
"count_accuracy": 12.12
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 549,
"count_accuracy": 0
},
"'": {
"reference_count": 203,
"hypothesis_count": 195,
"count_accuracy": 96.06
},
"?": {
"reference_count": 19,
"hypothesis_count": 17,
"count_accuracy": 89.47
}
},
"context_match_accuracy": 30.0,
"overall_punctuation_score": 38.23
}
},
{
"run_id": "manual-5",
"provider": "openai",
"model": "whisper-1",
"metrics": {
"total_punctuation": {
"reference": 688,
"hypothesis": 911,
"difference": 223
},
"punctuation_density": {
"reference_percent": 14.49,
"hypothesis_percent": 19.15
},
"mark_accuracy": {
"!": {
"reference_count": 19,
"hypothesis_count": 0,
"count_accuracy": 0
},
"\"": {
"reference_count": 45,
"hypothesis_count": 0,
"count_accuracy": 0
},
".": {
"reference_count": 263,
"hypothesis_count": 221,
"count_accuracy": 84.03
},
"-": {
"reference_count": 33,
"hypothesis_count": 6,
"count_accuracy": 18.18
},
":": {
"reference_count": 2,
"hypothesis_count": 0,
"count_accuracy": 0
},
",": {
"reference_count": 104,
"hypothesis_count": 471,
"count_accuracy": 0
},
"'": {
"reference_count": 203,
"hypothesis_count": 197,
"count_accuracy": 97.04
},
"?": {
"reference_count": 19,
"hypothesis_count": 16,
"count_accuracy": 84.21
}
},
"context_match_accuracy": 34.42,
"overall_punctuation_score": 44.44
}
}
]
}