{
  "ground_truth_file": "/home/daniel/repos/github/Long-Form-Audio-Eval/data/ground-truth/truth_1.txt",
  "total_runs_evaluated": 8,
  "results": [
    {
      "run_id": "run-1",
      "provider": "local",
      "model": "whisper-base",
      "metrics": {
        "total_punctuation": {
          "reference": 688,
          "hypothesis": 292,
          "difference": -396
        },
        "punctuation_density": {
          "reference_percent": 14.49,
          "hypothesis_percent": 6.17
        },
        "mark_accuracy": {
          "!": {
            "reference_count": 19,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          "\"": {
            "reference_count": 45,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ".": {
            "reference_count": 263,
            "hypothesis_count": 42,
            "count_accuracy": 15.97
          },
          "-": {
            "reference_count": 33,
            "hypothesis_count": 10,
            "count_accuracy": 30.3
          },
          ":": {
            "reference_count": 2,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ",": {
            "reference_count": 104,
            "hypothesis_count": 31,
            "count_accuracy": 29.81
          },
          "'": {
            "reference_count": 203,
            "hypothesis_count": 202,
            "count_accuracy": 99.51
          },
          "?": {
            "reference_count": 19,
            "hypothesis_count": 7,
            "count_accuracy": 36.84
          }
        },
        "context_match_accuracy": 13.02,
        "overall_punctuation_score": 21.9
      }
    },
    {
      "run_id": "run-2",
      "provider": "local",
      "model": "whisper-tiny",
      "metrics": {
        "total_punctuation": {
          "reference": 688,
          "hypothesis": 288,
          "difference": -400
        },
        "punctuation_density": {
          "reference_percent": 14.49,
          "hypothesis_percent": 6.16
        },
        "mark_accuracy": {
          "!": {
            "reference_count": 19,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          "\"": {
            "reference_count": 45,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ".": {
            "reference_count": 263,
            "hypothesis_count": 45,
            "count_accuracy": 17.11
          },
          "-": {
            "reference_count": 33,
            "hypothesis_count": 5,
            "count_accuracy": 15.15
          },
          ":": {
            "reference_count": 2,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ",": {
            "reference_count": 104,
            "hypothesis_count": 34,
            "count_accuracy": 32.69
          },
          "'": {
            "reference_count": 203,
            "hypothesis_count": 199,
            "count_accuracy": 98.03
          },
          "?": {
            "reference_count": 19,
            "hypothesis_count": 5,
            "count_accuracy": 26.32
          }
        },
        "context_match_accuracy": 8.6,
        "overall_punctuation_score": 18.78
      }
    },
    {
      "run_id": "run-3",
      "provider": "local",
      "model": "whisper-base",
      "metrics": {
        "total_punctuation": {
          "reference": 688,
          "hypothesis": 292,
          "difference": -396
        },
        "punctuation_density": {
          "reference_percent": 14.49,
          "hypothesis_percent": 6.17
        },
        "mark_accuracy": {
          "!": {
            "reference_count": 19,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          "\"": {
            "reference_count": 45,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ".": {
            "reference_count": 263,
            "hypothesis_count": 42,
            "count_accuracy": 15.97
          },
          "-": {
            "reference_count": 33,
            "hypothesis_count": 10,
            "count_accuracy": 30.3
          },
          ":": {
            "reference_count": 2,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ",": {
            "reference_count": 104,
            "hypothesis_count": 31,
            "count_accuracy": 29.81
          },
          "'": {
            "reference_count": 203,
            "hypothesis_count": 202,
            "count_accuracy": 99.51
          },
          "?": {
            "reference_count": 19,
            "hypothesis_count": 7,
            "count_accuracy": 36.84
          }
        },
        "context_match_accuracy": 13.02,
        "overall_punctuation_score": 21.9
      }
    },
    {
      "run_id": "manual-1",
      "provider": "gladia",
      "model": "solaria-1",
      "metrics": {
        "total_punctuation": {
          "reference": 688,
          "hypothesis": 651,
          "difference": -37
        },
        "punctuation_density": {
          "reference_percent": 14.49,
          "hypothesis_percent": 13.69
        },
        "mark_accuracy": {
          "!": {
            "reference_count": 19,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          "\"": {
            "reference_count": 45,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ".": {
            "reference_count": 263,
            "hypothesis_count": 180,
            "count_accuracy": 68.44
          },
          "-": {
            "reference_count": 33,
            "hypothesis_count": 9,
            "count_accuracy": 27.27
          },
          ":": {
            "reference_count": 2,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ",": {
            "reference_count": 104,
            "hypothesis_count": 251,
            "count_accuracy": 0
          },
          "'": {
            "reference_count": 203,
            "hypothesis_count": 197,
            "count_accuracy": 97.04
          },
          "?": {
            "reference_count": 19,
            "hypothesis_count": 14,
            "count_accuracy": 73.68
          }
        },
        "context_match_accuracy": 22.56,
        "overall_punctuation_score": 44.13
      }
    },
    {
      "run_id": "manual-2",
      "provider": "deepgram",
      "model": "nova-3",
      "metrics": {
        "total_punctuation": {
          "reference": 688,
          "hypothesis": 698,
          "difference": 10
        },
        "punctuation_density": {
          "reference_percent": 14.49,
          "hypothesis_percent": 15.19
        },
        "mark_accuracy": {
          "!": {
            "reference_count": 19,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          "\"": {
            "reference_count": 45,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ".": {
            "reference_count": 263,
            "hypothesis_count": 222,
            "count_accuracy": 84.41
          },
          "-": {
            "reference_count": 33,
            "hypothesis_count": 3,
            "count_accuracy": 9.09
          },
          ":": {
            "reference_count": 2,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ",": {
            "reference_count": 104,
            "hypothesis_count": 265,
            "count_accuracy": 0
          },
          "'": {
            "reference_count": 203,
            "hypothesis_count": 189,
            "count_accuracy": 93.1
          },
          "?": {
            "reference_count": 19,
            "hypothesis_count": 19,
            "count_accuracy": 100.0
          }
        },
        "context_match_accuracy": 32.33,
        "overall_punctuation_score": 51.17
      }
    },
    {
      "run_id": "manual-3",
      "provider": "assemblyai",
      "model": "best",
      "metrics": {
        "total_punctuation": {
          "reference": 688,
          "hypothesis": 791,
          "difference": 103
        },
        "punctuation_density": {
          "reference_percent": 14.49,
          "hypothesis_percent": 16.99
        },
        "mark_accuracy": {
          "!": {
            "reference_count": 19,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          "\"": {
            "reference_count": 45,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ".": {
            "reference_count": 263,
            "hypothesis_count": 218,
            "count_accuracy": 82.89
          },
          "-": {
            "reference_count": 33,
            "hypothesis_count": 7,
            "count_accuracy": 21.21
          },
          ":": {
            "reference_count": 2,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ",": {
            "reference_count": 104,
            "hypothesis_count": 356,
            "count_accuracy": 0
          },
          "'": {
            "reference_count": 203,
            "hypothesis_count": 191,
            "count_accuracy": 94.09
          },
          "?": {
            "reference_count": 19,
            "hypothesis_count": 19,
            "count_accuracy": 100.0
          }
        },
        "context_match_accuracy": 33.72,
        "overall_punctuation_score": 48.43
      }
    },
    {
      "run_id": "manual-4",
      "provider": "speechmatics",
      "model": "slam-1-global-english",
      "metrics": {
        "total_punctuation": {
          "reference": 688,
          "hypothesis": 1003,
          "difference": 315
        },
        "punctuation_density": {
          "reference_percent": 14.49,
          "hypothesis_percent": 20.66
        },
        "mark_accuracy": {
          "!": {
            "reference_count": 19,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          "\"": {
            "reference_count": 45,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ".": {
            "reference_count": 263,
            "hypothesis_count": 238,
            "count_accuracy": 90.49
          },
          "-": {
            "reference_count": 33,
            "hypothesis_count": 4,
            "count_accuracy": 12.12
          },
          ":": {
            "reference_count": 2,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ",": {
            "reference_count": 104,
            "hypothesis_count": 549,
            "count_accuracy": 0
          },
          "'": {
            "reference_count": 203,
            "hypothesis_count": 195,
            "count_accuracy": 96.06
          },
          "?": {
            "reference_count": 19,
            "hypothesis_count": 17,
            "count_accuracy": 89.47
          }
        },
        "context_match_accuracy": 30.0,
        "overall_punctuation_score": 38.23
      }
    },
    {
      "run_id": "manual-5",
      "provider": "openai",
      "model": "whisper-1",
      "metrics": {
        "total_punctuation": {
          "reference": 688,
          "hypothesis": 911,
          "difference": 223
        },
        "punctuation_density": {
          "reference_percent": 14.49,
          "hypothesis_percent": 19.15
        },
        "mark_accuracy": {
          "!": {
            "reference_count": 19,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          "\"": {
            "reference_count": 45,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ".": {
            "reference_count": 263,
            "hypothesis_count": 221,
            "count_accuracy": 84.03
          },
          "-": {
            "reference_count": 33,
            "hypothesis_count": 6,
            "count_accuracy": 18.18
          },
          ":": {
            "reference_count": 2,
            "hypothesis_count": 0,
            "count_accuracy": 0
          },
          ",": {
            "reference_count": 104,
            "hypothesis_count": 471,
            "count_accuracy": 0
          },
          "'": {
            "reference_count": 203,
            "hypothesis_count": 197,
            "count_accuracy": 97.04
          },
          "?": {
            "reference_count": 19,
            "hypothesis_count": 16,
            "count_accuracy": 84.21
          }
        },
        "context_match_accuracy": 34.42,
        "overall_punctuation_score": 44.44
      }
    }
  ]
}