| { | |
| "token_analysis": { | |
| "recommended_threshold": 20, | |
| "best_f1": 0.805, | |
| "best_precision": 0.733, | |
| "best_recall": 0.892, | |
| "correlation_tokens_hhem": -0.54, | |
| "current_threshold": 30, | |
| "total_samples": 50, | |
| "hallucination_rate": 0.26, | |
| "mean_tokens": 181.1, | |
| "median_tokens": 128.0, | |
| "all_thresholds": [ | |
| { | |
| "precision": 0.733, | |
| "recall": 0.892, | |
| "f1": 0.805, | |
| "tp": 33, | |
| "fp": 12, | |
| "tn": 1, | |
| "fn": 4, | |
| "n_passed": 45, | |
| "n_refused": 5, | |
| "threshold": 20, | |
| "mean_hhem_above": 0.68 | |
| }, | |
| { | |
| "precision": 0.732, | |
| "recall": 0.811, | |
| "f1": 0.769, | |
| "tp": 30, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 7, | |
| "n_passed": 41, | |
| "n_refused": 9, | |
| "threshold": 30, | |
| "mean_hhem_above": 0.675 | |
| }, | |
| { | |
| "precision": 0.718, | |
| "recall": 0.757, | |
| "f1": 0.737, | |
| "tp": 28, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 9, | |
| "n_passed": 39, | |
| "n_refused": 11, | |
| "threshold": 40, | |
| "mean_hhem_above": 0.668 | |
| }, | |
| { | |
| "precision": 0.711, | |
| "recall": 0.73, | |
| "f1": 0.72, | |
| "tp": 27, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 10, | |
| "n_passed": 38, | |
| "n_refused": 12, | |
| "threshold": 50, | |
| "mean_hhem_above": 0.666 | |
| }, | |
| { | |
| "precision": 0.694, | |
| "recall": 0.676, | |
| "f1": 0.685, | |
| "tp": 25, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 12, | |
| "n_passed": 36, | |
| "n_refused": 14, | |
| "threshold": 60, | |
| "mean_hhem_above": 0.655 | |
| }, | |
| { | |
| "precision": 0.686, | |
| "recall": 0.649, | |
| "f1": 0.667, | |
| "tp": 24, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 13, | |
| "n_passed": 35, | |
| "n_refused": 15, | |
| "threshold": 70, | |
| "mean_hhem_above": 0.646 | |
| }, | |
| { | |
| "precision": 0.667, | |
| "recall": 0.595, | |
| "f1": 0.629, | |
| "tp": 22, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 15, | |
| "n_passed": 33, | |
| "n_refused": 17, | |
| "threshold": 80, | |
| "mean_hhem_above": 0.639 | |
| }, | |
| { | |
| "precision": 0.667, | |
| "recall": 0.541, | |
| "f1": 0.597, | |
| "tp": 20, | |
| "fp": 10, | |
| "tn": 3, | |
| "fn": 17, | |
| "n_passed": 30, | |
| "n_refused": 20, | |
| "threshold": 90, | |
| "mean_hhem_above": 0.642 | |
| }, | |
| { | |
| "precision": 0.655, | |
| "recall": 0.514, | |
| "f1": 0.576, | |
| "tp": 19, | |
| "fp": 10, | |
| "tn": 3, | |
| "fn": 18, | |
| "n_passed": 29, | |
| "n_refused": 21, | |
| "threshold": 100, | |
| "mean_hhem_above": 0.631 | |
| }, | |
| { | |
| "precision": 0.615, | |
| "recall": 0.432, | |
| "f1": 0.508, | |
| "tp": 16, | |
| "fp": 10, | |
| "tn": 3, | |
| "fn": 21, | |
| "n_passed": 26, | |
| "n_refused": 24, | |
| "threshold": 110, | |
| "mean_hhem_above": 0.616 | |
| }, | |
| { | |
| "precision": 0.615, | |
| "recall": 0.432, | |
| "f1": 0.508, | |
| "tp": 16, | |
| "fp": 10, | |
| "tn": 3, | |
| "fn": 21, | |
| "n_passed": 26, | |
| "n_refused": 24, | |
| "threshold": 120, | |
| "mean_hhem_above": 0.616 | |
| }, | |
| { | |
| "precision": 0.6, | |
| "recall": 0.405, | |
| "f1": 0.484, | |
| "tp": 15, | |
| "fp": 10, | |
| "tn": 3, | |
| "fn": 22, | |
| "n_passed": 25, | |
| "n_refused": 25, | |
| "threshold": 130, | |
| "mean_hhem_above": 0.604 | |
| }, | |
| { | |
| "precision": 0.583, | |
| "recall": 0.378, | |
| "f1": 0.459, | |
| "tp": 14, | |
| "fp": 10, | |
| "tn": 3, | |
| "fn": 23, | |
| "n_passed": 24, | |
| "n_refused": 26, | |
| "threshold": 140, | |
| "mean_hhem_above": 0.593 | |
| }, | |
| { | |
| "precision": 0.571, | |
| "recall": 0.324, | |
| "f1": 0.414, | |
| "tp": 12, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 25, | |
| "n_passed": 21, | |
| "n_refused": 29, | |
| "threshold": 150, | |
| "mean_hhem_above": 0.589 | |
| }, | |
| { | |
| "precision": 0.5, | |
| "recall": 0.243, | |
| "f1": 0.327, | |
| "tp": 9, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 28, | |
| "n_passed": 18, | |
| "n_refused": 32, | |
| "threshold": 160, | |
| "mean_hhem_above": 0.534 | |
| }, | |
| { | |
| "precision": 0.471, | |
| "recall": 0.216, | |
| "f1": 0.296, | |
| "tp": 8, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 29, | |
| "n_passed": 17, | |
| "n_refused": 33, | |
| "threshold": 170, | |
| "mean_hhem_above": 0.508 | |
| }, | |
| { | |
| "precision": 0.438, | |
| "recall": 0.189, | |
| "f1": 0.264, | |
| "tp": 7, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 30, | |
| "n_passed": 16, | |
| "n_refused": 34, | |
| "threshold": 180, | |
| "mean_hhem_above": 0.481 | |
| }, | |
| { | |
| "precision": 0.438, | |
| "recall": 0.189, | |
| "f1": 0.264, | |
| "tp": 7, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 30, | |
| "n_passed": 16, | |
| "n_refused": 34, | |
| "threshold": 190, | |
| "mean_hhem_above": 0.481 | |
| } | |
| ] | |
| }, | |
| "chunk_analysis": { | |
| "recommended_threshold": 1, | |
| "best_f1": 0.851, | |
| "best_precision": 0.74, | |
| "best_recall": 1.0, | |
| "correlation_chunks_hhem": -0.514, | |
| "current_threshold": 1, | |
| "mean_chunks": 2.06, | |
| "all_thresholds": [ | |
| { | |
| "precision": 0.74, | |
| "recall": 1.0, | |
| "f1": 0.851, | |
| "tp": 37, | |
| "fp": 13, | |
| "tn": 0, | |
| "fn": 0, | |
| "n_passed": 50, | |
| "n_refused": 0, | |
| "threshold": 1, | |
| "mean_hhem_above": 0.679 | |
| }, | |
| { | |
| "precision": 0.522, | |
| "recall": 0.324, | |
| "f1": 0.4, | |
| "tp": 12, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 25, | |
| "n_passed": 23, | |
| "n_refused": 27, | |
| "threshold": 2, | |
| "mean_hhem_above": 0.539 | |
| }, | |
| { | |
| "precision": 0.308, | |
| "recall": 0.108, | |
| "f1": 0.16, | |
| "tp": 4, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 33, | |
| "n_passed": 13, | |
| "n_refused": 37, | |
| "threshold": 3, | |
| "mean_hhem_above": 0.398 | |
| }, | |
| { | |
| "precision": 0.375, | |
| "recall": 0.081, | |
| "f1": 0.133, | |
| "tp": 3, | |
| "fp": 5, | |
| "tn": 8, | |
| "fn": 34, | |
| "n_passed": 8, | |
| "n_refused": 42, | |
| "threshold": 4, | |
| "mean_hhem_above": 0.427 | |
| }, | |
| { | |
| "precision": 0.25, | |
| "recall": 0.027, | |
| "f1": 0.049, | |
| "tp": 1, | |
| "fp": 3, | |
| "tn": 10, | |
| "fn": 36, | |
| "n_passed": 4, | |
| "n_refused": 46, | |
| "threshold": 5, | |
| "mean_hhem_above": 0.402 | |
| } | |
| ] | |
| }, | |
| "combined_analysis": { | |
| "recommended_token_threshold": 20, | |
| "recommended_chunk_threshold": 1, | |
| "best_f1": 0.805, | |
| "best_precision": 0.733, | |
| "best_recall": 0.892, | |
| "current_token_threshold": 30, | |
| "current_chunk_threshold": 1, | |
| "all_combinations": [ | |
| { | |
| "precision": 0.733, | |
| "recall": 0.892, | |
| "f1": 0.805, | |
| "tp": 33, | |
| "fp": 12, | |
| "tn": 1, | |
| "fn": 4, | |
| "n_passed": 45, | |
| "n_refused": 5, | |
| "token_threshold": 20, | |
| "chunk_threshold": 1 | |
| }, | |
| { | |
| "precision": 0.522, | |
| "recall": 0.324, | |
| "f1": 0.4, | |
| "tp": 12, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 25, | |
| "n_passed": 23, | |
| "n_refused": 27, | |
| "token_threshold": 20, | |
| "chunk_threshold": 2 | |
| }, | |
| { | |
| "precision": 0.308, | |
| "recall": 0.108, | |
| "f1": 0.16, | |
| "tp": 4, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 33, | |
| "n_passed": 13, | |
| "n_refused": 37, | |
| "token_threshold": 20, | |
| "chunk_threshold": 3 | |
| }, | |
| { | |
| "precision": 0.732, | |
| "recall": 0.811, | |
| "f1": 0.769, | |
| "tp": 30, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 7, | |
| "n_passed": 41, | |
| "n_refused": 9, | |
| "token_threshold": 30, | |
| "chunk_threshold": 1 | |
| }, | |
| { | |
| "precision": 0.522, | |
| "recall": 0.324, | |
| "f1": 0.4, | |
| "tp": 12, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 25, | |
| "n_passed": 23, | |
| "n_refused": 27, | |
| "token_threshold": 30, | |
| "chunk_threshold": 2 | |
| }, | |
| { | |
| "precision": 0.308, | |
| "recall": 0.108, | |
| "f1": 0.16, | |
| "tp": 4, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 33, | |
| "n_passed": 13, | |
| "n_refused": 37, | |
| "token_threshold": 30, | |
| "chunk_threshold": 3 | |
| }, | |
| { | |
| "precision": 0.718, | |
| "recall": 0.757, | |
| "f1": 0.737, | |
| "tp": 28, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 9, | |
| "n_passed": 39, | |
| "n_refused": 11, | |
| "token_threshold": 40, | |
| "chunk_threshold": 1 | |
| }, | |
| { | |
| "precision": 0.522, | |
| "recall": 0.324, | |
| "f1": 0.4, | |
| "tp": 12, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 25, | |
| "n_passed": 23, | |
| "n_refused": 27, | |
| "token_threshold": 40, | |
| "chunk_threshold": 2 | |
| }, | |
| { | |
| "precision": 0.308, | |
| "recall": 0.108, | |
| "f1": 0.16, | |
| "tp": 4, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 33, | |
| "n_passed": 13, | |
| "n_refused": 37, | |
| "token_threshold": 40, | |
| "chunk_threshold": 3 | |
| }, | |
| { | |
| "precision": 0.711, | |
| "recall": 0.73, | |
| "f1": 0.72, | |
| "tp": 27, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 10, | |
| "n_passed": 38, | |
| "n_refused": 12, | |
| "token_threshold": 50, | |
| "chunk_threshold": 1 | |
| }, | |
| { | |
| "precision": 0.522, | |
| "recall": 0.324, | |
| "f1": 0.4, | |
| "tp": 12, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 25, | |
| "n_passed": 23, | |
| "n_refused": 27, | |
| "token_threshold": 50, | |
| "chunk_threshold": 2 | |
| }, | |
| { | |
| "precision": 0.308, | |
| "recall": 0.108, | |
| "f1": 0.16, | |
| "tp": 4, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 33, | |
| "n_passed": 13, | |
| "n_refused": 37, | |
| "token_threshold": 50, | |
| "chunk_threshold": 3 | |
| }, | |
| { | |
| "precision": 0.667, | |
| "recall": 0.595, | |
| "f1": 0.629, | |
| "tp": 22, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 15, | |
| "n_passed": 33, | |
| "n_refused": 17, | |
| "token_threshold": 75, | |
| "chunk_threshold": 1 | |
| }, | |
| { | |
| "precision": 0.522, | |
| "recall": 0.324, | |
| "f1": 0.4, | |
| "tp": 12, | |
| "fp": 11, | |
| "tn": 2, | |
| "fn": 25, | |
| "n_passed": 23, | |
| "n_refused": 27, | |
| "token_threshold": 75, | |
| "chunk_threshold": 2 | |
| }, | |
| { | |
| "precision": 0.308, | |
| "recall": 0.108, | |
| "f1": 0.16, | |
| "tp": 4, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 33, | |
| "n_passed": 13, | |
| "n_refused": 37, | |
| "token_threshold": 75, | |
| "chunk_threshold": 3 | |
| }, | |
| { | |
| "precision": 0.655, | |
| "recall": 0.514, | |
| "f1": 0.576, | |
| "tp": 19, | |
| "fp": 10, | |
| "tn": 3, | |
| "fn": 18, | |
| "n_passed": 29, | |
| "n_refused": 21, | |
| "token_threshold": 100, | |
| "chunk_threshold": 1 | |
| }, | |
| { | |
| "precision": 0.524, | |
| "recall": 0.297, | |
| "f1": 0.379, | |
| "tp": 11, | |
| "fp": 10, | |
| "tn": 3, | |
| "fn": 26, | |
| "n_passed": 21, | |
| "n_refused": 29, | |
| "token_threshold": 100, | |
| "chunk_threshold": 2 | |
| }, | |
| { | |
| "precision": 0.308, | |
| "recall": 0.108, | |
| "f1": 0.16, | |
| "tp": 4, | |
| "fp": 9, | |
| "tn": 4, | |
| "fn": 33, | |
| "n_passed": 13, | |
| "n_refused": 37, | |
| "token_threshold": 100, | |
| "chunk_threshold": 3 | |
| } | |
| ] | |
| } | |
| } |