secureagentrag-api / evaluation /calibration.json
LeomordKaly's picture
deploy: phase 3 BYOK backend (Dockerfile.hf, FastAPI on 7860)
09ed8ca verified
{
"timestamp": "2026-05-23T07:33:21.839008+00:00",
"golden_set_path": "evaluation\\golden_set.jsonl",
"n_rows_total": 50,
"n_rows_usable": 50,
"confidence": {
"chosen_threshold": 0.35,
"chosen_metrics": {
"threshold": 0.35,
"precision": 1.0,
"recall": 0.4138,
"f1": 0.5854,
"tpr": 0.4138,
"fpr": 0.0,
"j": 0.4138,
"tp": 12,
"fp": 0,
"fn": 17,
"tn": 21
},
"curve": [
{
"threshold": 0.0,
"precision": 0.58,
"recall": 1.0,
"f1": 0.7342,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 29,
"fp": 21,
"fn": 0,
"tn": 0
},
{
"threshold": 0.05,
"precision": 0.6444,
"recall": 1.0,
"f1": 0.7838,
"tpr": 1.0,
"fpr": 0.7619,
"j": 0.2381,
"tp": 29,
"fp": 16,
"fn": 0,
"tn": 5
},
{
"threshold": 0.1,
"precision": 0.6444,
"recall": 1.0,
"f1": 0.7838,
"tpr": 1.0,
"fpr": 0.7619,
"j": 0.2381,
"tp": 29,
"fp": 16,
"fn": 0,
"tn": 5
},
{
"threshold": 0.15,
"precision": 0.6444,
"recall": 1.0,
"f1": 0.7838,
"tpr": 1.0,
"fpr": 0.7619,
"j": 0.2381,
"tp": 29,
"fp": 16,
"fn": 0,
"tn": 5
},
{
"threshold": 0.2,
"precision": 0.6444,
"recall": 1.0,
"f1": 0.7838,
"tpr": 1.0,
"fpr": 0.7619,
"j": 0.2381,
"tp": 29,
"fp": 16,
"fn": 0,
"tn": 5
},
{
"threshold": 0.25,
"precision": 0.6444,
"recall": 1.0,
"f1": 0.7838,
"tpr": 1.0,
"fpr": 0.7619,
"j": 0.2381,
"tp": 29,
"fp": 16,
"fn": 0,
"tn": 5
},
{
"threshold": 0.3,
"precision": 0.6571,
"recall": 0.7931,
"f1": 0.7188,
"tpr": 0.7931,
"fpr": 0.5714,
"j": 0.2217,
"tp": 23,
"fp": 12,
"fn": 6,
"tn": 9
},
{
"threshold": 0.35,
"precision": 1.0,
"recall": 0.4138,
"f1": 0.5854,
"tpr": 0.4138,
"fpr": 0.0,
"j": 0.4138,
"tp": 12,
"fp": 0,
"fn": 17,
"tn": 21
},
{
"threshold": 0.4,
"precision": 1.0,
"recall": 0.4138,
"f1": 0.5854,
"tpr": 0.4138,
"fpr": 0.0,
"j": 0.4138,
"tp": 12,
"fp": 0,
"fn": 17,
"tn": 21
},
{
"threshold": 0.45,
"precision": 1.0,
"recall": 0.4138,
"f1": 0.5854,
"tpr": 0.4138,
"fpr": 0.0,
"j": 0.4138,
"tp": 12,
"fp": 0,
"fn": 17,
"tn": 21
},
{
"threshold": 0.5,
"precision": 1.0,
"recall": 0.4138,
"f1": 0.5854,
"tpr": 0.4138,
"fpr": 0.0,
"j": 0.4138,
"tp": 12,
"fp": 0,
"fn": 17,
"tn": 21
},
{
"threshold": 0.55,
"precision": 1.0,
"recall": 0.4138,
"f1": 0.5854,
"tpr": 0.4138,
"fpr": 0.0,
"j": 0.4138,
"tp": 12,
"fp": 0,
"fn": 17,
"tn": 21
},
{
"threshold": 0.6,
"precision": 1.0,
"recall": 0.3793,
"f1": 0.55,
"tpr": 0.3793,
"fpr": 0.0,
"j": 0.3793,
"tp": 11,
"fp": 0,
"fn": 18,
"tn": 21
},
{
"threshold": 0.65,
"precision": 1.0,
"recall": 0.3793,
"f1": 0.55,
"tpr": 0.3793,
"fpr": 0.0,
"j": 0.3793,
"tp": 11,
"fp": 0,
"fn": 18,
"tn": 21
},
{
"threshold": 0.7,
"precision": 1.0,
"recall": 0.3793,
"f1": 0.55,
"tpr": 0.3793,
"fpr": 0.0,
"j": 0.3793,
"tp": 11,
"fp": 0,
"fn": 18,
"tn": 21
},
{
"threshold": 0.75,
"precision": 1.0,
"recall": 0.3793,
"f1": 0.55,
"tpr": 0.3793,
"fpr": 0.0,
"j": 0.3793,
"tp": 11,
"fp": 0,
"fn": 18,
"tn": 21
},
{
"threshold": 0.8,
"precision": 1.0,
"recall": 0.3793,
"f1": 0.55,
"tpr": 0.3793,
"fpr": 0.0,
"j": 0.3793,
"tp": 11,
"fp": 0,
"fn": 18,
"tn": 21
},
{
"threshold": 0.85,
"precision": 1.0,
"recall": 0.3103,
"f1": 0.4737,
"tpr": 0.3103,
"fpr": 0.0,
"j": 0.3103,
"tp": 9,
"fp": 0,
"fn": 20,
"tn": 21
},
{
"threshold": 0.9,
"precision": 1.0,
"recall": 0.2069,
"f1": 0.3429,
"tpr": 0.2069,
"fpr": 0.0,
"j": 0.2069,
"tp": 6,
"fp": 0,
"fn": 23,
"tn": 21
},
{
"threshold": 0.95,
"precision": 1.0,
"recall": 0.1379,
"f1": 0.2424,
"tpr": 0.1379,
"fpr": 0.0,
"j": 0.1379,
"tp": 4,
"fp": 0,
"fn": 25,
"tn": 21
},
{
"threshold": 1.0,
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"tpr": 0.0,
"fpr": 0.0,
"j": 0.0,
"tp": 0,
"fp": 0,
"fn": 29,
"tn": 21
}
],
"n_pos": 29,
"n_neg": 21,
"n_total": 50
},
"faithfulness": {
"chosen_threshold": 0.0,
"chosen_metrics": {
"threshold": 0.0,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
"curve": [
{
"threshold": 0.0,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.05,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.1,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.15,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.2,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.25,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.3,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.35,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.4,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.45,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.5,
"precision": 0.6667,
"recall": 1.0,
"f1": 0.8,
"tpr": 1.0,
"fpr": 1.0,
"j": 0.0,
"tp": 30,
"fp": 15,
"fn": 0,
"tn": 0
},
{
"threshold": 0.55,
"precision": 0.6512,
"recall": 0.9333,
"f1": 0.7671,
"tpr": 0.9333,
"fpr": 1.0,
"j": -0.0667,
"tp": 28,
"fp": 15,
"fn": 2,
"tn": 0
},
{
"threshold": 0.6,
"precision": 0.6512,
"recall": 0.9333,
"f1": 0.7671,
"tpr": 0.9333,
"fpr": 1.0,
"j": -0.0667,
"tp": 28,
"fp": 15,
"fn": 2,
"tn": 0
},
{
"threshold": 0.65,
"precision": 0.6512,
"recall": 0.9333,
"f1": 0.7671,
"tpr": 0.9333,
"fpr": 1.0,
"j": -0.0667,
"tp": 28,
"fp": 15,
"fn": 2,
"tn": 0
},
{
"threshold": 0.7,
"precision": 0.6341,
"recall": 0.8667,
"f1": 0.7324,
"tpr": 0.8667,
"fpr": 1.0,
"j": -0.1333,
"tp": 26,
"fp": 15,
"fn": 4,
"tn": 0
},
{
"threshold": 0.75,
"precision": 0.6341,
"recall": 0.8667,
"f1": 0.7324,
"tpr": 0.8667,
"fpr": 1.0,
"j": -0.1333,
"tp": 26,
"fp": 15,
"fn": 4,
"tn": 0
},
{
"threshold": 0.8,
"precision": 0.6341,
"recall": 0.8667,
"f1": 0.7324,
"tpr": 0.8667,
"fpr": 1.0,
"j": -0.1333,
"tp": 26,
"fp": 15,
"fn": 4,
"tn": 0
},
{
"threshold": 0.85,
"precision": 0.6341,
"recall": 0.8667,
"f1": 0.7324,
"tpr": 0.8667,
"fpr": 1.0,
"j": -0.1333,
"tp": 26,
"fp": 15,
"fn": 4,
"tn": 0
},
{
"threshold": 0.9,
"precision": 0.6341,
"recall": 0.8667,
"f1": 0.7324,
"tpr": 0.8667,
"fpr": 1.0,
"j": -0.1333,
"tp": 26,
"fp": 15,
"fn": 4,
"tn": 0
},
{
"threshold": 0.95,
"precision": 0.6341,
"recall": 0.8667,
"f1": 0.7324,
"tpr": 0.8667,
"fpr": 1.0,
"j": -0.1333,
"tp": 26,
"fp": 15,
"fn": 4,
"tn": 0
},
{
"threshold": 1.0,
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"tpr": 0.0,
"fpr": 0.0,
"j": 0.0,
"tp": 0,
"fp": 0,
"fn": 30,
"tn": 15
}
],
"n_pos": 30,
"n_neg": 15,
"n_total": 45
}
}