ai-response-validator / eval /bot-answers.json
below-threshold's picture
Faithfulness: mean sentence scoring, strip chunk title prefix, lower threshold to 0.35
cd30e2d
[
{
"id": 1,
"client": "novamart",
"question": "What happens when a product runs out of stock?",
"error": "timeout"
},
{
"id": 2,
"client": "novamart",
"question": "How do I onboard a new merchant?",
"answer": "The information doesn't provide details on merchant onboarding. I can only provide a general statement, as the context only discusses supplier setup.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.724,
"faithfulness": 0.0,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 3,
"client": "novamart",
"question": "What is an availability scan and when does it trigger?",
"answer": "I cannot confirm information about what an availability scan is.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.876,
"faithfulness": 0.0,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 4,
"client": "novamart",
"question": "How do I update pricing for an item in the item registry?",
"answer": "To update pricing for an item in the item registry, you must submit a price update (pricing sync) through the pricing portal. Changes greater than 15% of the current price require approval, while smaller changes can be approved automatically. Approved changes will go live at the next scheduled sync window, which runs every four hours.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.839,
"faithfulness": 0.0,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 5,
"client": "novamart",
"question": "What is a capability switch and how do I enable one?",
"answer": "A capability switch is also referred to as a feature flag. To enable one in production, you will need to get sign-off from both the product and engineering lead.",
"overall": "PASS",
"flagged": false,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.799,
"faithfulness": 0.889,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": true,
"chain_terminology": true
}
},
{
"id": 6,
"client": "shelfwise",
"question": "What triggers an out-of-stock alert?",
"answer": "A product's on-hand quantity dropping below its configured reorder point triggers an out-of-stock alert.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.677,
"faithfulness": 0.026,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 7,
"client": "shelfwise",
"question": "How does planogram compliance work?",
"answer": "I couldn't find any information in the provided context about planogram compliance. It seems that the context only covers compliance reporting, store configuration, and supplier setup.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.791,
"faithfulness": 0.0,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 8,
"client": "shelfwise",
"question": "How do I add a new store to the system?",
"answer": "Unfortunately, the provided context does not contain the information on how to add a new store to the system.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.849,
"faithfulness": 0.0,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 9,
"client": "shelfwise",
"question": "How do I generate a compliance report for a store?",
"answer": "To generate a compliance report for a store, access must be granted to a user with the Audit role or higher.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.759,
"faithfulness": 0.038,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 10,
"client": "shelfwise",
"question": "What happens if a feature toggle is disabled mid-deployment?",
"answer": "The context does not specify what happens if a feature toggle is disabled mid-deployment.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.887,
"faithfulness": 0.0,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 11,
"client": "clinixone",
"question": "What is prior authorization and when is it required?",
"answer": "Prior authorization, also known as formulary pre-approval, is a requirement by a payer that a prescriber obtain approval before a specific drug is dispensed and covered. It is needed when a payer wants the prescriber to submit clinical justification for the specific drug.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.735,
"faithfulness": 0.021,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 12,
"client": "clinixone",
"question": "What are contraindicated drug combinations and how are they flagged?",
"answer": "Contraindicated drug combinations occur when one drug affects the activity of another when both are administered together. \n\nThese combinations are flagged by alerting prescribers at point-of-care when a contraindicated combination is entered. The severity classification of interactions follows the clinical pharmacology guidelines maintained in the formulary interaction database.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.802,
"faithfulness": 0.003,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 13,
"client": "clinixone",
"question": "How should adverse events be reported?",
"answer": "Adverse events must be reported to the regulatory authority within 15 days for serious unexpected events and 90 days for expected events.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.676,
"faithfulness": 0.117,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 14,
"client": "clinixone",
"question": "What is a safety signal and how is it escalated?",
"answer": "A safety signal (pharmacovigilance alert) is a pattern of adverse events that suggests a previously unknown or incompletely documented causal relationship between a drug and an outcome. Adverse events must be reported to the regulatory authority within 15 days for serious unexpected events and 90 days for expected events.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.644,
"faithfulness": 0.001,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 15,
"client": "clinixone",
"question": "Which drugs are contraindicated in patients with bleeding disorders?",
"answer": "The context does not mention any contraindication for patients with bleeding disorders.",
"overall": "PASS",
"flagged": false,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.665,
"faithfulness": 1.0,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": true,
"chain_terminology": true
}
},
{
"id": 16,
"client": "clinixone",
"question": "What adverse events are associated with drugs for allergy relief?",
"answer": "There is no information about drugs specifically for allergy relief in the context provided.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.741,
"faithfulness": 0.0,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 17,
"client": "clinixone",
"question": "What drugs cause fatigue as a known adverse event?",
"answer": "Amoxstatin62, Amoxzole72, and Mecozole85 cause fatigue as a known adverse event.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.715,
"faithfulness": 0.0,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 18,
"client": "pharmalink",
"question": "What is formulary pre-approval?",
"answer": "Formulary pre-approval is a prior authorization requirement by a payer that a prescriber obtain approval before a specific drug is dispensed and covered.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.869,
"faithfulness": 0.003,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 19,
"client": "pharmalink",
"question": "What is a pharmacovigilance alert and when does it fire?",
"answer": "A pharmacovigilance alert, also known as a safety signal, is a pattern of adverse events that suggests a previously unknown or incompletely documented causal relationship between a drug and an outcome. \n\nIt suggests a possible causal relationship but does not confirm one.",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.811,
"faithfulness": 0.005,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
},
{
"id": 20,
"client": "pharmalink",
"question": "Which drugs have a prescriber warning about avoiding alcohol?",
"answer": "There's no information in the provided context regarding a prescriber warning about avoiding alcohol for any of the mentioned drugs (Zylozole96, Zylozepam79, Zylozole29).",
"overall": "FAIL",
"flagged": true,
"scores": {
"pii_leakage": 1.0,
"token_budget": 1.0,
"answer_relevancy": 0.792,
"faithfulness": 0.0,
"chain_terminology": 1.0
},
"passed": {
"pii_leakage": true,
"token_budget": true,
"answer_relevancy": true,
"faithfulness": false,
"chain_terminology": true
}
}
]