ai-engineering-project / evaluation /enhanced_results.json
GitHub Action
Clean deployment without binary files
f884e6e
{
"summary": {
"target": "http://localhost:5000",
"evaluation_method": "enhanced_llm_based",
"n_questions": 20,
"successful_evaluations": 0,
"success_rate": 0.0,
"latency_p50_s": 0.0021209716796875,
"latency_p95_s": 0.0031218528747558594,
"avg_latency_s": 0.00264354944229126,
"avg_groundedness_score": null,
"avg_citation_accuracy": null,
"groundedness_method": "token_overlap_fallback",
"grounded_responses": 0,
"ungrounded_responses": 0,
"perfect_citations": 0,
"no_citations": 0
},
"results": [
{
"id": "1",
"question": "When are employees eligible for remote work?",
"status_code": 403,
"error": "",
"latency_s": 0.01120901107788086
},
{
"id": "2",
"question": "How many days of PTO do employees accrue per year?",
"status_code": 403,
"error": "",
"latency_s": 0.0026290416717529297
},
{
"id": "3",
"question": "What is the parental leave policy for new parents?",
"status_code": 403,
"error": "",
"latency_s": 0.0022759437561035156
},
{
"id": "4",
"question": "How should an employee report workplace harassment?",
"status_code": 403,
"error": "",
"latency_s": 0.0022132396697998047
},
{
"id": "5",
"question": "What is the expense reimbursement limit for domestic travel?",
"status_code": 403,
"error": "",
"latency_s": 0.0020639896392822266
},
{
"id": "6",
"question": "What are the password complexity requirements for company systems?",
"status_code": 403,
"error": "",
"latency_s": 0.0018489360809326172
},
{
"id": "7",
"question": "How do employees enroll in health insurance?",
"status_code": 403,
"error": "",
"latency_s": 0.0021140575408935547
},
{
"id": "8",
"question": "What is the company's emergency response procedure?",
"status_code": 403,
"error": "",
"latency_s": 0.0031218528747558594
},
{
"id": "9",
"question": "When is performance review feedback provided?",
"status_code": 403,
"error": "",
"latency_s": 0.002635955810546875
},
{
"id": "10",
"question": "What is the policy for approval of business travel?",
"status_code": 403,
"error": "",
"latency_s": 0.0016291141510009766
},
{
"id": "11",
"question": "How often are payroll errors corrected after reporting?",
"status_code": 403,
"error": "",
"latency_s": 0.002141237258911133
},
{
"id": "12",
"question": "What steps are required to request a procurement?",
"status_code": 403,
"error": "",
"latency_s": 0.001837015151977539
},
{
"id": "13",
"question": "Who should you contact about parental leave questions?",
"status_code": 403,
"error": "",
"latency_s": 0.0021278858184814453
},
{
"id": "14",
"question": "What is the company's policy on remote onboarding?",
"status_code": 403,
"error": "",
"latency_s": 0.0019068717956542969
},
{
"id": "15",
"question": "What types of expenses are NOT reimbursable?",
"status_code": 403,
"error": "",
"latency_s": 0.002079010009765625
},
{
"id": "16",
"question": "What is the process for requesting time off for jury duty?",
"status_code": 403,
"error": "",
"latency_s": 0.002544879913330078
},
{
"id": "17",
"question": "How is confidential client information required to be handled?",
"status_code": 403,
"error": "",
"latency_s": 0.0026450157165527344
},
{
"id": "18",
"question": "What's the escalation path for unresolved HR issues?",
"status_code": 403,
"error": "",
"latency_s": 0.001993894577026367
},
{
"id": "19",
"question": "What is the acceptable use policy for company devices?",
"status_code": 403,
"error": "",
"latency_s": 0.0019969940185546875
},
{
"id": "20",
"question": "Where can employees find the holiday schedule?",
"status_code": 403,
"error": "",
"latency_s": 0.0018570423126220703
}
],
"metadata": {
"evaluation_timestamp": 1761872513.876975,
"evaluation_version": "enhanced_v1.0",
"groundedness_model": "token_overlap",
"target_endpoint": "http://localhost:5000/chat"
}
}