Spaces:
Running
Running
```json
{
  "id": "string",
  "systemName": "string",
  "url": "string",
  "provider": "string",
  "version": "string",
  "modelTag": "string",
  "knowledgeCutoff": "YYYY-MM-DD",
  "modelType": "foundational | fine-tuned | na",
  "inputModalities": ["string"],
  "outputModalities": ["string"],
  "deploymentContexts": ["string"],
  "evaluationDate": "YYYY-MM-DD",
  "evaluator": "string",
  "selectedCategories": [
    "language-communication",
    "problem-solving"
  ],
  "categoryEvaluations": {
    "language-communication": {
      "benchmarkAnswers": {
        "A1": "yes",
        "A2": "no",
        "A3": "na",
        "A4": "yes",
        "A5": "no",
        "A6": "yes"
      },
      "processAnswers": {
        "B1": "yes",
        "B2": "no",
        "B3": "yes",
        "B4": "no",
        "B5": "yes",
        "B6": "na"
      },
      "benchmarkSources": {
        "A1": [
          {
            "id": "1",
            "benchmarkName": "MMLU",
            "version": "2023-05",
            "taskVariants": "multiple choice",
            "metrics": "accuracy",
            "url": "https://example.org",
            "description": "string",
            "sourceType": "external",
            "score": "86.4%",
            "confidenceInterval": "95% CI [85,88]",
            "customFields": {}
          }
        ]
      },
      "processSources": {
        "B1": [
          {
            "id": "7",
            "url": "https://example.org/doc",
            "description": "string",
            "sourceType": "internal",
            "documentType": "Research Paper",
            "title": "string",
            "author": "string",
            "organization": "string",
            "date": "YYYY-MM-DD",
            "customFields": {}
          }
        ]
      },
      "additionalAspects": "string",
      "score": {
        "benchmarkScore": 0,
        "processScore": 0,
        "totalScore": 0,
        "status": "strong"
      }
    }
  }
}
```