Spaces:
Running
Running
| /** | |
| * | |
| * Copyright 2023-2025 InspectorRAGet Team | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| * | |
| **/ | |
| import { isEmpty, isNumber } from 'lodash'; | |
| import { hash } from '@/src/utilities/strings'; | |
| import { snakeCaseKeys } from '@/src/utilities/objects'; | |
| import { | |
| Data, | |
| MetricValue, | |
| RawData, | |
| TaskEvaluation, | |
| DisqualificationReason, | |
| DisqualifiedTasks, | |
| Task, | |
| RetrievedDocument, | |
| Notification, | |
| } from '@/src/types'; | |
/**
 * Labels for the kinds of problems that disqualify an evaluation or task.
 * Each label is stored as the `kind` of a disqualification reason.
 */
export const DataErrorKinds = {
  // A plottable metric has no entry in an evaluation's annotations.
  MISSING_METRIC: 'MISSING METRIC',
  // A task has no evaluation for one of the required models.
  MISSING_MODEL: 'MISSING MODEL',
  // A metric entry is empty or an annotator's entry has no `value`.
  MISSING_VALUE: 'MISSING VALUE',
};
/**
 * Sorts metric values in place, in ascending order.
 *
 * - Two numeric values are compared arithmetically.
 * - Two string values are compared by their `numericValue` when both carry
 *   one; otherwise by locale-aware string comparison.
 * - Any other pairing (for example a string next to a number) compares as
 *   equal, so the pair keeps its relative order.
 *
 * @param values Metric values to reorder; the array itself is mutated.
 */
function sortMetricValues(values: MetricValue[]) {
  values.sort((left, right) => {
    // Numeric pair: plain arithmetic ordering
    if (typeof left.value === 'number' && typeof right.value === 'number') {
      return left.value - right.value;
    }

    // Anything other than a pair of strings keeps its current order
    if (typeof left.value !== 'string' || typeof right.value !== 'string') {
      return 0;
    }

    // String pair: prefer the attached numeric ranks when both sides have one
    const leftRank = left.numericValue;
    const rightRank = right.numericValue;
    if (
      leftRank != null &&
      isNumber(leftRank) &&
      rightRank != null &&
      isNumber(rightRank)
    ) {
      return leftRank - rightRank;
    }

    // String pair without usable ranks: locale-aware text comparison
    return left.value.localeCompare(right.value);
  });
}
/**
 * Records an evaluation, and with it the whole task, as disqualified.
 *
 * Both `disqualifiedTasks` and `evaluationsPerTask` are mutated:
 * - if the task was still qualified, its evaluations are removed from
 *   `evaluationsPerTask` and stored, followed by `evaluation`, under the task
 *   in `disqualifiedTasks` together with `reasons`;
 * - if the task was already disqualified, `reasons` are appended to its
 *   existing reasons and `evaluation` is appended to its evaluations;
 * - otherwise a new disqualified entry holding only `evaluation` is created.
 *
 * @param reasons Why the evaluation is being disqualified.
 * @param evaluation The offending evaluation.
 * @param disqualifiedTasks Disqualified tasks keyed by task ID.
 * @param evaluationsPerTask Evaluations of still-qualified tasks keyed by task ID.
 */
function disqualifyEvaluation(
  reasons: DisqualificationReason[],
  evaluation: TaskEvaluation,
  disqualifiedTasks: DisqualifiedTasks,
  evaluationsPerTask: { [key: string]: TaskEvaluation[] },
) {
  const { taskId } = evaluation;

  // Case 1: task was qualified until now, so move everything collected so far
  if (evaluationsPerTask.hasOwnProperty(taskId)) {
    const collectedSoFar = evaluationsPerTask[taskId];
    delete evaluationsPerTask[taskId];
    disqualifiedTasks[taskId] = {
      reasons: reasons,
      evaluations: [...collectedSoFar, evaluation],
    };
    return;
  }

  // Case 2: task is already disqualified, so extend the existing record
  if (disqualifiedTasks.hasOwnProperty(taskId)) {
    const existing = disqualifiedTasks[taskId];
    existing.reasons = [...existing.reasons, ...reasons];
    existing.evaluations.push(evaluation);
    return;
  }

  // Case 3: first time this task is seen, so start a new disqualified record
  disqualifiedTasks[taskId] = {
    reasons: reasons,
    evaluations: [evaluation],
  };
}
/**
 * Validates raw analytics data and separates qualified tasks from
 * disqualified ones.
 *
 * A task stays qualified only when
 * 1. every model listed in `data.models` has at least one evaluation for it,
 *    and
 * 2. each of those evaluations has, for every plottable (numerical or
 *    categorical) metric, a non-empty annotation entry in which every
 *    annotator's record has a `value` property.
 *
 * Evaluations that belong to a model not listed in `data.models` are ignored
 * entirely: they are neither kept nor allowed to disqualify a task.
 *
 * Side effect: the `values` array of each metric in `data.metrics` is sorted
 * in place.
 *
 * @param data Raw analytics data to validate.
 * @returns A tuple of
 *   (1) the processed data, restricted to qualified tasks and their evaluations,
 *   (2) the disqualified tasks keyed by task ID, each with its reasons and evaluations,
 *   (3) notifications; nothing in this function adds to the list, so it is always empty.
 */
export function processData(
  data: RawData,
): [Data, DisqualifiedTasks, Notification[]] {
  // Own-property test that does not depend on the inspected object's own
  // `hasOwnProperty` member (a task ID or annotation key could shadow it)
  const hasOwn = (target: unknown, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(target, key);

  // Step 0: Define notifications
  const notifications: Notification[] = [];

  // Step 1: Identify all plottable metrics and required model IDs
  const plottableMetrics = data.metrics.filter(
    (metric) => metric.type === 'numerical' || metric.type === 'categorical',
  );
  const requiredModelIDs = new Set(data.models.map((model) => model.modelId));

  // Builds one MISSING MODEL reason per required model that has no evaluation
  // in the given list; returns an empty list when every model is covered
  const collectMissingModelReasons = (evaluations: TaskEvaluation[]) => {
    const availableModelIDs = new Set(
      evaluations.map((evaluation) => evaluation.modelId),
    );
    return [...requiredModelIDs]
      .filter((modelId) => !availableModelIDs.has(modelId))
      .map((modelId) => {
        return { kind: DataErrorKinds.MISSING_MODEL, data: modelId };
      });
  };

  /**
   * Step 2: Disqualify tasks based on following guidelines
   * 1. Only preserve evaluations for models specified in the models section
   * 2. If task does not have evaluations for all the models from models section
   * 3. If task does not have every metric from metrics section for all the models from models section
   */
  const disqualifiedTasks: DisqualifiedTasks = {};
  const evaluationsPerTask: { [key: string]: TaskEvaluation[] } = {};

  // Step 2.a: Iterate over every evaluation entry
  data.evaluations.forEach((evaluation) => {
    // Step 2.a.i: Ignore evaluations for models outside the models section.
    // This has to happen before validation; otherwise an incomplete evaluation
    // for an unlisted model would disqualify the whole task.
    if (!requiredModelIDs.has(evaluation.modelId)) {
      return;
    }

    // Step 2.a.ii: Verify annotations for all plottable metrics exist
    const disqualificationReasons: DisqualificationReason[] = [];
    plottableMetrics.forEach((metric) => {
      if (!hasOwn(evaluation.annotations, metric.name)) {
        disqualificationReasons.push({
          kind: DataErrorKinds.MISSING_METRIC,
          data: metric.name,
        });
        return;
      }

      const annotation = evaluation.annotations[metric.name];
      if (isEmpty(annotation)) {
        disqualificationReasons.push({
          kind: DataErrorKinds.MISSING_VALUE,
          data: metric.name,
        });
        return;
      }

      // One reason is recorded per annotator whose record lacks a value
      for (const evaluator of Object.keys(annotation)) {
        const entry = annotation[evaluator];
        // A null/undefined record counts as a missing value instead of throwing
        if (entry == null || !hasOwn(entry, 'value')) {
          disqualificationReasons.push({
            kind: DataErrorKinds.MISSING_VALUE,
            data: metric.name,
          });
        }
      }
    });

    // Step 2.a.iii: Disqualify evaluation and associated task, if required
    if (!isEmpty(disqualificationReasons)) {
      disqualifyEvaluation(
        disqualificationReasons,
        evaluation,
        disqualifiedTasks,
        evaluationsPerTask,
      );
      return;
    }

    // Step 2.a.iv: File the valid evaluation under its task. A valid evaluation
    // for an already disqualified task is kept with that disqualified task.
    if (hasOwn(evaluationsPerTask, evaluation.taskId)) {
      evaluationsPerTask[evaluation.taskId].push(evaluation);
    } else if (hasOwn(disqualifiedTasks, evaluation.taskId)) {
      disqualifiedTasks[evaluation.taskId].evaluations.push(evaluation);
    } else {
      evaluationsPerTask[evaluation.taskId] = [evaluation];
    }
  });

  // Step 3: Verify evaluations exist for every model from the models section.
  // Missing models are always computed from the model IDs themselves; comparing
  // only the number of evaluations would let duplicate evaluations for one
  // model hide the absence of another.
  // Step 3.a: Check first in all disqualified tasks
  Object.keys(disqualifiedTasks).forEach((taskId) => {
    const missingModelReasons = collectMissingModelReasons(
      disqualifiedTasks[taskId].evaluations,
    );
    if (missingModelReasons.length > 0) {
      disqualifiedTasks[taskId].reasons = [
        ...disqualifiedTasks[taskId].reasons,
        ...missingModelReasons,
      ];
    }
  });

  // Step 3.b: Check in qualified tasks; move a task to the disqualified list
  // when any required model is missing
  Object.keys(evaluationsPerTask).forEach((taskId) => {
    const missingModelReasons = collectMissingModelReasons(
      evaluationsPerTask[taskId],
    );
    if (missingModelReasons.length > 0) {
      disqualifiedTasks[taskId] = {
        reasons: missingModelReasons,
        evaluations: evaluationsPerTask[taskId],
      };
      delete evaluationsPerTask[taskId];
    }
  });

  // Step 4: Flatten qualified tasks into qualified evaluations list, while
  // collecting every annotator seen in those evaluations
  const qualifiedTaskIds = Object.keys(evaluationsPerTask);
  const annotators = new Set<string>();
  const qualifiedEvaluations: TaskEvaluation[] = [];
  qualifiedTaskIds.forEach((taskId) => {
    evaluationsPerTask[taskId].forEach((evaluation) => {
      Object.keys(evaluation.annotations).forEach((metric) => {
        const entry = evaluation.annotations[metric];
        Object.keys(entry).forEach((annotator) => annotators.add(annotator));
      });
      qualifiedEvaluations.push(evaluation);
    });
  });

  // Step 5: Create a list of qualified tasks; a qualified task ID without a
  // matching entry in `data.tasks` is skipped
  const tasksMap = new Map(
    data.tasks.map((task): [string, Task] => [task.taskId, task]),
  );
  const qualifiedTasks: Task[] = [];
  qualifiedTaskIds.forEach((taskId) => {
    const task = tasksMap.get(taskId);
    if (task) {
      qualifiedTasks.push(task);
    }
  });

  return [
    {
      name: data.name || 'Example',
      // The hash is taken before the metric values below are sorted
      exampleId: hash(JSON.stringify(data)),
      models: data.models,
      metrics: data.metrics.map((metric) => {
        // Step 1: Sort metric values (in place), if present
        if (metric.values) {
          sortMetricValues(metric.values);
        }

        // Step 2: Return with additional attributes
        return {
          ...metric,
          // Categorical: first and last of the sorted values
          ...(metric.type === 'categorical' &&
            metric.values && {
              minValue: metric.values[0],
              maxValue: metric.values[metric.values.length - 1],
            }),
          // Numerical: first two entries of the declared range
          ...(metric.type === 'numerical' &&
            metric.range &&
            metric.range.length >= 2 && {
              minValue: metric.range[0],
              maxValue: metric.range[1],
            }),
        };
      }),
      ...(data.filters && { filters: data.filters }),
      tasks: qualifiedTasks.map((task) => {
        return {
          ...task,
          taskType: task.taskType,
        };
      }),
      documents: data.documents,
      evaluations: qualifiedEvaluations,
      annotators: Array.from(annotators),
      numTasks: qualifiedTasks.length,
    },
    disqualifiedTasks,
    notifications,
  ];
}
/**
 * Serialises analytics data to JSON (keys converted with `snakeCaseKeys`) and
 * triggers a browser download named `analytics.json`.
 *
 * When `tasks` is provided, those tasks are exported instead of `data.tasks`,
 * each with a `flagged` property (defaulting to `false`). The caller's task
 * objects are not modified; a copy is made for any task that lacks `flagged`.
 * If the number of provided tasks differs from `data.tasks`, the export is
 * reduced to the evaluations of the provided tasks and to the documents
 * referenced by those tasks' contexts.
 *
 * Requires a DOM (`document`), since the download is started through a
 * temporary `<a>` element.
 *
 * @param data Processed analytics data; nothing is exported when undefined.
 * @param tasks Optional tasks to export in place of `data.tasks`.
 * @returns `true` once the download has been triggered, `false` when `data`
 *   is not provided.
 */
export function exportData(
  data: Data | undefined,
  tasks: Task[] | undefined,
): boolean {
  // Step 0: Verify if data is provided
  if (!data) {
    return false;
  }

  // Step 1: By default, export everything present in the data
  let tasksToExport: Task[] = data.tasks;
  let evaluationsToExport: TaskEvaluation[] = data.evaluations;
  let documentsToExport: RetrievedDocument[] | undefined = data.documents;

  // Step 2: If tasks are defined, export those instead
  if (tasks) {
    // Step 2.a: Make sure every exported task has a "flagged" property,
    // without touching the objects owned by the caller
    const exportedTasks: Task[] = tasks.map((task) =>
      Object.prototype.hasOwnProperty.call(task, 'flagged')
        ? task
        : { ...task, flagged: false },
    );
    tasksToExport = exportedTasks;

    // Step 2.b: Create reduced analytics data, if not all tasks are specified
    if (data.tasks.length !== exportedTasks.length) {
      // Step 2.b.i: Build documents map
      const documentsMap: Map<string, RetrievedDocument> = new Map(
        data.documents?.map((doc) => [doc.documentId, doc]),
      );
      const hasDocuments = documentsMap.size !== 0;

      // Step 2.b.ii: Identify relevant task IDs and referenced documents
      // NOTE(review): only task-level contexts are inspected here; documents
      // referenced solely from evaluation-level `contexts` are not retained.
      // Confirm this is intended.
      const relevantDocuments = new Set<RetrievedDocument>();
      const relevantTaskIds = new Set<string>();
      exportedTasks.forEach((task) => {
        relevantTaskIds.add(task.taskId);

        if (hasDocuments) {
          task.contexts?.forEach((context) => {
            // String contexts carry no document reference
            if (typeof context !== 'string') {
              const referenceDocument = documentsMap.get(context.documentId);
              if (referenceDocument) {
                relevantDocuments.add(referenceDocument);
              }
            }
          });
        }
      });

      // Step 2.b.iii: Restrict documents and evaluations to the relevant ones;
      // the documents key is omitted when no document is referenced
      documentsToExport =
        relevantDocuments.size !== 0
          ? Array.from(relevantDocuments)
          : undefined;
      evaluationsToExport = data.evaluations.filter((evaluation) =>
        relevantTaskIds.has(evaluation.taskId),
      );
    }
  }

  // Step 3: Create the object to be exported. Key order determines the order
  // of the keys in the generated JSON.
  const dataToExport: RawData = {
    name: data.name,
    ...(data.exampleId && { exampleId: data.exampleId }),
    ...(data.filters && { filters: data.filters }),
    models: data.models,
    metrics: data.metrics,
    ...(documentsToExport && {
      documents: documentsToExport,
    }),
    tasks: tasksToExport,
    // Only the listed evaluation fields are exported
    evaluations: evaluationsToExport.map((evaluation) => {
      return {
        taskId: evaluation.taskId,
        modelId: evaluation.modelId,
        modelResponse: evaluation.modelResponse,
        annotations: evaluation.annotations,
        ...(evaluation.contexts && { contexts: evaluation.contexts }),
      };
    }),
  };

  // Step 4: Create <a> tag
  const element = document.createElement('a');

  // Step 4.a: Set attributes
  element.setAttribute(
    'href',
    'data:application/json;charset=utf-8, ' +
      encodeURIComponent(JSON.stringify(snakeCaseKeys(dataToExport))),
  );
  element.setAttribute('download', 'analytics.json');

  // Step 4.b: Add to DOM tree and click it
  document.body.appendChild(element);
  element.click();

  // Step 4.c: Cleanup
  document.body.removeChild(element);

  // Step 5: Return "true" indicating success
  return true;
}