Syncing native .eval_results database API integrations

Files changed (9)

.eval_results/biomix.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 - dataset:
     id: kg-rag/BiomixQA
-    task_id: text-classification
+    task_id: question-answering
   date: '2026-03-28'
   source:
     name: 'BiomixQA: HIPAA Routing'

.eval_results/finragbench.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 - dataset:
     id: FinRAGBench/FinRAGBench-V
-    task_id: text-classification
+    task_id: question-answering
   date: '2026-03-28'
   source:
     name: 'FinRAGBench-V: Spatial Mapping'

.eval_results/frames.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 - dataset:
     id: google/frames-benchmark
-    task_id: text-classification
+    task_id: question-answering
   date: '2026-03-28'
   source:
     name: 'FRAMES: Logic Graphing'

.eval_results/graphrag.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 - dataset:
     id: GraphRAG-Bench/GraphRAG-Bench
-    task_id: text-classification
+    task_id: question-answering
   date: '2026-03-28'
   source:
     name: 'GraphRAG-Bench: Natively'

.eval_results/ragas.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 - dataset:
     id: ragas/ragas-eval
-    task_id: text-classification
+    task_id: question-answering
   date: '2026-03-28'
   source:
     name: 'Pipeline Eval (RAGAS): Provable QA Hits'

.eval_results/rgb.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 - dataset:
     id: THUDM/RGB
-    task_id: text-classification
+    task_id: question-answering
   date: '2026-03-28'
   source:
     name: 'RGB: Strict Paths'

.eval_results/scale.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 - dataset:
     id: FastMemory/Scale
-    task_id: text-classification
+    task_id: question-answering
   date: '2026-03-28'
   source:
     name: 'Scale Benchmark: Sub-second Execution'

.eval_results/stark.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 - dataset:
     id: snap-stanford/stark
-    task_id: text-classification
+    task_id: question-answering
   date: '2026-03-28'
   source:
     name: 'STaRK-Prime: Deterministic Logic'

.eval_results/t2ragbench.yaml CHANGED Viewed

@@ -1,6 +1,6 @@
 - dataset:
     id: G4KMU/t2-ragbench
-    task_id: text-classification
+    task_id: question-answering
   date: '2026-03-28'
   source:
     name: 'T2-RAGBench: Native CBFDAE'