Syncing native .eval_results database API integrations
Browse files
- .eval_results/biomix.yaml +1 -1
- .eval_results/finragbench.yaml +1 -1
- .eval_results/frames.yaml +1 -1
- .eval_results/graphrag.yaml +1 -1
- .eval_results/ragas.yaml +1 -1
- .eval_results/rgb.yaml +1 -1
- .eval_results/scale.yaml +1 -1
- .eval_results/stark.yaml +1 -1
- .eval_results/t2ragbench.yaml +1 -1
.eval_results/biomix.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
- dataset:
|
| 2 |
id: kg-rag/BiomixQA
|
| 3 |
-
task_id:
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'BiomixQA: HIPAA Routing'
|
|
|
|
| 1 |
- dataset:
|
| 2 |
id: kg-rag/BiomixQA
|
| 3 |
+
task_id: question-answering
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'BiomixQA: HIPAA Routing'
|
.eval_results/finragbench.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
- dataset:
|
| 2 |
id: FinRAGBench/FinRAGBench-V
|
| 3 |
-
task_id:
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'FinRAGBench-V: Spatial Mapping'
|
|
|
|
| 1 |
- dataset:
|
| 2 |
id: FinRAGBench/FinRAGBench-V
|
| 3 |
+
task_id: question-answering
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'FinRAGBench-V: Spatial Mapping'
|
.eval_results/frames.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
- dataset:
|
| 2 |
id: google/frames-benchmark
|
| 3 |
-
task_id:
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'FRAMES: Logic Graphing'
|
|
|
|
| 1 |
- dataset:
|
| 2 |
id: google/frames-benchmark
|
| 3 |
+
task_id: question-answering
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'FRAMES: Logic Graphing'
|
.eval_results/graphrag.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
- dataset:
|
| 2 |
id: GraphRAG-Bench/GraphRAG-Bench
|
| 3 |
-
task_id:
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'GraphRAG-Bench: Natively'
|
|
|
|
| 1 |
- dataset:
|
| 2 |
id: GraphRAG-Bench/GraphRAG-Bench
|
| 3 |
+
task_id: question-answering
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'GraphRAG-Bench: Natively'
|
.eval_results/ragas.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
- dataset:
|
| 2 |
id: ragas/ragas-eval
|
| 3 |
-
task_id:
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'Pipeline Eval (RAGAS): Provable QA Hits'
|
|
|
|
| 1 |
- dataset:
|
| 2 |
id: ragas/ragas-eval
|
| 3 |
+
task_id: question-answering
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'Pipeline Eval (RAGAS): Provable QA Hits'
|
.eval_results/rgb.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
- dataset:
|
| 2 |
id: THUDM/RGB
|
| 3 |
-
task_id:
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'RGB: Strict Paths'
|
|
|
|
| 1 |
- dataset:
|
| 2 |
id: THUDM/RGB
|
| 3 |
+
task_id: question-answering
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'RGB: Strict Paths'
|
.eval_results/scale.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
- dataset:
|
| 2 |
id: FastMemory/Scale
|
| 3 |
-
task_id:
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'Scale Benchmark: Sub-second Execution'
|
|
|
|
| 1 |
- dataset:
|
| 2 |
id: FastMemory/Scale
|
| 3 |
+
task_id: question-answering
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'Scale Benchmark: Sub-second Execution'
|
.eval_results/stark.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
- dataset:
|
| 2 |
id: snap-stanford/stark
|
| 3 |
-
task_id:
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'STaRK-Prime: Deterministic Logic'
|
|
|
|
| 1 |
- dataset:
|
| 2 |
id: snap-stanford/stark
|
| 3 |
+
task_id: question-answering
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'STaRK-Prime: Deterministic Logic'
|
.eval_results/t2ragbench.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
- dataset:
|
| 2 |
id: G4KMU/t2-ragbench
|
| 3 |
-
task_id:
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'T2-RAGBench: Native CBFDAE'
|
|
|
|
| 1 |
- dataset:
|
| 2 |
id: G4KMU/t2-ragbench
|
| 3 |
+
task_id: question-answering
|
| 4 |
date: '2026-03-28'
|
| 5 |
source:
|
| 6 |
name: 'T2-RAGBench: Native CBFDAE'
|