prabhatkr committed on
Commit
4352c1a
·
verified ·
1 Parent(s): 890bb1e

Syncing native .eval_results database API integrations

Browse files
.eval_results/biomix.yaml CHANGED
@@ -1,6 +1,6 @@
1
  - dataset:
2
  id: kg-rag/BiomixQA
3
- task_id: text-classification
4
  date: '2026-03-28'
5
  source:
6
  name: 'BiomixQA: HIPAA Routing'
 
1
  - dataset:
2
  id: kg-rag/BiomixQA
3
+ task_id: question-answering
4
  date: '2026-03-28'
5
  source:
6
  name: 'BiomixQA: HIPAA Routing'
.eval_results/finragbench.yaml CHANGED
@@ -1,6 +1,6 @@
1
  - dataset:
2
  id: FinRAGBench/FinRAGBench-V
3
- task_id: text-classification
4
  date: '2026-03-28'
5
  source:
6
  name: 'FinRAGBench-V: Spatial Mapping'
 
1
  - dataset:
2
  id: FinRAGBench/FinRAGBench-V
3
+ task_id: question-answering
4
  date: '2026-03-28'
5
  source:
6
  name: 'FinRAGBench-V: Spatial Mapping'
.eval_results/frames.yaml CHANGED
@@ -1,6 +1,6 @@
1
  - dataset:
2
  id: google/frames-benchmark
3
- task_id: text-classification
4
  date: '2026-03-28'
5
  source:
6
  name: 'FRAMES: Logic Graphing'
 
1
  - dataset:
2
  id: google/frames-benchmark
3
+ task_id: question-answering
4
  date: '2026-03-28'
5
  source:
6
  name: 'FRAMES: Logic Graphing'
.eval_results/graphrag.yaml CHANGED
@@ -1,6 +1,6 @@
1
  - dataset:
2
  id: GraphRAG-Bench/GraphRAG-Bench
3
- task_id: text-classification
4
  date: '2026-03-28'
5
  source:
6
  name: 'GraphRAG-Bench: Natively'
 
1
  - dataset:
2
  id: GraphRAG-Bench/GraphRAG-Bench
3
+ task_id: question-answering
4
  date: '2026-03-28'
5
  source:
6
  name: 'GraphRAG-Bench: Natively'
.eval_results/ragas.yaml CHANGED
@@ -1,6 +1,6 @@
1
  - dataset:
2
  id: ragas/ragas-eval
3
- task_id: text-classification
4
  date: '2026-03-28'
5
  source:
6
  name: 'Pipeline Eval (RAGAS): Provable QA Hits'
 
1
  - dataset:
2
  id: ragas/ragas-eval
3
+ task_id: question-answering
4
  date: '2026-03-28'
5
  source:
6
  name: 'Pipeline Eval (RAGAS): Provable QA Hits'
.eval_results/rgb.yaml CHANGED
@@ -1,6 +1,6 @@
1
  - dataset:
2
  id: THUDM/RGB
3
- task_id: text-classification
4
  date: '2026-03-28'
5
  source:
6
  name: 'RGB: Strict Paths'
 
1
  - dataset:
2
  id: THUDM/RGB
3
+ task_id: question-answering
4
  date: '2026-03-28'
5
  source:
6
  name: 'RGB: Strict Paths'
.eval_results/scale.yaml CHANGED
@@ -1,6 +1,6 @@
1
  - dataset:
2
  id: FastMemory/Scale
3
- task_id: text-classification
4
  date: '2026-03-28'
5
  source:
6
  name: 'Scale Benchmark: Sub-second Execution'
 
1
  - dataset:
2
  id: FastMemory/Scale
3
+ task_id: question-answering
4
  date: '2026-03-28'
5
  source:
6
  name: 'Scale Benchmark: Sub-second Execution'
.eval_results/stark.yaml CHANGED
@@ -1,6 +1,6 @@
1
  - dataset:
2
  id: snap-stanford/stark
3
- task_id: text-classification
4
  date: '2026-03-28'
5
  source:
6
  name: 'STaRK-Prime: Deterministic Logic'
 
1
  - dataset:
2
  id: snap-stanford/stark
3
+ task_id: question-answering
4
  date: '2026-03-28'
5
  source:
6
  name: 'STaRK-Prime: Deterministic Logic'
.eval_results/t2ragbench.yaml CHANGED
@@ -1,6 +1,6 @@
1
  - dataset:
2
  id: G4KMU/t2-ragbench
3
- task_id: text-classification
4
  date: '2026-03-28'
5
  source:
6
  name: 'T2-RAGBench: Native CBFDAE'
 
1
  - dataset:
2
  id: G4KMU/t2-ragbench
3
+ task_id: question-answering
4
  date: '2026-03-28'
5
  source:
6
  name: 'T2-RAGBench: Native CBFDAE'