Upload README.md with huggingface_hub
Browse files
README.md
CHANGED
|
@@ -12,7 +12,7 @@ model-index:
|
|
| 12 |
type: question-answering
|
| 13 |
name: Financial Q&A
|
| 14 |
dataset:
|
| 15 |
-
name:
|
| 16 |
type: PatronusAI/financebench
|
| 17 |
config: financebench
|
| 18 |
split: train
|
|
@@ -24,7 +24,7 @@ model-index:
|
|
| 24 |
type: text2text-generation
|
| 25 |
name: Table Preservation
|
| 26 |
dataset:
|
| 27 |
-
name: T2-RAGBench
|
| 28 |
type: G4KMU/t2-ragbench
|
| 29 |
config: default
|
| 30 |
split: test
|
|
@@ -36,7 +36,7 @@ model-index:
|
|
| 36 |
type: text-retrieval
|
| 37 |
name: Multi-Doc Synthesis
|
| 38 |
dataset:
|
| 39 |
-
name: FRAMES
|
| 40 |
type: google/frames-benchmark
|
| 41 |
config: default
|
| 42 |
split: test
|
|
@@ -48,7 +48,7 @@ model-index:
|
|
| 48 |
type: visual-question-answering
|
| 49 |
name: Visual Reasoning
|
| 50 |
dataset:
|
| 51 |
-
name: FinRAGBench-V
|
| 52 |
type: FinRAGBench/FinRAGBench-V
|
| 53 |
config: default
|
| 54 |
split: test
|
|
@@ -60,7 +60,7 @@ model-index:
|
|
| 60 |
type: text-classification
|
| 61 |
name: Anti-Hallucination
|
| 62 |
dataset:
|
| 63 |
-
name: RGB
|
| 64 |
type: THUDM/RGB
|
| 65 |
config: default
|
| 66 |
split: test
|
|
@@ -72,7 +72,7 @@ model-index:
|
|
| 72 |
type: tabular-classification
|
| 73 |
name: End-to-End Latency
|
| 74 |
dataset:
|
| 75 |
-
name: Scale Benchmark
|
| 76 |
type: FastMemory/Scale
|
| 77 |
config: default
|
| 78 |
split: train
|
|
@@ -84,7 +84,7 @@ model-index:
|
|
| 84 |
type: text-retrieval
|
| 85 |
name: Multi-hop Routing
|
| 86 |
dataset:
|
| 87 |
-
name: GraphRAG-Bench
|
| 88 |
type: GraphRAG-Bench/GraphRAG-Bench
|
| 89 |
config: default
|
| 90 |
split: test
|
|
@@ -96,7 +96,7 @@ model-index:
|
|
| 96 |
type: text-retrieval
|
| 97 |
name: E-Commerce Graph
|
| 98 |
dataset:
|
| 99 |
-
name: STaRK-Prime
|
| 100 |
type: snap-stanford/stark
|
| 101 |
config: default
|
| 102 |
split: test
|
|
@@ -108,7 +108,7 @@ model-index:
|
|
| 108 |
type: question-answering
|
| 109 |
name: Biomedical Compliance
|
| 110 |
dataset:
|
| 111 |
-
name: BiomixQA
|
| 112 |
type: kg-rag/BiomixQA
|
| 113 |
config: mcq
|
| 114 |
split: train
|
|
@@ -120,7 +120,7 @@ model-index:
|
|
| 120 |
type: text-generation
|
| 121 |
name: Pipeline Eval (RAGAS)
|
| 122 |
dataset:
|
| 123 |
-
name: Pipeline Eval (RAGAS)
|
| 124 |
type: ragas/ragas-eval
|
| 125 |
config: default
|
| 126 |
split: train
|
|
|
|
| 12 |
type: question-answering
|
| 13 |
name: Financial Q&A
|
| 14 |
dataset:
|
| 15 |
+
name: "[FinanceBench](https://huggingface.co/datasets/PatronusAI/financebench)"
|
| 16 |
type: PatronusAI/financebench
|
| 17 |
config: financebench
|
| 18 |
split: train
|
|
|
|
| 24 |
type: text2text-generation
|
| 25 |
name: Table Preservation
|
| 26 |
dataset:
|
| 27 |
+
name: "[T2-RAGBench](https://huggingface.co/datasets/G4KMU/t2-ragbench)"
|
| 28 |
type: G4KMU/t2-ragbench
|
| 29 |
config: default
|
| 30 |
split: test
|
|
|
|
| 36 |
type: text-retrieval
|
| 37 |
name: Multi-Doc Synthesis
|
| 38 |
dataset:
|
| 39 |
+
name: "[FRAMES](https://huggingface.co/datasets/google/frames-benchmark)"
|
| 40 |
type: google/frames-benchmark
|
| 41 |
config: default
|
| 42 |
split: test
|
|
|
|
| 48 |
type: visual-question-answering
|
| 49 |
name: Visual Reasoning
|
| 50 |
dataset:
|
| 51 |
+
name: "[FinRAGBench-V](https://huggingface.co/datasets/FinRAGBench/FinRAGBench-V)"
|
| 52 |
type: FinRAGBench/FinRAGBench-V
|
| 53 |
config: default
|
| 54 |
split: test
|
|
|
|
| 60 |
type: text-classification
|
| 61 |
name: Anti-Hallucination
|
| 62 |
dataset:
|
| 63 |
+
name: "[RGB](https://huggingface.co/datasets/THUDM/RGB)"
|
| 64 |
type: THUDM/RGB
|
| 65 |
config: default
|
| 66 |
split: test
|
|
|
|
| 72 |
type: tabular-classification
|
| 73 |
name: End-to-End Latency
|
| 74 |
dataset:
|
| 75 |
+
name: "[Scale Benchmark](https://github.com/fastbuilderai/scale)"
|
| 76 |
type: FastMemory/Scale
|
| 77 |
config: default
|
| 78 |
split: train
|
|
|
|
| 84 |
type: text-retrieval
|
| 85 |
name: Multi-hop Routing
|
| 86 |
dataset:
|
| 87 |
+
name: "[GraphRAG-Bench](https://huggingface.co/datasets/GraphRAG-Bench/GraphRAG-Bench)"
|
| 88 |
type: GraphRAG-Bench/GraphRAG-Bench
|
| 89 |
config: default
|
| 90 |
split: test
|
|
|
|
| 96 |
type: text-retrieval
|
| 97 |
name: E-Commerce Graph
|
| 98 |
dataset:
|
| 99 |
+
name: "[STaRK-Prime](https://huggingface.co/datasets/snap-stanford/stark)"
|
| 100 |
type: snap-stanford/stark
|
| 101 |
config: default
|
| 102 |
split: test
|
|
|
|
| 108 |
type: question-answering
|
| 109 |
name: Biomedical Compliance
|
| 110 |
dataset:
|
| 111 |
+
name: "[BiomixQA](https://huggingface.co/datasets/kg-rag/BiomixQA)"
|
| 112 |
type: kg-rag/BiomixQA
|
| 113 |
config: mcq
|
| 114 |
split: train
|
|
|
|
| 120 |
type: text-generation
|
| 121 |
name: Pipeline Eval (RAGAS)
|
| 122 |
dataset:
|
| 123 |
+
name: "[Pipeline Eval (RAGAS)](https://huggingface.co/datasets/ragas/ragas-eval)"
|
| 124 |
type: ragas/ragas-eval
|
| 125 |
config: default
|
| 126 |
split: train
|