File size: 1,404 Bytes
991c049
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
"""Sample-data helper.

This repository ships with sample data already at
``data/samples/custom_rag_examples.jsonl``. This script verifies the file
exists and reports basic statistics. We deliberately do not fetch external
datasets here, because each requires conversion to the universal schema.
"""
from __future__ import annotations

import json
import sys
from pathlib import Path


SAMPLE_PATH = Path("data/samples/custom_rag_examples.jsonl")


def main() -> int:
    if not SAMPLE_PATH.exists():
        print(f"ERROR: sample file not found at {SAMPLE_PATH}.", file=sys.stderr)
        print("Re-clone the repository or restore the file from git.", file=sys.stderr)
        return 1
    n_examples = 0
    n_claims = 0
    n_contexts = 0
    with open(SAMPLE_PATH, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            obj = json.loads(line)
            n_examples += 1
            n_claims += len(obj.get("gold_claim_labels") or [])
            n_contexts += len(obj.get("contexts") or [])
    print(f"Sample file: {SAMPLE_PATH}")
    print(f"  examples:        {n_examples}")
    print(f"  total claims:    {n_claims}")
    print(f"  total contexts:  {n_contexts}")
    print("\nAll sample examples are clearly marked as synthetic.")
    return 0


if __name__ == "__main__":
    # Run as a script: use main()'s return value as the process exit status.
    sys.exit(main())