File size: 2,394 Bytes
2aed081
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json
import os
import random

def load_data(raw_dir="data/raw/"):
    """Load every ``*.json`` sample found directly in *raw_dir*.

    Files are read in sorted filename order. ``os.listdir`` returns entries
    in arbitrary order, so without sorting the sample order (and therefore
    any seeded random procedure that walks the samples) could differ from
    one machine or filesystem to another.

    Each file is parsed with ``json.load`` and the result is normalized in
    place (the code calls ``.get`` on it, so it is expected to be a JSON
    object):

    * ``is_hallucination``: a string value becomes a bool that is True only
      when the text, ignoring case and surrounding whitespace, is "true".
    * ``hallucination_step``: any non-None value is converted with ``int``.
    * ``trajectory``: if absent but ``history`` is present, ``trajectory``
      is set to the same object as ``history``.

    Args:
        raw_dir: Directory to scan; entries not ending in ".json" are ignored.

    Returns:
        A list of the parsed sample dicts, ordered by filename.

    Raises:
        OSError: If *raw_dir* or one of its files cannot be read.
        json.JSONDecodeError: If a file does not contain valid JSON.
        ValueError / TypeError: If ``hallucination_step`` cannot be
            converted to ``int``.
    """
    all_samples = []
    # Sorted so the output order is deterministic across platforms.
    for filename in sorted(os.listdir(raw_dir)):
        if not filename.endswith(".json"):
            continue

        path = os.path.join(raw_dir, filename)
        with open(path, 'r', encoding='utf-8') as fh:
            sample = json.load(fh)

        # Fix types since some fields are strings in the raw JSON
        flag = sample.get("is_hallucination")
        if isinstance(flag, str):
            sample["is_hallucination"] = flag.strip().lower() == "true"

        step = sample.get("hallucination_step")
        if step is not None:
            sample["hallucination_step"] = int(step)

        # The implementation plan uses 'trajectory', but raw data has 'history'
        if "history" in sample and "trajectory" not in sample:
            sample["trajectory"] = sample["history"]

        all_samples.append(sample)
    return all_samples

def random_baseline_accuracy(samples):
    """Score a uniform-random guess of the hallucination step.

    Only samples whose ``is_hallucination`` value is truthy are scored. For
    each one with a non-empty ``trajectory``, a step is drawn uniformly from
    ``1..len(trajectory)`` (inclusive) and compared with the sample's
    ``hallucination_step``.

    Samples with a missing or empty ``trajectory`` are never guessed, yet
    they still count in the denominator, so they always score as misses.

    Args:
        samples: Iterable of sample dicts.

    Returns:
        The fraction of hallucinated samples guessed correctly, or ``0.0``
        when there are no hallucinated samples.
    """
    positives = [item for item in samples if item.get("is_hallucination")]
    if len(positives) == 0:
        return 0.0

    hits = 0
    for item in positives:
        length = len(item.get("trajectory", []))
        # Short-circuit keeps randint from being called on empty trajectories.
        if length != 0 and random.randint(1, length) == item.get("hallucination_step"):
            hits += 1

    return hits / len(positives)

if __name__ == "__main__":
    # Fixed seed so repeated runs draw the same sequence of random guesses.
    random.seed(42)

    # The raw data directory sits two levels above this script's directory,
    # under data/raw.
    here = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(here, "..", "..", "data", "raw")

    samples = load_data(data_dir)
    print(f"Total samples loaded: {len(samples)}")

    # Every sample is either flagged as a hallucination or counted as clean.
    n_hallucinated = sum(1 for s in samples if s.get("is_hallucination"))
    n_clean = len(samples) - n_hallucinated
    print(f"Hallucinated: {n_hallucinated}")
    print(f"Clean: {n_clean}")

    # Average the baseline over many trials to smooth out sampling noise.
    n_trials = 1000
    scores = []
    for _ in range(n_trials):
        scores.append(random_baseline_accuracy(samples))
    avg_score = sum(scores) / len(scores) * 100
    print(f"Random baseline step localization accuracy: {avg_score:.2f}%")