#!/usr/bin/env python3
"""
Test script for the batch runner.

This script tests the batch runner with a small sample dataset
to verify functionality before running large batches.
"""

import json
import shutil
import sys
from pathlib import Path

import pytest

pytestmark = pytest.mark.integration


def create_test_dataset():
    """Create a small test dataset."""
    test_file = Path("tests/test_dataset.jsonl")
    test_file.parent.mkdir(exist_ok=True)
    
    prompts = [
        {"prompt": "What is 2 + 2?"},
        {"prompt": "What is the capital of France?"},
        {"prompt": "Explain what Python is in one sentence."},
    ]
    
    with open(test_file, 'w') as f:
        for prompt in prompts:
            f.write(json.dumps(prompt, ensure_ascii=False) + "\n")
    
    print(f"βœ… Created test dataset: {test_file}")
    return test_file
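

# A hedged pytest entry point, added for illustration: without it this module
# defines no functions matching pytest's default test_* pattern, so the
# integration marker above has nothing to apply to. It only exercises local
# dataset creation and never touches the API-calling batch runner.
def test_dataset_roundtrip():
    """Create the sample dataset and check that every line parses back."""
    test_file = create_test_dataset()
    lines = test_file.read_text().splitlines()
    assert len(lines) == 3
    assert all("prompt" in json.loads(line) for line in lines)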


def cleanup_test_run(run_name):
    """Clean up test run output."""
    output_dir = Path("data") / run_name
    if output_dir.exists():
        shutil.rmtree(output_dir)
        print(f"πŸ—‘οΈ  Cleaned up test output: {output_dir}")


def verify_output(run_name):
    """Verify that output files were created correctly."""
    output_dir = Path("data") / run_name
    
    # Check directory exists
    if not output_dir.exists():
        print(f"❌ Output directory not found: {output_dir}")
        return False
    
    # Check for checkpoint
    checkpoint_file = output_dir / "checkpoint.json"
    if not checkpoint_file.exists():
        print(f"❌ Checkpoint file not found: {checkpoint_file}")
        return False
    
    # Check for statistics
    stats_file = output_dir / "statistics.json"
    if not stats_file.exists():
        print(f"❌ Statistics file not found: {stats_file}")
        return False
    
    # Check for batch files
    batch_files = list(output_dir.glob("batch_*.jsonl"))
    if not batch_files:
        print(f"❌ No batch files found in: {output_dir}")
        return False
    
    print(f"βœ… Output verification passed:")
    print(f"   - Checkpoint: {checkpoint_file}")
    print(f"   - Statistics: {stats_file}")
    print(f"   - Batch files: {len(batch_files)}")
    
    # Load and display statistics
    with open(stats_file) as f:
        stats = json.load(f)
    
    print(f"\nπŸ“Š Statistics Summary:")
    print(f"   - Total prompts: {stats['total_prompts']}")
    print(f"   - Total batches: {stats['total_batches']}")
    print(f"   - Duration: {stats['duration_seconds']}s")
    
    if stats.get('tool_statistics'):
        print(f"   - Tool calls:")
        for tool, tool_stats in stats['tool_statistics'].items():
            print(f"     β€’ {tool}: {tool_stats['count']} calls, {tool_stats['success_rate']:.1f}% success")
    
    return True
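

# A self-contained smoke test for verify_output(): it fabricates a run
# directory with the files the checker looks for, then confirms verification
# passes, all without any API calls. The statistics payload below is a guess
# based only on the keys verify_output() reads; the real batch runner's
# output format may contain more fields. Run it manually (it is deliberately
# not named test_* so pytest skips it).
def self_test_verify_output():
    """Exercise verify_output() against a synthetic run directory."""
    run_name = "verify_selftest"
    output_dir = Path("data") / run_name
    output_dir.mkdir(parents=True, exist_ok=True)

    # Minimal stand-ins for the artifacts a real run would produce.
    (output_dir / "checkpoint.json").write_text(json.dumps({"completed": 3}))
    (output_dir / "statistics.json").write_text(json.dumps({
        "total_prompts": 3,  # hypothetical values, for illustration only
        "total_batches": 2,
        "duration_seconds": 1.2,
        "tool_statistics": {
            "calculator": {"count": 1, "success_rate": 100.0},
        },
    }))
    (output_dir / "batch_0001.jsonl").write_text(
        json.dumps({"prompt": "What is 2 + 2?"}) + "\n"
    )

    try:
        assert verify_output(run_name), "verify_output() should accept synthetic data"
    finally:
        shutil.rmtree(output_dir)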


def main():
    """Run the test."""
    print("πŸ§ͺ Batch Runner Test")
    print("=" * 60)
    
    run_name = "test_run"
    
    # Clean up any previous test run
    cleanup_test_run(run_name)
    
    # Create test dataset
    test_file = create_test_dataset()
    
    print(f"\nπŸ“ To run the test manually:")
    print(f"   python batch_runner.py \\")
    print(f"       --dataset_file={test_file} \\")
    print(f"       --batch_size=2 \\")
    print(f"       --run_name={run_name} \\")
    print(f"       --distribution=minimal \\")
    print(f"       --num_workers=2")
    
    print(f"\nπŸ’‘ Or test with different distributions:")
    print(f"   python batch_runner.py --list_distributions")
    
    print(f"\nπŸ” After running, you can verify output with:")
    print(f"   python tests/test_batch_runner.py --verify")
    
    # Note: we don't actually invoke the batch runner here, to avoid API calls
    # during testing. Run it manually with API keys configured, or use the
    # optional run_batch_runner() helper below.
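

# Optional end-to-end helper: a hedged sketch, not part of the original manual
# flow. It assumes batch_runner.py sits at the repository root and accepts
# exactly the flags printed by main() above, and that API keys are already
# configured in the environment. Nothing calls it automatically, so the
# default test flow still makes no API calls.
def run_batch_runner(test_file, run_name):
    """Invoke the batch runner as a subprocess, then verify its output."""
    import subprocess  # local import: only needed for this optional helper

    cmd = [
        sys.executable, "batch_runner.py",
        f"--dataset_file={test_file}",
        "--batch_size=2",
        f"--run_name={run_name}",
        "--distribution=minimal",
        "--num_workers=2",
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"❌ Batch runner failed:\n{result.stderr}")
        return False
    return verify_output(run_name)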


if __name__ == "__main__":
    if "--verify" in sys.argv:
        run_name = "test_run"
        verify_output(run_name)
    else:
        main()