hermes/tests/integration/test_batch_runner.py
#!/usr/bin/env python3
"""
Test script for the batch runner.

This script tests the batch runner with a small sample dataset
to verify functionality before running large batches.
"""
import json
import shutil
import sys
from pathlib import Path

import pytest

pytestmark = pytest.mark.integration
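
# Usage sketch (assumption: the "integration" marker used above is registered in the
# project's pytest configuration; the path matches this file's location in the repo):
#   pytest -m integration tests/integration/test_batch_runner.py
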
def create_test_dataset():
    """Create a small test dataset."""
    test_file = Path("tests/test_dataset.jsonl")
    test_file.parent.mkdir(exist_ok=True)

    prompts = [
        {"prompt": "What is 2 + 2?"},
        {"prompt": "What is the capital of France?"},
        {"prompt": "Explain what Python is in one sentence."},
    ]

    with open(test_file, "w", encoding="utf-8") as f:
        for prompt in prompts:
            f.write(json.dumps(prompt, ensure_ascii=False) + "\n")

    print(f"βœ… Created test dataset: {test_file}")
    return test_file
def cleanup_test_run(run_name):
    """Clean up test run output."""
    output_dir = Path("data") / run_name
    if output_dir.exists():
        shutil.rmtree(output_dir)
        print(f"πŸ—‘οΈ Cleaned up test output: {output_dir}")
def verify_output(run_name):
    """Verify that output files were created correctly."""
    output_dir = Path("data") / run_name

    # Check directory exists
    if not output_dir.exists():
        print(f"❌ Output directory not found: {output_dir}")
        return False

    # Check for checkpoint
    checkpoint_file = output_dir / "checkpoint.json"
    if not checkpoint_file.exists():
        print(f"❌ Checkpoint file not found: {checkpoint_file}")
        return False

    # Check for statistics
    stats_file = output_dir / "statistics.json"
    if not stats_file.exists():
        print(f"❌ Statistics file not found: {stats_file}")
        return False

    # Check for batch files
    batch_files = list(output_dir.glob("batch_*.jsonl"))
    if not batch_files:
        print(f"❌ No batch files found in: {output_dir}")
        return False

    print("βœ… Output verification passed:")
    print(f" - Checkpoint: {checkpoint_file}")
    print(f" - Statistics: {stats_file}")
    print(f" - Batch files: {len(batch_files)}")

    # Load and display statistics
    with open(stats_file) as f:
        stats = json.load(f)

    print("\nπŸ“Š Statistics Summary:")
    print(f" - Total prompts: {stats['total_prompts']}")
    print(f" - Total batches: {stats['total_batches']}")
    print(f" - Duration: {stats['duration_seconds']}s")

    if stats.get('tool_statistics'):
        print(" - Tool calls:")
        for tool, tool_stats in stats['tool_statistics'].items():
            print(f" β€’ {tool}: {tool_stats['count']} calls, {tool_stats['success_rate']:.1f}% success")

    return True
def main():
    """Run the test."""
    print("πŸ§ͺ Batch Runner Test")
    print("=" * 60)

    run_name = "test_run"

    # Clean up any previous test run
    cleanup_test_run(run_name)

    # Create test dataset
    test_file = create_test_dataset()

    print("\nπŸ“ To run the test manually:")
    print(" python batch_runner.py \\")
    print(f" --dataset_file={test_file} \\")
    print(" --batch_size=2 \\")
    print(f" --run_name={run_name} \\")
    print(" --distribution=minimal \\")
    print(" --num_workers=2")

    print("\nπŸ’‘ Or test with different distributions:")
    print(" python batch_runner.py --list_distributions")

    print("\nπŸ” After running, you can verify output with:")
    print(" python tests/test_batch_runner.py --verify")

    # Note: we don't actually run the batch runner here to avoid API calls during
    # testing; users should run it manually with their API keys configured.
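
# Minimal pytest smoke test: a sketch that exercises only the local helpers above and,
# per the note, makes no API calls. The test name and assertions are additions for
# illustration, not part of the original batch runner workflow.
def test_create_test_dataset_writes_valid_jsonl():
    test_file = create_test_dataset()
    try:
        lines = test_file.read_text(encoding="utf-8").splitlines()
        assert len(lines) == 3
        for line in lines:
            assert "prompt" in json.loads(line)
    finally:
        test_file.unlink(missing_ok=True)
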
if __name__ == "__main__":
    if "--verify" in sys.argv:
        run_name = "test_run"
        verify_output(run_name)
    else:
        main()