hermes/tests/integration/test_batch_runner.py
#!/usr/bin/env python3
"""
Test script for the batch runner.

This script tests the batch runner with a small sample dataset
to verify functionality before running large batches.
"""
import json
import shutil
import sys
from pathlib import Path

import pytest

pytestmark = pytest.mark.integration
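
# Usage sketch (assumption: the "integration" marker used above is registered in the
# project's pytest configuration; the path matches this file's location in the repo):
#   pytest -m integration tests/integration/test_batch_runner.py
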
def create_test_dataset():
    """Create a small test dataset."""
    test_file = Path("tests/test_dataset.jsonl")
    test_file.parent.mkdir(exist_ok=True)

    prompts = [
        {"prompt": "What is 2 + 2?"},
        {"prompt": "What is the capital of France?"},
        {"prompt": "Explain what Python is in one sentence."},
    ]

    with open(test_file, "w", encoding="utf-8") as f:
        for prompt in prompts:
            f.write(json.dumps(prompt, ensure_ascii=False) + "\n")

    print(f"βœ… Created test dataset: {test_file}")
    return test_file
def cleanup_test_run(run_name):
    """Clean up test run output."""
    output_dir = Path("data") / run_name
    if output_dir.exists():
        shutil.rmtree(output_dir)
        print(f"πŸ—‘οΈ Cleaned up test output: {output_dir}")
def verify_output(run_name):
    """Verify that output files were created correctly."""
    output_dir = Path("data") / run_name

    # Check directory exists
    if not output_dir.exists():
        print(f"❌ Output directory not found: {output_dir}")
        return False

    # Check for checkpoint
    checkpoint_file = output_dir / "checkpoint.json"
    if not checkpoint_file.exists():
        print(f"❌ Checkpoint file not found: {checkpoint_file}")
        return False

    # Check for statistics
    stats_file = output_dir / "statistics.json"
    if not stats_file.exists():
        print(f"❌ Statistics file not found: {stats_file}")
        return False

    # Check for batch files
    batch_files = list(output_dir.glob("batch_*.jsonl"))
    if not batch_files:
        print(f"❌ No batch files found in: {output_dir}")
        return False

    print("βœ… Output verification passed:")
    print(f" - Checkpoint: {checkpoint_file}")
    print(f" - Statistics: {stats_file}")
    print(f" - Batch files: {len(batch_files)}")

    # Load and display statistics
    with open(stats_file) as f:
        stats = json.load(f)

    print("\nπŸ“Š Statistics Summary:")
    print(f" - Total prompts: {stats['total_prompts']}")
    print(f" - Total batches: {stats['total_batches']}")
    print(f" - Duration: {stats['duration_seconds']}s")

    if stats.get('tool_statistics'):
        print(" - Tool calls:")
        for tool, tool_stats in stats['tool_statistics'].items():
            print(f" β€’ {tool}: {tool_stats['count']} calls, {tool_stats['success_rate']:.1f}% success")

    return True
def main():
    """Run the test."""
    print("πŸ§ͺ Batch Runner Test")
    print("=" * 60)

    run_name = "test_run"

    # Clean up any previous test run
    cleanup_test_run(run_name)

    # Create test dataset
    test_file = create_test_dataset()

    print("\nπŸ“ To run the test manually:")
    print(" python batch_runner.py \\")
    print(f" --dataset_file={test_file} \\")
    print(" --batch_size=2 \\")
    print(f" --run_name={run_name} \\")
    print(" --distribution=minimal \\")
    print(" --num_workers=2")

    print("\nπŸ’‘ Or test with different distributions:")
    print(" python batch_runner.py --list_distributions")

    print("\nπŸ” After running, you can verify output with:")
    print(" python tests/test_batch_runner.py --verify")

    # Note: we don't actually run the batch runner here to avoid API calls during
    # testing; users should run it manually with their API keys configured.
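
# Minimal pytest smoke test: a sketch that exercises only the local helpers above and,
# per the note, makes no API calls. The test name and assertions are additions for
# illustration, not part of the original batch runner workflow.
def test_create_test_dataset_writes_valid_jsonl():
    test_file = create_test_dataset()
    try:
        lines = test_file.read_text(encoding="utf-8").splitlines()
        assert len(lines) == 3
        for line in lines:
            assert "prompt" in json.loads(line)
    finally:
        test_file.unlink(missing_ok=True)
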
if __name__ == "__main__":
    if "--verify" in sys.argv:
        run_name = "test_run"
        verify_output(run_name)
    else:
        main()