morphological-transformer

Runtime error

morphological-transformer / scripts /test_optimizations.py

akki2825

Initial deployment of Morphological Transformer with ZeroGPU

1f39ae1 5 months ago

6.62 kB

	#!/usr/bin/env python3
	"""
	Test script to verify training optimizations work correctly
	"""

	import torch
	import os
	import sys
	from pathlib import Path

	def test_mixed_precision():
	    """Check that the mixed-precision building blocks can be used.

	    Imports ``GradScaler`` and ``autocast`` from ``torch.cuda.amp``, creates
	    an enabled scaler, and multiplies two random 10x10 tensors inside an
	    ``autocast`` context.

	    Returns:
	        bool: True when every step completes, False if any step raises (the
	        error is printed instead of being propagated).
	    """
	    print("Testing Mixed Precision Training...")

	    try:
	        from torch.cuda.amp import GradScaler, autocast

	        # Only construction is exercised; the scaler is not used afterwards.
	        grad_scaler = GradScaler(enabled=True)
	        print("✓ GradScaler created successfully")

	        # One matrix product is enough to show the context can be entered,
	        # used and exited without raising.
	        with autocast():
	            lhs = torch.randn(10, 10)
	            rhs = torch.randn(10, 10)
	            product = torch.matmul(lhs, rhs)
	        print("✓ Autocast context works")
	    except Exception as e:
	        print(f"✗ Mixed precision test failed: {e}")
	        return False
	    else:
	        return True

	def test_optimized_dataset():
	    """Smoke-test vocabulary building and ``MorphologicalDataset`` creation.

	    Writes a two-line source file and a two-line target file, builds one
	    vocabulary per file with ``build_vocabulary``, constructs the dataset
	    with ``max_length=10`` and fetches item 0.

	    The fixture files live in a ``tempfile.TemporaryDirectory`` instead of a
	    fixed ``test_data`` folder in the working directory. That way they are
	    removed even when a step fails, and an unrelated ``test_data`` directory
	    that already exists is never overwritten or deleted.

	    Returns:
	        bool: True when every step completes, False if any step raises (the
	        error is printed instead of being propagated).
	    """
	    print("\nTesting Optimized Dataset...")

	    try:
	        import tempfile

	        from morphological_dataset import MorphologicalDataset, build_vocabulary

	        # The context manager deletes the directory on exit, on success and
	        # on failure alike.
	        with tempfile.TemporaryDirectory(prefix="test_data_") as data_dir:
	            src_path = os.path.join(data_dir, "test.src")
	            tgt_path = os.path.join(data_dir, "test.tgt")

	            # Create dummy data files
	            with open(src_path, "w", encoding="utf-8") as f:
	                f.write("hello world\n")
	                f.write("test sequence\n")

	            with open(tgt_path, "w", encoding="utf-8") as f:
	                f.write("hola mundo\n")
	                f.write("secuencia prueba\n")

	            # Test vocabulary building
	            src_vocab = build_vocabulary([src_path])
	            tgt_vocab = build_vocabulary([tgt_path])
	            print("✓ Vocabulary building works")

	            # Test dataset creation
	            dataset = MorphologicalDataset(
	                src_path, tgt_path, src_vocab, tgt_vocab, max_length=10
	            )
	            print("✓ Dataset creation works")

	            # Test data loading while the fixture files still exist, in case
	            # the dataset reads them lazily.
	            item = dataset[0]
	            print(f"✓ Dataset item shape: {len(item)}")

	        return True
	    except Exception as e:
	        print(f"✗ Dataset test failed: {e}")
	        return False

	def test_optimized_dataloader():
	    """Smoke-test a ``DataLoader`` built on top of ``MorphologicalDataset``.

	    Writes a two-line source file and a two-line target file, builds the
	    vocabularies and the dataset, wraps the dataset in a ``DataLoader``
	    (batch size 2, shuffled, ``drop_last=True``, no worker processes) and
	    unpacks the first batch into ``src, src_mask, tgt, tgt_mask``.

	    The fixture files live in a ``tempfile.TemporaryDirectory`` instead of a
	    fixed ``test_data`` folder in the working directory. That way they are
	    removed even when a step fails, and an unrelated ``test_data`` directory
	    that already exists is never overwritten or deleted.

	    Returns:
	        bool: True when every step completes, False if any step raises (the
	        error is printed instead of being propagated).
	    """
	    print("\nTesting Optimized DataLoader...")

	    try:
	        import tempfile

	        from torch.utils.data import DataLoader
	        from morphological_dataset import MorphologicalDataset, build_vocabulary, collate_fn

	        # The context manager deletes the directory on exit, on success and
	        # on failure alike.
	        with tempfile.TemporaryDirectory(prefix="test_data_") as data_dir:
	            src_path = os.path.join(data_dir, "test.src")
	            tgt_path = os.path.join(data_dir, "test.tgt")

	            # Create test dataset
	            with open(src_path, "w", encoding="utf-8") as f:
	                f.write("hello world\n")
	                f.write("test sequence\n")

	            with open(tgt_path, "w", encoding="utf-8") as f:
	                f.write("hola mundo\n")
	                f.write("secuencia prueba\n")

	            src_vocab = build_vocabulary([src_path])
	            tgt_vocab = build_vocabulary([tgt_path])
	            dataset = MorphologicalDataset(
	                src_path, tgt_path, src_vocab, tgt_vocab, max_length=10
	            )

	            # Test optimized DataLoader
	            dataloader = DataLoader(
	                dataset,
	                batch_size=2,
	                shuffle=True,
	                collate_fn=lambda batch: collate_fn(batch, src_vocab, tgt_vocab, 10),
	                num_workers=0,  # Use 0 for testing
	                pin_memory=False,  # Disable for testing
	                persistent_workers=False,  # Disable for testing
	                drop_last=True,
	            )

	            # Test iteration: only the first batch is inspected. Iterating
	            # inside the ``with`` keeps the fixture files available.
	            for batch in dataloader:
	                src, src_mask, tgt, tgt_mask = batch
	                print(f"✓ Batch shapes - src: {src.shape}, tgt: {tgt.shape}")
	                break

	        return True
	    except Exception as e:
	        print(f"✗ DataLoader test failed: {e}")
	        return False

	def test_cuda_optimizations():
	    """Exercise the CUDA-specific settings, skipping when CUDA is absent.

	    Without CUDA the check is reported as skipped and counted as a pass.
	    With CUDA, cuDNN benchmark mode is switched on, cuDNN determinism is
	    switched off, and a random 100x100 tensor is copied to the GPU with
	    ``non_blocking=True``.

	    Returns:
	        bool: True on skip or success, False if any CUDA step raises (the
	        error is printed instead of being propagated).
	    """
	    print("\nTesting CUDA Optimizations...")

	    if torch.cuda.is_available():
	        try:
	            # NOTE: these flags are not restored afterwards, so they stay in
	            # effect for the remainder of the process.
	            torch.backends.cudnn.benchmark = True
	            torch.backends.cudnn.deterministic = False
	            print("✓ CUDA optimizations enabled")

	            # Host-to-device copy requested as non-blocking.
	            host_tensor = torch.randn(100, 100)
	            device_tensor = host_tensor.cuda(non_blocking=True)
	            print("✓ Non-blocking CUDA transfer works")
	        except Exception as e:
	            print(f"✗ CUDA test failed: {e}")
	            return False
	        return True

	    print("⚠ CUDA not available, skipping CUDA tests")
	    return True

	def test_model_creation():
	    """Check that a ``TagTransformer`` can be built and its size reported.

	    Imports ``TagTransformer`` from the ``transformer`` module, instantiates
	    it with a fixed set of keyword arguments, and prints the value returned
	    by ``count_nb_params()``.

	    Returns:
	        bool: True when every step completes, False if any step raises (the
	        error is printed instead of being propagated).
	    """
	    print("\nTesting Model Creation...")

	    try:
	        from transformer import TagTransformer

	        # Constructor arguments gathered in one place, then unpacked.
	        model_kwargs = {
	            "src_vocab_size": 1000,
	            "trg_vocab_size": 1000,
	            "embed_dim": 256,
	            "nb_heads": 4,
	            "src_hid_size": 1024,
	            "src_nb_layers": 2,
	            "trg_hid_size": 1024,
	            "trg_nb_layers": 2,
	            "dropout_p": 0.1,
	            "tie_trg_embed": True,
	            "label_smooth": 0.1,
	            "nb_attr": 5,
	            "src_c2i": {},
	            "trg_c2i": {},
	            "attr_c2i": {},
	        }
	        model = TagTransformer(**model_kwargs)
	        print("✓ Model creation works")

	        # Report the parameter count with thousands separators.
	        nb_params = model.count_nb_params()
	        print(f"✓ Model has {nb_params:,} parameters")
	    except Exception as e:
	        print(f"✗ Model test failed: {e}")
	        return False
	    return True

	def run_all_tests():
	    """Run every optimization check in order and print a summary.

	    Each check is called inside its own ``try`` so that an exception
	    escaping one check is reported and counted as a failure without stopping
	    the remaining checks.

	    Returns:
	        bool: True only if every check returned a truthy value.
	    """
	    print("=== Testing Training Optimizations ===\n")

	    checks = (
	        test_mixed_precision,
	        test_optimized_dataset,
	        test_optimized_dataloader,
	        test_cuda_optimizations,
	        test_model_creation,
	    )

	    # One boolean per check; an escaped exception is recorded as False.
	    outcomes = []
	    for check in checks:
	        try:
	            outcomes.append(bool(check()))
	        except Exception as e:
	            print(f"✗ Test {check.__name__} failed with exception: {e}")
	            outcomes.append(False)

	    passed = sum(outcomes)
	    total = len(checks)

	    print("\n=== Test Results ===")
	    print(f"Passed: {passed}/{total}")

	    all_passed = passed == total
	    if all_passed:
	        print("🎉 All tests passed! Optimizations are working correctly.")
	    else:
	        print("❌ Some tests failed. Check the errors above.")
	    return all_passed

	if __name__ == "__main__":
	    # The process exit status mirrors the overall result: 0 when every check
	    # passed, 1 otherwise.
	    exit_code = 0 if run_all_tests() else 1
	    sys.exit(exit_code)