# quantum-nsn-integration/test_nsn_integration.py
# -*- coding: utf-8 -*-
"""
Test Suite for NSN Integration
Validates all three stages of NSN integration
"""
import sys
import os
# Make the repository root importable when this file is run directly
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
import unittest
from quantum_integration.nsn_integration import (
BackendAwareRankSelector,
BackendType,
MultilingualNSNEvaluator,
NSNLeaderboard,
NSNDashboard
)
class TestBackendAwareRankSelector(unittest.TestCase):
"""Test Stage 1: Backend-Aware Rank Selection"""
def setUp(self):
self.selector = BackendAwareRankSelector()
def test_rank_selection_low_qubit(self):
"""Test rank selection for low-qubit backend"""
rank_config = self.selector.select_rank(
BackendType.IBM_MANILA,
target_reliability=0.85
)
self.assertEqual(rank_config.rank, 8, "Low-qubit backend should select rank 8")
self.assertLess(rank_config.flops, 1e7, "Low rank should have low FLOPs")
def test_rank_selection_high_fidelity(self):
"""Test rank selection for high-fidelity backend"""
rank_config = self.selector.select_rank(
BackendType.IBM_WASHINGTON,
target_reliability=0.90
)
self.assertGreaterEqual(rank_config.rank, 64, "High-fidelity backend should support high rank")
self.assertGreater(rank_config.expected_reliability, 0.85)
def test_flops_vs_reliability_curve(self):
"""Test FLOPs vs reliability curve generation"""
curve = self.selector.compute_flops_vs_reliability(BackendType.IBM_WASHINGTON)
self.assertGreater(len(curve), 0, "Curve should have points")
        # Verify the curve is sorted by FLOPs (non-decreasing)
        flops_values = [point[0] for point in curve]
        self.assertEqual(flops_values, sorted(flops_values),
                         "FLOPs should be non-decreasing along the curve")
def test_rank_recommendation(self):
"""Test rank recommendation with constraints"""
recommendation = self.selector.get_rank_recommendation(
backend_type=BackendType.RUSSIAN_SIMULATOR,
compute_budget=1e8,
min_reliability=0.90
)
self.assertIn('recommended_rank', recommendation)
self.assertIn('expected_reliability', recommendation)
self.assertIn('rationale', recommendation)
self.assertLessEqual(recommendation['flops'], 1e8, "Should respect compute budget")
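    # Hedged sketch: a bounds check on the reliability axis of the curve.
    # test_flops_vs_reliability_curve confirms index 0 of each point is FLOPs;
    # index 1 holding the expected reliability is an assumption.
    def test_reliability_curve_bounds(self):
        """Sketch: curve reliabilities should be valid probabilities"""
        curve = self.selector.compute_flops_vs_reliability(BackendType.IBM_WASHINGTON)
        for point in curve:
            self.assertGreater(point[1], 0, "Reliability should be positive")
            self.assertLessEqual(point[1], 1, "Reliability should not exceed 1")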
class TestMultilingualNSNEvaluator(unittest.TestCase):
"""Test Stage 2: Multilingual Edit Reliability"""
def setUp(self):
self.evaluator = MultilingualNSNEvaluator()
def test_language_edit_evaluation(self):
"""Test single language edit evaluation"""
result = self.evaluator.evaluate_language_edit('english', rank=64)
self.assertEqual(result.language, 'english')
self.assertEqual(result.rank, 64)
self.assertGreater(result.edit_accuracy, 0)
self.assertLess(result.edit_accuracy, 1)
self.assertGreater(result.uncertainty, 0)
def test_resource_level_accuracy(self):
"""Test that high-resource languages have higher accuracy"""
high_resource = self.evaluator.evaluate_language_edit('english', rank=64)
low_resource = self.evaluator.evaluate_language_edit('swahili', rank=64)
self.assertGreater(high_resource.edit_accuracy, low_resource.edit_accuracy,
"High-resource language should have higher accuracy")
def test_rank_scaling(self):
"""Test that higher rank improves accuracy"""
low_rank = self.evaluator.evaluate_language_edit('indonesian', rank=8)
high_rank = self.evaluator.evaluate_language_edit('indonesian', rank=128)
self.assertGreater(high_rank.edit_accuracy, low_rank.edit_accuracy,
"Higher rank should improve accuracy")
self.assertLess(high_rank.uncertainty, low_rank.uncertainty,
"Higher rank should reduce uncertainty")
def test_subspace_containment(self):
"""Test subspace containment analysis"""
containment = self.evaluator.evaluate_subspace_containment(
source_lang='indonesian',
target_lang='english',
rank=64
)
self.assertEqual(containment.source_lang, 'indonesian')
self.assertEqual(containment.target_lang, 'english')
self.assertGreater(containment.containment_score, 0)
self.assertLess(containment.containment_score, 1)
def test_uncertainty_weights(self):
"""Test uncertainty weight computation"""
languages = ['english', 'indonesian', 'swahili']
weights = self.evaluator.compute_uncertainty_weights(languages)
self.assertEqual(len(weights), 3)
self.assertAlmostEqual(sum(weights.values()), 1.0, places=5,
msg="Weights should sum to 1")
# Low-resource languages should have higher weights
self.assertGreater(weights['swahili'], weights['english'])
def test_rank_language_matrix(self):
"""Test comprehensive rank-language analysis"""
languages = ['english', 'chinese', 'indonesian']
analysis = self.evaluator.analyze_rank_language_matrix(languages)
self.assertIn('accuracy_matrix', analysis)
self.assertIn('containment_analysis', analysis)
self.assertIn('uncertainty_weights', analysis)
# Verify all languages are in matrix
for lang in languages:
self.assertIn(lang, analysis['accuracy_matrix'])
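    # Hedged sketch: each normalized weight should be a valid proportion. This
    # relies only on compute_uncertainty_weights() returning a language-to-weight
    # mapping, as already exercised in test_uncertainty_weights.
    def test_uncertainty_weights_are_proportions(self):
        """Sketch: every uncertainty weight should lie strictly in (0, 1)"""
        weights = self.evaluator.compute_uncertainty_weights(
            ['english', 'indonesian', 'swahili'])
        for lang, weight in weights.items():
            self.assertGreater(weight, 0, f"{lang} weight should be positive")
            self.assertLess(weight, 1, f"{lang} weight should be below 1")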
class TestNSNLeaderboard(unittest.TestCase):
"""Test Stage 3: Contributor Challenges"""
def setUp(self):
self.leaderboard = NSNLeaderboard()
def test_challenge_creation(self):
"""Test challenge creation"""
challenge = self.leaderboard.create_challenge(
challenge_id="test_challenge",
title="Test Challenge",
description="Test description",
languages=['english', 'chinese'],
ranks=[8, 32, 64]
)
self.assertEqual(challenge.challenge_id, "test_challenge")
self.assertEqual(len(challenge.languages), 2)
self.assertEqual(len(challenge.ranks_to_evaluate), 3)
def test_submission(self):
"""Test edit submission"""
# Create challenge
self.leaderboard.create_challenge(
challenge_id="test_challenge",
title="Test",
description="Test",
languages=['english'],
ranks=[8, 32]
)
# Submit edit
rank_results = {
8: {'accuracy': 0.75, 'uncertainty': 0.20, 'flops': 6.4e5, 'efficiency': 0.012},
32: {'accuracy': 0.88, 'uncertainty': 0.12, 'flops': 1.02e7, 'efficiency': 0.009}
}
submission = self.leaderboard.submit_edit(
challenge_id="test_challenge",
contributor_id="test_contributor",
language="english",
edit_description="Test edit",
rank_results=rank_results
)
self.assertEqual(submission.contributor_id, "test_contributor")
self.assertEqual(len(submission.ranks_evaluated), 2)
def test_leaderboard_ranking(self):
"""Test leaderboard ranking computation"""
# Create challenge
self.leaderboard.create_challenge(
challenge_id="test_challenge",
title="Test",
description="Test",
languages=['english'],
ranks=[32]
)
# Submit multiple edits
for i in range(3):
rank_results = {
32: {
'accuracy': 0.80 + i * 0.05,
'uncertainty': 0.15 - i * 0.02,
'flops': 1e7,
'efficiency': 0.008 + i * 0.001
}
}
self.leaderboard.submit_edit(
challenge_id="test_challenge",
contributor_id=f"contributor_{i}",
language="english",
edit_description=f"Edit {i}",
rank_results=rank_results
)
# Get leaderboard
rankings = self.leaderboard.get_leaderboard("test_challenge")
self.assertEqual(len(rankings), 3)
self.assertEqual(rankings[0]['position'], 1)
        # Verify scores are in descending (non-increasing) order
        scores = [r['score'] for r in rankings]
        self.assertEqual(scores, sorted(scores, reverse=True))
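    # Hedged sketch: leaderboard positions should form the consecutive run
    # 1..N. Builds only on the 'position' key asserted in test_leaderboard_ranking.
    def test_positions_are_consecutive(self):
        """Sketch: positions should be 1..N with no gaps"""
        self.leaderboard.create_challenge(
            challenge_id="test_challenge", title="Test", description="Test",
            languages=['english'], ranks=[32]
        )
        for i in range(3):
            self.leaderboard.submit_edit(
                challenge_id="test_challenge",
                contributor_id=f"contributor_{i}",
                language="english",
                edit_description=f"Edit {i}",
                rank_results={32: {'accuracy': 0.80 + i * 0.05, 'uncertainty': 0.12,
                                   'flops': 1e7, 'efficiency': 0.009}}
            )
        rankings = self.leaderboard.get_leaderboard("test_challenge")
        self.assertEqual([r['position'] for r in rankings], [1, 2, 3])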
def test_pareto_frontier(self):
"""Test Pareto frontier computation"""
# Create challenge and submit edits
self.leaderboard.create_challenge(
challenge_id="test_challenge",
title="Test",
description="Test",
languages=['english'],
ranks=[8, 32, 64]
)
rank_results = {
8: {'accuracy': 0.75, 'uncertainty': 0.20, 'flops': 6.4e5, 'efficiency': 0.012},
32: {'accuracy': 0.88, 'uncertainty': 0.12, 'flops': 1.02e7, 'efficiency': 0.009},
64: {'accuracy': 0.92, 'uncertainty': 0.08, 'flops': 4.1e7, 'efficiency': 0.007}
}
self.leaderboard.submit_edit(
challenge_id="test_challenge",
contributor_id="test_contributor",
language="english",
edit_description="Test",
rank_results=rank_results
)
# Compute frontier
frontier_data = self.leaderboard.compute_pareto_frontier("test_challenge")
self.assertIn('frontier', frontier_data)
self.assertIn('all_points', frontier_data)
self.assertGreater(len(frontier_data['frontier']), 0)
def test_feedback_generation(self):
"""Test feedback generation"""
# Create challenge and submit
self.leaderboard.create_challenge(
challenge_id="test_challenge",
title="Test",
description="Test",
languages=['english'],
ranks=[32]
)
rank_results = {
32: {'accuracy': 0.88, 'uncertainty': 0.12, 'flops': 1.02e7, 'efficiency': 0.009}
}
submission = self.leaderboard.submit_edit(
challenge_id="test_challenge",
contributor_id="test_contributor",
language="english",
edit_description="Test",
rank_results=rank_results
)
# Generate feedback
feedback = self.leaderboard.generate_feedback(submission.submission_id)
self.assertIn('rank_specific_feedback', feedback)
self.assertIn('recommendations', feedback)
self.assertIn(32, feedback['rank_specific_feedback'])
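    # Hedged sketch: a Pareto frontier can never contain more points than the
    # set it was computed from. Relies only on the 'frontier' and 'all_points'
    # keys already asserted in test_pareto_frontier.
    def test_frontier_not_larger_than_point_set(self):
        """Sketch: Pareto frontier size is bounded by the total point count"""
        self.leaderboard.create_challenge(
            challenge_id="test_challenge",
            title="Test",
            description="Test",
            languages=['english'],
            ranks=[8, 64]
        )
        rank_results = {
            8: {'accuracy': 0.75, 'uncertainty': 0.20, 'flops': 6.4e5, 'efficiency': 0.012},
            64: {'accuracy': 0.92, 'uncertainty': 0.08, 'flops': 4.1e7, 'efficiency': 0.007}
        }
        self.leaderboard.submit_edit(
            challenge_id="test_challenge",
            contributor_id="test_contributor",
            language="english",
            edit_description="Test",
            rank_results=rank_results
        )
        frontier_data = self.leaderboard.compute_pareto_frontier("test_challenge")
        self.assertLessEqual(len(frontier_data['frontier']),
                             len(frontier_data['all_points']))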
class TestNSNDashboard(unittest.TestCase):
"""Test Dashboard Visualizations"""
def setUp(self):
self.dashboard = NSNDashboard()
def test_dashboard_creation(self):
"""Test dashboard initialization"""
self.assertIsNotNone(self.dashboard)
self.assertEqual(self.dashboard.figsize, (15, 10))
    # Note: full visualization tests require a matplotlib backend and are
    # typically run separately or with mocking; a minimal headless smoke
    # test is sketched below.
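    # Hedged smoke test: the standard matplotlib pattern for headless CI is to
    # force the non-interactive Agg backend, after which figure creation needs
    # no display. Only the figsize attribute asserted above is relied on.
    def test_headless_figure_creation(self):
        """Sketch: a figure of the dashboard's figsize renders under Agg"""
        import matplotlib
        matplotlib.use("Agg")  # non-interactive backend; safe without a display
        import matplotlib.pyplot as plt
        fig = plt.figure(figsize=self.dashboard.figsize)
        self.assertEqual(tuple(fig.get_size_inches()), (15.0, 10.0))
        plt.close(fig)  # free the figure to avoid open-figure warnings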
def run_tests():
"""Run all tests"""
loader = unittest.TestLoader()
suite = unittest.TestSuite()
# Add all test classes
suite.addTests(loader.loadTestsFromTestCase(TestBackendAwareRankSelector))
suite.addTests(loader.loadTestsFromTestCase(TestMultilingualNSNEvaluator))
suite.addTests(loader.loadTestsFromTestCase(TestNSNLeaderboard))
suite.addTests(loader.loadTestsFromTestCase(TestNSNDashboard))
# Run tests
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
return result.wasSuccessful()
if __name__ == "__main__":
import logging
logging.basicConfig(level=logging.WARNING) # Reduce noise during tests
success = run_tests()
sys.exit(0 if success else 1)