"""
Test Suite for NSN Integration

Validates all three stages of NSN integration, plus the dashboard
visualizations built on top of them.
"""

import sys
import os

# Make the repository root importable when this file is run directly.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

import unittest

from quantum_integration.nsn_integration import (
    BackendAwareRankSelector,
    BackendType,
    MultilingualNSNEvaluator,
    NSNLeaderboard,
    NSNDashboard
)
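
# Suite layout:
#   Stage 1 - backend-aware rank selection   (TestBackendAwareRankSelector)
#   Stage 2 - multilingual edit reliability  (TestMultilingualNSNEvaluator)
#   Stage 3 - contributor challenges         (TestNSNLeaderboard)
#   Dashboard visualizations, smoke test     (TestNSNDashboard)
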
class TestBackendAwareRankSelector(unittest.TestCase):
    """Test Stage 1: Backend-Aware Rank Selection"""

    def setUp(self):
        self.selector = BackendAwareRankSelector()

    def test_rank_selection_low_qubit(self):
        """Test rank selection for a low-qubit backend"""
        rank_config = self.selector.select_rank(
            BackendType.IBM_MANILA,
            target_reliability=0.85
        )
        self.assertEqual(rank_config.rank, 8, "Low-qubit backend should select rank 8")
        self.assertLess(rank_config.flops, 1e7, "Low rank should have low FLOPs")

    def test_rank_selection_high_fidelity(self):
        """Test rank selection for a high-fidelity backend"""
        rank_config = self.selector.select_rank(
            BackendType.IBM_WASHINGTON,
            target_reliability=0.90
        )
        self.assertGreaterEqual(rank_config.rank, 64, "High-fidelity backend should support high rank")
        self.assertGreater(rank_config.expected_reliability, 0.85)

    def test_flops_vs_reliability_curve(self):
        """Test FLOPs vs reliability curve generation"""
        curve = self.selector.compute_flops_vs_reliability(BackendType.IBM_WASHINGTON)
        self.assertGreater(len(curve), 0, "Curve should have points")

        # Each curve point is a (flops, reliability) pair; the curve should be
        # ordered by compute cost.
        flops_values = [point[0] for point in curve]
        self.assertEqual(flops_values, sorted(flops_values), "FLOPs should be non-decreasing")

    def test_rank_recommendation(self):
        """Test rank recommendation under compute and reliability constraints"""
        recommendation = self.selector.get_rank_recommendation(
            backend_type=BackendType.RUSSIAN_SIMULATOR,
            compute_budget=1e8,
            min_reliability=0.90
        )

        self.assertIn('recommended_rank', recommendation)
        self.assertIn('expected_reliability', recommendation)
        self.assertIn('rationale', recommendation)
        self.assertLessEqual(recommendation['flops'], 1e8, "Should respect compute budget")


class TestMultilingualNSNEvaluator(unittest.TestCase):
    """Test Stage 2: Multilingual Edit Reliability"""

    def setUp(self):
        self.evaluator = MultilingualNSNEvaluator()

    def test_language_edit_evaluation(self):
        """Test single-language edit evaluation"""
        result = self.evaluator.evaluate_language_edit('english', rank=64)

        self.assertEqual(result.language, 'english')
        self.assertEqual(result.rank, 64)
        self.assertGreater(result.edit_accuracy, 0)
        self.assertLess(result.edit_accuracy, 1)
        self.assertGreater(result.uncertainty, 0)

    def test_resource_level_accuracy(self):
        """Test that high-resource languages achieve higher accuracy"""
        high_resource = self.evaluator.evaluate_language_edit('english', rank=64)
        low_resource = self.evaluator.evaluate_language_edit('swahili', rank=64)

        self.assertGreater(high_resource.edit_accuracy, low_resource.edit_accuracy,
                           "High-resource language should have higher accuracy")

    def test_rank_scaling(self):
        """Test that a higher rank improves accuracy and reduces uncertainty"""
        low_rank = self.evaluator.evaluate_language_edit('indonesian', rank=8)
        high_rank = self.evaluator.evaluate_language_edit('indonesian', rank=128)

        self.assertGreater(high_rank.edit_accuracy, low_rank.edit_accuracy,
                           "Higher rank should improve accuracy")
        self.assertLess(high_rank.uncertainty, low_rank.uncertainty,
                        "Higher rank should reduce uncertainty")

    def test_subspace_containment(self):
        """Test subspace containment analysis"""
        containment = self.evaluator.evaluate_subspace_containment(
            source_lang='indonesian',
            target_lang='english',
            rank=64
        )

        self.assertEqual(containment.source_lang, 'indonesian')
        self.assertEqual(containment.target_lang, 'english')
        self.assertGreater(containment.containment_score, 0)
        self.assertLess(containment.containment_score, 1)

    def test_uncertainty_weights(self):
        """Test uncertainty weight computation"""
        languages = ['english', 'indonesian', 'swahili']
        weights = self.evaluator.compute_uncertainty_weights(languages)

        self.assertEqual(len(weights), 3)
        self.assertAlmostEqual(sum(weights.values()), 1.0, places=5,
                               msg="Weights should sum to 1")

        # Lower-resource languages carry more uncertainty, so they should
        # receive larger weights.
        self.assertGreater(weights['swahili'], weights['english'])

    def test_rank_language_matrix(self):
        """Test comprehensive rank-language analysis"""
        languages = ['english', 'chinese', 'indonesian']
        analysis = self.evaluator.analyze_rank_language_matrix(languages)

        self.assertIn('accuracy_matrix', analysis)
        self.assertIn('containment_analysis', analysis)
        self.assertIn('uncertainty_weights', analysis)

        for lang in languages:
            self.assertIn(lang, analysis['accuracy_matrix'])


class TestNSNLeaderboard(unittest.TestCase):
    """Test Stage 3: Contributor Challenges"""

    def setUp(self):
        self.leaderboard = NSNLeaderboard()

    def test_challenge_creation(self):
        """Test challenge creation"""
        challenge = self.leaderboard.create_challenge(
            challenge_id="test_challenge",
            title="Test Challenge",
            description="Test description",
            languages=['english', 'chinese'],
            ranks=[8, 32, 64]
        )

        self.assertEqual(challenge.challenge_id, "test_challenge")
        self.assertEqual(len(challenge.languages), 2)
        self.assertEqual(len(challenge.ranks_to_evaluate), 3)

    def test_submission(self):
        """Test edit submission"""
        self.leaderboard.create_challenge(
            challenge_id="test_challenge",
            title="Test",
            description="Test",
            languages=['english'],
            ranks=[8, 32]
        )

        # Per-rank metrics, keyed by rank: accuracy, uncertainty, FLOPs, efficiency.
        rank_results = {
            8: {'accuracy': 0.75, 'uncertainty': 0.20, 'flops': 6.4e5, 'efficiency': 0.012},
            32: {'accuracy': 0.88, 'uncertainty': 0.12, 'flops': 1.02e7, 'efficiency': 0.009}
        }

        submission = self.leaderboard.submit_edit(
            challenge_id="test_challenge",
            contributor_id="test_contributor",
            language="english",
            edit_description="Test edit",
            rank_results=rank_results
        )

        self.assertEqual(submission.contributor_id, "test_contributor")
        self.assertEqual(len(submission.ranks_evaluated), 2)

    def test_leaderboard_ranking(self):
        """Test leaderboard ranking computation"""
        self.leaderboard.create_challenge(
            challenge_id="test_challenge",
            title="Test",
            description="Test",
            languages=['english'],
            ranks=[32]
        )

        # Submit three entries of strictly increasing quality.
        for i in range(3):
            rank_results = {
                32: {
                    'accuracy': 0.80 + i * 0.05,
                    'uncertainty': 0.15 - i * 0.02,
                    'flops': 1e7,
                    'efficiency': 0.008 + i * 0.001
                }
            }

            self.leaderboard.submit_edit(
                challenge_id="test_challenge",
                contributor_id=f"contributor_{i}",
                language="english",
                edit_description=f"Edit {i}",
                rank_results=rank_results
            )

        rankings = self.leaderboard.get_leaderboard("test_challenge")

        self.assertEqual(len(rankings), 3)
        self.assertEqual(rankings[0]['position'], 1)

        # Rankings should be sorted by score, best first.
        scores = [r['score'] for r in rankings]
        self.assertEqual(scores, sorted(scores, reverse=True))

    def test_pareto_frontier(self):
        """Test Pareto frontier computation"""
        self.leaderboard.create_challenge(
            challenge_id="test_challenge",
            title="Test",
            description="Test",
            languages=['english'],
            ranks=[8, 32, 64]
        )

        rank_results = {
            8: {'accuracy': 0.75, 'uncertainty': 0.20, 'flops': 6.4e5, 'efficiency': 0.012},
            32: {'accuracy': 0.88, 'uncertainty': 0.12, 'flops': 1.02e7, 'efficiency': 0.009},
            64: {'accuracy': 0.92, 'uncertainty': 0.08, 'flops': 4.1e7, 'efficiency': 0.007}
        }

        self.leaderboard.submit_edit(
            challenge_id="test_challenge",
            contributor_id="test_contributor",
            language="english",
            edit_description="Test",
            rank_results=rank_results
        )

        frontier_data = self.leaderboard.compute_pareto_frontier("test_challenge")

        self.assertIn('frontier', frontier_data)
        self.assertIn('all_points', frontier_data)
        self.assertGreater(len(frontier_data['frontier']), 0)

    def test_feedback_generation(self):
        """Test feedback generation"""
        self.leaderboard.create_challenge(
            challenge_id="test_challenge",
            title="Test",
            description="Test",
            languages=['english'],
            ranks=[32]
        )

        rank_results = {
            32: {'accuracy': 0.88, 'uncertainty': 0.12, 'flops': 1.02e7, 'efficiency': 0.009}
        }

        submission = self.leaderboard.submit_edit(
            challenge_id="test_challenge",
            contributor_id="test_contributor",
            language="english",
            edit_description="Test",
            rank_results=rank_results
        )

        feedback = self.leaderboard.generate_feedback(submission.submission_id)

        self.assertIn('rank_specific_feedback', feedback)
        self.assertIn('recommendations', feedback)
        self.assertIn(32, feedback['rank_specific_feedback'])
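
# NSNDashboard gets a construction-only smoke test: figure contents are hard
# to assert on directly, so we check initialization and the default figure
# size instead.
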
class TestNSNDashboard(unittest.TestCase):
    """Test Dashboard Visualizations"""

    def setUp(self):
        self.dashboard = NSNDashboard()

    def test_dashboard_creation(self):
        """Test dashboard initialization"""
        self.assertIsNotNone(self.dashboard)
        self.assertEqual(self.dashboard.figsize, (15, 10))


def run_tests():
    """Run all tests"""
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    suite.addTests(loader.loadTestsFromTestCase(TestBackendAwareRankSelector))
    suite.addTests(loader.loadTestsFromTestCase(TestMultilingualNSNEvaluator))
    suite.addTests(loader.loadTestsFromTestCase(TestNSNLeaderboard))
    suite.addTests(loader.loadTestsFromTestCase(TestNSNDashboard))

    runner = unittest.TextTestRunner(verbosity=2)
    result = runner.run(suite)

    return result.wasSuccessful()
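
# When run as a script, execute the full suite and exit with a nonzero status
# on failure, so CI can detect failed runs.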
if __name__ == "__main__":
    import logging
    logging.basicConfig(level=logging.WARNING)

    success = run_tests()
    sys.exit(0 if success else 1)