"""
LIMIT-Graph NSN Integration

Embeds NSN rank-selection logic into the LIMIT-Graph benchmarking harness.
"""

import sys
import os

# Make the directory two levels up importable so quantum_integration can be found
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))

from typing import Dict, List, Optional, Any
from dataclasses import dataclass
import logging

from quantum_integration.nsn_integration import (
    BackendAwareRankSelector,
    BackendType,
    MultilingualNSNEvaluator
)

logger = logging.getLogger(__name__)


@dataclass
class BenchmarkConfig:
    """Configuration for LIMIT-Graph benchmark with NSN"""
    backend_type: BackendType
    languages: List[str]
    target_reliability: float = 0.85
    compute_budget: float = 1e8
    enable_rank_adaptation: bool = True
    enable_multilingual_weighting: bool = True

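# Example configuration (illustrative sketch; the language list and reliability
# target below are arbitrary, and BackendType.IBM_WASHINGTON is one of the
# members referenced elsewhere in this module):
#
#   config = BenchmarkConfig(
#       backend_type=BackendType.IBM_WASHINGTON,
#       languages=['english', 'chinese'],
#       target_reliability=0.90,
#   )
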
class LIMITGraphNSNBenchmark:
    """
    LIMIT-Graph benchmarking harness with NSN integration
    """

    def __init__(self, config: BenchmarkConfig):
        """
        Initialize the benchmark harness.

        Args:
            config: Benchmark configuration
        """
        self.config = config
        self.rank_selector = BackendAwareRankSelector()
        self.multilingual_evaluator = MultilingualNSNEvaluator()

        # Choose an NSN rank once, based on the configured backend and target
        # reliability; all test cases are evaluated at this rank.
        self.selected_rank = self.rank_selector.select_rank(
            backend_type=config.backend_type,
            target_reliability=config.target_reliability
        )

        logger.info("Initialized LIMIT-Graph NSN Benchmark")
        logger.info(f"Backend: {config.backend_type.value}")
        logger.info(f"Selected Rank: {self.selected_rank.rank}")
        logger.info(f"Expected Reliability: {self.selected_rank.expected_reliability:.3f}")

    def run_benchmark(self, test_cases: List[Dict[str, Any]]) -> Dict:
        """
        Run the benchmark with NSN-aware evaluation.

        Args:
            test_cases: List of test case dictionaries with 'language' and 'text' keys

        Returns:
            Benchmark results dictionary
        """
        logger.info(f"Running benchmark with {len(test_cases)} test cases...")

        results = {
            'config': {
                'backend': self.config.backend_type.value,
                'rank': self.selected_rank.rank,
                'languages': self.config.languages
            },
            'test_results': [],
            'language_performance': {},
            'overall_metrics': {}
        }

        for i, test_case in enumerate(test_cases):
            language = test_case.get('language', 'english')

            # Evaluate the edit at the backend-selected rank
            eval_result = self.multilingual_evaluator.evaluate_language_edit(
                language=language,
                rank=self.selected_rank.rank,
                edit_text=test_case.get('text', '')
            )

            test_result = {
                'test_id': i,
                'language': language,
                'rank': self.selected_rank.rank,
                'accuracy': eval_result.edit_accuracy,
                'uncertainty': eval_result.uncertainty,
                'flops': eval_result.flops,
                'resource_level': eval_result.resource_level
            }

            results['test_results'].append(test_result)

            # Accumulate per-language totals for later averaging
            if language not in results['language_performance']:
                results['language_performance'][language] = {
                    'count': 0,
                    'total_accuracy': 0.0,
                    'total_uncertainty': 0.0
                }

            results['language_performance'][language]['count'] += 1
            results['language_performance'][language]['total_accuracy'] += eval_result.edit_accuracy
            results['language_performance'][language]['total_uncertainty'] += eval_result.uncertainty

        # Aggregate overall metrics across all test cases
        if results['test_results']:
            results['overall_metrics'] = {
                'mean_accuracy': sum(r['accuracy'] for r in results['test_results']) / len(results['test_results']),
                'mean_uncertainty': sum(r['uncertainty'] for r in results['test_results']) / len(results['test_results']),
                'total_flops': sum(r['flops'] for r in results['test_results']),
                'num_tests': len(results['test_results'])
            }

        # Convert per-language totals into averages
        for lang, perf in results['language_performance'].items():
            perf['avg_accuracy'] = perf['total_accuracy'] / perf['count']
            perf['avg_uncertainty'] = perf['total_uncertainty'] / perf['count']

        logger.info(f"Benchmark completed: {len(results['test_results'])} tests")
        if results['overall_metrics']:
            logger.info(f"Overall accuracy: {results['overall_metrics']['mean_accuracy']:.3f}")

        return results

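    # Illustrative shape of the dictionary returned by run_benchmark; field names
    # mirror the code above, while the concrete values shown here are hypothetical:
    #
    #   {
    #       'config': {'backend': '<backend name>', 'rank': 8, 'languages': [...]},
    #       'test_results': [{'test_id': 0, 'language': 'english', 'rank': 8,
    #                         'accuracy': 0.91, 'uncertainty': 0.05,
    #                         'flops': 1.2e6, 'resource_level': 'high'}, ...],
    #       'language_performance': {'english': {'count': 2, 'total_accuracy': 1.82,
    #                                            'total_uncertainty': 0.11,
    #                                            'avg_accuracy': 0.91,
    #                                            'avg_uncertainty': 0.055}, ...},
    #       'overall_metrics': {'mean_accuracy': 0.89, 'mean_uncertainty': 0.06,
    #                           'total_flops': 7.5e6, 'num_tests': 6}
    #   }
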
    def visualize_benchmark_results(self, results: Dict, save_path: Optional[str] = None):
        """
        Visualize benchmark results with the NSN dashboard.

        Args:
            results: Benchmark results from run_benchmark
            save_path: Optional path to save the visualization
        """
        from quantum_integration.nsn_integration import NSNDashboard
        import matplotlib.pyplot as plt

        dashboard = NSNDashboard()

        fig, axes = plt.subplots(2, 2, figsize=(14, 10))

        # Panel 1: average accuracy per language
        ax1 = axes[0, 0]
        languages = list(results['language_performance'].keys())
        accuracies = [results['language_performance'][lang]['avg_accuracy'] for lang in languages]
        ax1.bar(languages, accuracies, color='skyblue', edgecolor='black')
        ax1.set_ylabel('Average Accuracy', fontweight='bold')
        ax1.set_title('Accuracy by Language', fontweight='bold')
        ax1.set_ylim([0, 1])
        ax1.grid(True, alpha=0.3, axis='y')
        plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45, ha='right')

        # Panel 2: average uncertainty per language
        ax2 = axes[0, 1]
        uncertainties = [results['language_performance'][lang]['avg_uncertainty'] for lang in languages]
        ax2.bar(languages, uncertainties, color='salmon', edgecolor='black')
        ax2.set_ylabel('Average Uncertainty', fontweight='bold')
        ax2.set_title('Uncertainty by Language', fontweight='bold')
        ax2.grid(True, alpha=0.3, axis='y')
        plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45, ha='right')

        # Panel 3: per-test accuracy vs. uncertainty scatter
        ax3 = axes[1, 0]
        for test in results['test_results']:
            ax3.scatter(test['uncertainty'], test['accuracy'],
                        alpha=0.6, s=100, edgecolors='black')
        ax3.set_xlabel('Uncertainty', fontweight='bold')
        ax3.set_ylabel('Accuracy', fontweight='bold')
        ax3.set_title('Accuracy-Uncertainty Trade-off', fontweight='bold')
        ax3.grid(True, alpha=0.3)

        # Panel 4: text summary of overall metrics
        ax4 = axes[1, 1]
        ax4.axis('off')
        summary_text = f"""
BENCHMARK SUMMARY

Backend: {results['config']['backend']}
Rank: {results['config']['rank']}

Overall Metrics:
• Mean Accuracy: {results['overall_metrics']['mean_accuracy']:.3f}
• Mean Uncertainty: {results['overall_metrics']['mean_uncertainty']:.3f}
• Total FLOPs: {results['overall_metrics']['total_flops']:.2e}
• Num Tests: {results['overall_metrics']['num_tests']}

Languages Tested: {len(languages)}
"""
        ax4.text(0.1, 0.5, summary_text, fontsize=11, family='monospace',
                 verticalalignment='center')

        plt.suptitle('LIMIT-Graph NSN Benchmark Results',
                     fontsize=16, fontweight='bold')
        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            logger.info(f"Saved benchmark visualization to {save_path}")

        plt.show()
        return fig

    def export_results(self, results: Dict, filepath: str):
        """Export benchmark results to JSON"""
        import json

        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)

        logger.info(f"Exported results to {filepath}")

    def compare_backends(self, test_cases: List[Dict[str, Any]]) -> Dict:
        """
        Compare performance across different quantum backends.

        Args:
            test_cases: List of test cases

        Returns:
            Comparison results keyed by backend name
        """
        backends = [
            BackendType.IBM_MANILA,
            BackendType.IBM_WASHINGTON,
            BackendType.RUSSIAN_SIMULATOR
        ]

        comparison = {
            'backends': {},
            'test_cases': test_cases
        }

        for backend in backends:
            logger.info(f"\nBenchmarking {backend.value}...")

            # Reuse this benchmark's settings, swapping in the backend under test
            config = BenchmarkConfig(
                backend_type=backend,
                languages=self.config.languages,
                target_reliability=self.config.target_reliability,
                compute_budget=self.config.compute_budget
            )

            benchmark = LIMITGraphNSNBenchmark(config)
            results = benchmark.run_benchmark(test_cases)

            comparison['backends'][backend.value] = {
                'selected_rank': benchmark.selected_rank.rank,
                'expected_reliability': benchmark.selected_rank.expected_reliability,
                'overall_metrics': results['overall_metrics'],
                'language_performance': results['language_performance']
            }

        logger.info("\nBackend comparison completed")
        return comparison

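    # Illustrative shape of the dictionary returned by compare_backends; keys mirror
    # the code above, and the rank/reliability values shown here are hypothetical:
    #
    #   {
    #       'backends': {
    #           '<backend name>': {'selected_rank': 4, 'expected_reliability': 0.87,
    #                              'overall_metrics': {...},
    #                              'language_performance': {...}},
    #           ...
    #       },
    #       'test_cases': [...]
    #   }
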

def create_limit_graph_nsn_benchmark(config: BenchmarkConfig) -> LIMITGraphNSNBenchmark:
    """Factory function to create a LIMIT-Graph NSN benchmark"""
    return LIMITGraphNSNBenchmark(config)


def demo_limit_graph_integration():
    """Demo of the LIMIT-Graph NSN integration"""
    logger.info("=" * 80)
    logger.info("LIMIT-GRAPH NSN INTEGRATION DEMO")
    logger.info("=" * 80)

    # Configure the benchmark for a single backend and a small language set
    config = BenchmarkConfig(
        backend_type=BackendType.IBM_WASHINGTON,
        languages=['english', 'chinese', 'indonesian', 'swahili'],
        target_reliability=0.85,
        compute_budget=1e8
    )

    benchmark = create_limit_graph_nsn_benchmark(config)

    # Small multilingual test set
    test_cases = [
        {'language': 'english', 'text': 'The capital of France is Paris'},
        {'language': 'english', 'text': 'Python is a programming language'},
        {'language': 'chinese', 'text': '北京是中国的首都'},
        {'language': 'chinese', 'text': '机器学习是人工智能的一部分'},
        {'language': 'indonesian', 'text': 'Jakarta adalah ibu kota Indonesia'},
        {'language': 'swahili', 'text': 'Nairobi ni mji mkuu wa Kenya'}
    ]

    results = benchmark.run_benchmark(test_cases)

    benchmark.visualize_benchmark_results(
        results,
        save_path='limit_graph_nsn_benchmark_results.png'
    )

    benchmark.export_results(results, 'limit_graph_nsn_results.json')

    logger.info("\n" + "=" * 80)
    logger.info("BACKEND COMPARISON")
    logger.info("=" * 80)

    # Compare a subset of the test cases across the backends listed in compare_backends
    comparison = benchmark.compare_backends(test_cases[:3])

    logger.info("\n--- Backend Comparison Summary ---")
    for backend_name, backend_data in comparison['backends'].items():
        logger.info(f"\n{backend_name}:")
        logger.info(f"  Selected Rank: {backend_data['selected_rank']}")
        logger.info(f"  Expected Reliability: {backend_data['expected_reliability']:.3f}")
        logger.info(f"  Mean Accuracy: {backend_data['overall_metrics']['mean_accuracy']:.3f}")

    logger.info("\n" + "=" * 80)
    logger.info("INTEGRATION DEMO COMPLETED")
    logger.info("=" * 80)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')
    demo_limit_graph_integration()
|