"""
Quantum Backend Comparison Module

Compares Russian vs IBM quantum backends on multilingual edit reliability.
"""

import json
import time
from dataclasses import asdict, dataclass
from typing import Dict, List, Optional

import numpy as np


@dataclass
class BackendMetrics:
    """Metrics for backend performance"""
    backend_name: str
    edit_success_rate: float
    hallucination_rate: float
    correction_efficiency: float
    avg_latency_ms: float
    circuit_fidelity: float
    throughput_edits_per_sec: float
    language_accuracy: Dict[str, float]
    domain_performance: Dict[str, float]


class QuantumBackendComparator:
    """Compare Russian and IBM quantum backends"""

    def __init__(
        self,
        backends: Optional[List[str]] = None,
        languages: Optional[List[str]] = None,
        domains: Optional[List[str]] = None
    ):
        """
        Initialize backend comparator

        Args:
            backends: List of backend names ['russian', 'ibm']
            languages: List of languages to test
            domains: List of domains to test
        """
        self.backends = backends or ['russian', 'ibm']
        self.languages = languages or ['en', 'ru', 'es', 'fr', 'de', 'zh', 'ar']
        self.domains = domains or ['code', 'math', 'text', 'scientific', 'legal', 'medical']

        # Simulation parameters per backend: baseline circuit fidelity,
        # baseline latency in ms, and a success-rate boost for Cyrillic input.
        self.backend_configs = {
            'russian': {
                'base_fidelity': 0.92,
                'base_latency': 85,
                'cyrillic_boost': 0.05
            },
            'ibm': {
                'base_fidelity': 0.94,
                'base_latency': 92,
                'cyrillic_boost': 0.0
            }
        }

        self.results = {}

    def compare_backends(
        self,
        edit_stream: List[Dict],
        metrics: Optional[List[str]] = None
    ) -> Dict[str, BackendMetrics]:
        """
        Compare backends on edit stream

        Args:
            edit_stream: List of edits to process
            metrics: Metrics to track

        Returns:
            Dict mapping backend names to metrics
        """
        # Default metric selection; all metrics are computed regardless.
        metrics = metrics or ['success_rate', 'latency', 'fidelity']

        results = {}
        for backend in self.backends:
            print(f"\n🔬 Benchmarking {backend.upper()} backend...")
            backend_metrics = self._benchmark_backend(backend, edit_stream)
            results[backend] = backend_metrics

            print(f"✓ {backend}: Success={backend_metrics.edit_success_rate:.1%}, "
                  f"Latency={backend_metrics.avg_latency_ms:.1f}ms")

        self.results = results
        return results

    def _benchmark_backend(
        self,
        backend: str,
        edit_stream: List[Dict]
    ) -> BackendMetrics:
        """Benchmark single backend"""
        config = self.backend_configs.get(backend, self.backend_configs['ibm'])

        successful_edits = 0
        hallucinated_edits = 0
        corrected_edits = 0
        latencies = []

        language_stats = {lang: {'total': 0, 'success': 0} for lang in self.languages}
        domain_stats = {domain: {'total': 0, 'success': 0} for domain in self.domains}

        for edit in edit_stream:
            lang = edit.get('lang', 'en')
            domain = edit.get('domain', 'text')

            # Simulated success probability; the Russian backend gets a
            # Cyrillic boost on Russian-language edits.
            base_success = 0.85
            if backend == 'russian' and lang == 'ru':
                base_success += config['cyrillic_boost']

            # Draw synthetic outcomes for this edit.
            success = np.random.rand() < base_success
            hallucinated = np.random.rand() < 0.08
            corrected = hallucinated and (np.random.rand() < 0.92)

            if success:
                successful_edits += 1
            if hallucinated:
                hallucinated_edits += 1
            if corrected:
                corrected_edits += 1

            if lang in language_stats:
                language_stats[lang]['total'] += 1
                if success:
                    language_stats[lang]['success'] += 1

            if domain in domain_stats:
                domain_stats[domain]['total'] += 1
                if success:
                    domain_stats[domain]['success'] += 1

            # Simulated latency: Gaussian noise around the backend baseline,
            # floored at 10 ms.
            latency = config['base_latency'] + np.random.normal(0, 10)
            latencies.append(max(latency, 10))

            time.sleep(0.001)  # simulate per-edit processing time

        total_edits = len(edit_stream)
        edit_success_rate = successful_edits / total_edits if total_edits > 0 else 0
        hallucination_rate = hallucinated_edits / total_edits if total_edits > 0 else 0
        correction_efficiency = corrected_edits / hallucinated_edits if hallucinated_edits > 0 else 1.0
        avg_latency = np.mean(latencies) if latencies else 0
        throughput = 1000 / avg_latency if avg_latency > 0 else 0

        language_accuracy = {
            lang: stats['success'] / stats['total'] if stats['total'] > 0 else 0
            for lang, stats in language_stats.items()
        }

        domain_performance = {
            domain: stats['success'] / stats['total'] if stats['total'] > 0 else 0
            for domain, stats in domain_stats.items()
        }

        return BackendMetrics(
            backend_name=backend,
            edit_success_rate=edit_success_rate,
            hallucination_rate=hallucination_rate,
            correction_efficiency=correction_efficiency,
            avg_latency_ms=avg_latency,
            circuit_fidelity=config['base_fidelity'],
            throughput_edits_per_sec=throughput,
            language_accuracy=language_accuracy,
            domain_performance=domain_performance
        )

    def quick_compare(self, num_edits: int = 100) -> Dict[str, BackendMetrics]:
        """Quick comparison with synthetic data"""
        edit_stream = []
        for i in range(num_edits):
            edit_stream.append({
                'id': f'edit_{i}',
                'lang': np.random.choice(self.languages),
                'domain': np.random.choice(self.domains),
                'code': f'sample_code_{i}'
            })

        return self.compare_backends(edit_stream)

    def generate_report(
        self,
        results: Optional[Dict[str, BackendMetrics]] = None,
        output: str = 'backend_comparison.html'
    ):
        """Generate comparison report"""
        results = results or self.results

        if not results:
            print("⚠️ No results to report. Run compare_backends() first.")
            return

        html = self._generate_html_report(results)

        with open(output, 'w', encoding='utf-8') as f:
            f.write(html)

        print(f"✓ Report generated: {output}")

    def _generate_html_report(self, results: Dict[str, BackendMetrics]) -> str:
        """Generate HTML report"""
        html = """
<!DOCTYPE html>
<html>
<head>
    <title>Backend Comparison Report</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        table { border-collapse: collapse; width: 100%; margin: 20px 0; }
        th, td { border: 1px solid #ddd; padding: 12px; text-align: left; }
        th { background-color: #4CAF50; color: white; }
        tr:nth-child(even) { background-color: #f2f2f2; }
        .metric { font-weight: bold; }
        .good { color: green; }
        .warning { color: orange; }
        .bad { color: red; }
    </style>
</head>
<body>
    <h1>Quantum Backend Comparison Report</h1>
    <h2>Performance Metrics</h2>
    <table>
        <tr>
            <th>Metric</th>
"""

        for backend in results.keys():
            html += f"            <th>{backend.upper()}</th>\n"

        html += "        </tr>\n"

        metrics_to_show = [
            ('Edit Success Rate', 'edit_success_rate', '%'),
            ('Hallucination Rate', 'hallucination_rate', '%'),
            ('Correction Efficiency', 'correction_efficiency', '%'),
            ('Avg Latency', 'avg_latency_ms', 'ms'),
            ('Circuit Fidelity', 'circuit_fidelity', ''),
            ('Throughput', 'throughput_edits_per_sec', 'edits/s')
        ]

        for metric_name, metric_key, unit in metrics_to_show:
            html += f"        <tr>\n            <td class='metric'>{metric_name}</td>\n"
            for backend_metrics in results.values():
                value = getattr(backend_metrics, metric_key)
                if unit == '%':
                    formatted = f"{value * 100:.1f}%"
                elif unit == 'ms':
                    formatted = f"{value:.1f}{unit}"
                else:
                    formatted = f"{value:.2f}{unit}"
                html += f"            <td>{formatted}</td>\n"
            html += "        </tr>\n"

        html += """
    </table>
    <p><em>Generated by Quantum LIMIT-GRAPH v2.4.0</em></p>
</body>
</html>
"""
        return html

    def export_results(self, filepath: str = 'backend_results.json'):
        """Export results to JSON"""
        if not self.results:
            print("⚠️ No results to export.")
            return

        export_data = {
            backend: asdict(metrics)
            for backend, metrics in self.results.items()
        }

        with open(filepath, 'w') as f:
            json.dump(export_data, f, indent=2)

        print(f"✓ Results exported: {filepath}")


def quick_benchmark(backends: Optional[List[str]] = None, num_edits: int = 100) -> Dict[str, BackendMetrics]:
    """Quick benchmark comparison"""
    comparator = QuantumBackendComparator(backends=backends)
    return comparator.quick_compare(num_edits=num_edits)
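

# Minimal usage sketch (assumption: the module is run directly as a script;
# quick_compare() generates a synthetic edit stream, so no quantum hardware
# or credentials are required).
if __name__ == '__main__':
    comparator = QuantumBackendComparator()
    results = comparator.quick_compare(num_edits=200)  # synthetic workload
    comparator.generate_report(results)    # writes backend_comparison.html
    comparator.export_results()            # writes backend_results.json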