|
|
|
|
|
"""
|
|
|
Ensemble Inference Across Backends
|
|
|
Run edits across multiple backends and compute agreement scores
|
|
|
|
|
|
"""
|
|
|
import numpy as np
|
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
from dataclasses import dataclass
|
|
|
import logging
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
@dataclass
class BackendResult:
    """Outcome of applying one edit vector on one backend.

    Instances are produced by ``EnsembleInferenceManager._run_single_backend``
    for both successful and failed runs; a failed run is flagged with
    ``success=False`` and carries a human-readable ``error_message``.
    """

    # Identifier of the backend the edit was run on.
    backend_id: str
    # The edit vector that was submitted to the backend.
    edit_vector: np.ndarray
    # Output produced by the backend for that edit vector.
    output: np.ndarray
    # Confidence attached to this output.
    confidence: float
    # Latency recorded for the run.
    latency: float
    # Whether the backend run succeeded.
    success: bool
    # Failure description; stays None when nothing went wrong.
    error_message: Optional[str] = None
|
|
|
|
|
|
|
|
|
@dataclass
class EnsembleResult:
    """Aggregate outcome of running one edit vector on several backends.

    Bundles the per-backend results together with the consensus and the
    agreement metrics derived from them.
    """

    # The edit vector that was submitted to every backend.
    edit_vector: np.ndarray
    # One entry per requested backend.
    backend_results: List[BackendResult]
    # Output combined from the individual backend outputs.
    consensus_output: np.ndarray
    # Single scalar summarising the pairwise agreement matrix.
    agreement_score: float
    # Reliability boost attributed to the ensemble.
    reliability_boost: float
    # Pairwise agreement between backends.
    agreement_matrix: np.ndarray
    # Identifier of the backend selected as best.
    best_backend: str
    # Overall confidence assigned to the ensemble.
    ensemble_confidence: float
|
|
|
|
|
|
|
|
|
class EnsembleInferenceManager:
|
|
|
"""
|
|
|
Run edits across multiple quantum backends and compute agreement scores.
|
|
|
|
|
|
Dashboard Extension:
|
|
|
- Agreement matrix across backends
|
|
|
- Reliability boost from ensemble consensus
|
|
|
"""
|
|
|
|
|
|
def __init__(self):
|
|
|
self.backend_configs = self._initialize_backend_configs()
|
|
|
self.inference_history: List[EnsembleResult] = []
|
|
|
|
|
|
def _initialize_backend_configs(self) -> Dict[str, Dict]:
|
|
|
"""Initialize backend configurations"""
|
|
|
return {
|
|
|
'ibm_manila': {
|
|
|
'qubits': 5,
|
|
|
'error_rate': 0.08,
|
|
|
'gate_fidelity': 0.92,
|
|
|
'coherence_time': 30.0,
|
|
|
'base_latency': 0.05
|
|
|
},
|
|
|
'ibm_washington': {
|
|
|
'qubits': 127,
|
|
|
'error_rate': 0.02,
|
|
|
'gate_fidelity': 0.98,
|
|
|
'coherence_time': 120.0,
|
|
|
'base_latency': 0.15
|
|
|
},
|
|
|
'russian_simulator': {
|
|
|
'qubits': 256,
|
|
|
'error_rate': 0.001,
|
|
|
'gate_fidelity': 0.999,
|
|
|
'coherence_time': 1000.0,
|
|
|
'base_latency': 0.30
|
|
|
},
|
|
|
'ibm_kyoto': {
|
|
|
'qubits': 127,
|
|
|
'error_rate': 0.025,
|
|
|
'gate_fidelity': 0.975,
|
|
|
'coherence_time': 100.0,
|
|
|
'base_latency': 0.12
|
|
|
},
|
|
|
'google_sycamore': {
|
|
|
'qubits': 53,
|
|
|
'error_rate': 0.015,
|
|
|
'gate_fidelity': 0.985,
|
|
|
'coherence_time': 80.0,
|
|
|
'base_latency': 0.08
|
|
|
}
|
|
|
}
|
|
|
|
|
|
def run_ensemble_inference(
|
|
|
self,
|
|
|
edit_vector: np.ndarray,
|
|
|
backend_list: List[str]
|
|
|
) -> EnsembleResult:
|
|
|
"""
|
|
|
Run inference across multiple backends and compute ensemble result.
|
|
|
|
|
|
Args:
|
|
|
edit_vector: Edit vector to apply
|
|
|
backend_list: List of backend IDs (e.g., ['ibm_manila', 'ibm_washington'])
|
|
|
|
|
|
Returns:
|
|
|
EnsembleResult with consensus and agreement metrics
|
|
|
"""
|
|
|
|
|
|
backend_results = []
|
|
|
|
|
|
for backend_id in backend_list:
|
|
|
result = self._run_single_backend(backend_id, edit_vector)
|
|
|
backend_results.append(result)
|
|
|
|
|
|
|
|
|
agreement_matrix = self._compute_agreement_matrix(backend_results)
|
|
|
|
|
|
|
|
|
consensus_output = self._compute_consensus(backend_results)
|
|
|
|
|
|
|
|
|
agreement_score = self._compute_overall_agreement(agreement_matrix)
|
|
|
|
|
|
|
|
|
reliability_boost = self._compute_reliability_boost(
|
|
|
backend_results, agreement_score
|
|
|
)
|
|
|
|
|
|
|
|
|
best_backend = self._select_best_backend(backend_results)
|
|
|
|
|
|
|
|
|
ensemble_confidence = self._compute_ensemble_confidence(
|
|
|
backend_results, agreement_score
|
|
|
)
|
|
|
|
|
|
result = EnsembleResult(
|
|
|
edit_vector=edit_vector,
|
|
|
backend_results=backend_results,
|
|
|
consensus_output=consensus_output,
|
|
|
agreement_score=agreement_score,
|
|
|
reliability_boost=reliability_boost,
|
|
|
agreement_matrix=agreement_matrix,
|
|
|
best_backend=best_backend,
|
|
|
ensemble_confidence=ensemble_confidence
|
|
|
)
|
|
|
|
|
|
self.inference_history.append(result)
|
|
|
|
|
|
logger.info(
|
|
|
f"Ensemble inference complete: {len(backend_list)} backends, "
|
|
|
f"agreement: {agreement_score:.3f}, boost: {reliability_boost:.3f}"
|
|
|
)
|
|
|
|
|
|
return result
|
|
|
|
|
|
def _run_single_backend(
|
|
|
self, backend_id: str, edit_vector: np.ndarray
|
|
|
) -> BackendResult:
|
|
|
"""Run inference on a single backend"""
|
|
|
config = self.backend_configs.get(backend_id)
|
|
|
|
|
|
if config is None:
|
|
|
logger.warning(f"Unknown backend: {backend_id}")
|
|
|
return BackendResult(
|
|
|
backend_id=backend_id,
|
|
|
edit_vector=edit_vector,
|
|
|
output=np.zeros_like(edit_vector),
|
|
|
confidence=0.0,
|
|
|
latency=0.0,
|
|
|
success=False,
|
|
|
error_message=f"Unknown backend: {backend_id}"
|
|
|
)
|
|
|
|
|
|
|
|
|
noise_level = config['error_rate']
|
|
|
noise = np.random.randn(*edit_vector.shape) * noise_level
|
|
|
|
|
|
output = edit_vector + noise
|
|
|
|
|
|
|
|
|
confidence = config['gate_fidelity']
|
|
|
|
|
|
|
|
|
latency = config['base_latency'] * (1 + len(edit_vector) / 1000.0)
|
|
|
|
|
|
return BackendResult(
|
|
|
backend_id=backend_id,
|
|
|
edit_vector=edit_vector,
|
|
|
output=output,
|
|
|
confidence=confidence,
|
|
|
latency=latency,
|
|
|
success=True
|
|
|
)
|
|
|
|
|
|
def _compute_agreement_matrix(
|
|
|
self, results: List[BackendResult]
|
|
|
) -> np.ndarray:
|
|
|
"""Compute pairwise agreement matrix between backends"""
|
|
|
n = len(results)
|
|
|
agreement_matrix = np.zeros((n, n))
|
|
|
|
|
|
for i in range(n):
|
|
|
for j in range(n):
|
|
|
if i == j:
|
|
|
agreement_matrix[i, j] = 1.0
|
|
|
else:
|
|
|
|
|
|
output_i = results[i].output
|
|
|
output_j = results[j].output
|
|
|
|
|
|
if np.linalg.norm(output_i) < 1e-6 or np.linalg.norm(output_j) < 1e-6:
|
|
|
agreement_matrix[i, j] = 0.0
|
|
|
else:
|
|
|
similarity = np.dot(output_i, output_j) / (
|
|
|
np.linalg.norm(output_i) * np.linalg.norm(output_j)
|
|
|
)
|
|
|
|
|
|
agreement_matrix[i, j] = (similarity + 1.0) / 2.0
|
|
|
|
|
|
return agreement_matrix
|
|
|
|
|
|
def _compute_consensus(
|
|
|
self, results: List[BackendResult]
|
|
|
) -> np.ndarray:
|
|
|
"""Compute consensus output from all backends"""
|
|
|
successful_results = [r for r in results if r.success]
|
|
|
|
|
|
if not successful_results:
|
|
|
return np.zeros_like(results[0].edit_vector)
|
|
|
|
|
|
|
|
|
total_confidence = sum(r.confidence for r in successful_results)
|
|
|
|
|
|
if total_confidence < 1e-6:
|
|
|
|
|
|
outputs = [r.output for r in successful_results]
|
|
|
return np.mean(outputs, axis=0)
|
|
|
|
|
|
|
|
|
consensus = np.zeros_like(successful_results[0].output)
|
|
|
|
|
|
for result in successful_results:
|
|
|
weight = result.confidence / total_confidence
|
|
|
consensus += weight * result.output
|
|
|
|
|
|
return consensus
|
|
|
|
|
|
def _compute_overall_agreement(self, agreement_matrix: np.ndarray) -> float:
|
|
|
"""Compute overall agreement score from matrix"""
|
|
|
|
|
|
n = agreement_matrix.shape[0]
|
|
|
|
|
|
if n <= 1:
|
|
|
return 1.0
|
|
|
|
|
|
|
|
|
total = 0.0
|
|
|
count = 0
|
|
|
|
|
|
for i in range(n):
|
|
|
for j in range(n):
|
|
|
if i != j:
|
|
|
total += agreement_matrix[i, j]
|
|
|
count += 1
|
|
|
|
|
|
return total / count if count > 0 else 0.0
|
|
|
|
|
|
def _compute_reliability_boost(
|
|
|
self, results: List[BackendResult], agreement_score: float
|
|
|
) -> float:
|
|
|
"""
|
|
|
Compute reliability boost from ensemble consensus.
|
|
|
|
|
|
Boost is higher when:
|
|
|
- More backends agree
|
|
|
- Individual backends have high confidence
|
|
|
- Agreement score is high
|
|
|
"""
|
|
|
if not results:
|
|
|
return 0.0
|
|
|
|
|
|
|
|
|
avg_confidence = np.mean([r.confidence for r in results if r.success])
|
|
|
|
|
|
|
|
|
ensemble_factor = min(len(results) / 5.0, 1.0)
|
|
|
|
|
|
|
|
|
boost = (
|
|
|
0.4 * agreement_score +
|
|
|
0.3 * avg_confidence +
|
|
|
0.3 * ensemble_factor
|
|
|
)
|
|
|
|
|
|
return float(np.clip(boost, 0.0, 1.0))
|
|
|
|
|
|
def _select_best_backend(self, results: List[BackendResult]) -> str:
|
|
|
"""Select best backend based on confidence and success"""
|
|
|
successful_results = [r for r in results if r.success]
|
|
|
|
|
|
if not successful_results:
|
|
|
return results[0].backend_id if results else "none"
|
|
|
|
|
|
|
|
|
scores = {}
|
|
|
|
|
|
for result in successful_results:
|
|
|
scores[result.backend_id] = (
|
|
|
0.7 * result.confidence +
|
|
|
0.3 * (1.0 / (1.0 + result.latency))
|
|
|
)
|
|
|
|
|
|
return max(scores, key=scores.get)
|
|
|
|
|
|
def _compute_ensemble_confidence(
|
|
|
self, results: List[BackendResult], agreement_score: float
|
|
|
) -> float:
|
|
|
"""Compute overall ensemble confidence"""
|
|
|
if not results:
|
|
|
return 0.0
|
|
|
|
|
|
|
|
|
avg_confidence = np.mean([r.confidence for r in results if r.success])
|
|
|
|
|
|
|
|
|
ensemble_confidence = 0.6 * avg_confidence + 0.4 * agreement_score
|
|
|
|
|
|
return float(np.clip(ensemble_confidence, 0.0, 1.0))
|
|
|
|
|
|
def compare_backends(
|
|
|
self, edit_vectors: List[np.ndarray]
|
|
|
) -> Dict[str, Dict[str, float]]:
|
|
|
"""
|
|
|
Compare all backends across multiple edit vectors.
|
|
|
|
|
|
Returns:
|
|
|
Dict mapping backend_id to performance metrics
|
|
|
"""
|
|
|
backend_stats = {
|
|
|
backend_id: {
|
|
|
'avg_confidence': [],
|
|
|
'avg_latency': [],
|
|
|
'success_rate': []
|
|
|
}
|
|
|
for backend_id in self.backend_configs.keys()
|
|
|
}
|
|
|
|
|
|
for edit_vector in edit_vectors:
|
|
|
for backend_id in self.backend_configs.keys():
|
|
|
result = self._run_single_backend(backend_id, edit_vector)
|
|
|
|
|
|
backend_stats[backend_id]['avg_confidence'].append(result.confidence)
|
|
|
backend_stats[backend_id]['avg_latency'].append(result.latency)
|
|
|
backend_stats[backend_id]['success_rate'].append(1.0 if result.success else 0.0)
|
|
|
|
|
|
|
|
|
comparison = {}
|
|
|
|
|
|
for backend_id, stats in backend_stats.items():
|
|
|
comparison[backend_id] = {
|
|
|
'avg_confidence': float(np.mean(stats['avg_confidence'])),
|
|
|
'avg_latency': float(np.mean(stats['avg_latency'])),
|
|
|
'success_rate': float(np.mean(stats['success_rate']))
|
|
|
}
|
|
|
|
|
|
return comparison
|
|
|
|
|
|
def get_agreement_heatmap(
|
|
|
self, backend_list: List[str], edit_vector: np.ndarray
|
|
|
) -> Tuple[np.ndarray, List[str]]:
|
|
|
"""
|
|
|
Get agreement heatmap for visualization.
|
|
|
|
|
|
Returns:
|
|
|
Tuple of (agreement_matrix, backend_labels)
|
|
|
"""
|
|
|
result = self.run_ensemble_inference(edit_vector, backend_list)
|
|
|
return result.agreement_matrix, backend_list
|
|
|
|
|
|
def compute_reliability_metrics(self) -> Dict[str, float]:
|
|
|
"""Compute overall reliability metrics from history"""
|
|
|
if not self.inference_history:
|
|
|
return {
|
|
|
'avg_agreement': 0.0,
|
|
|
'avg_reliability_boost': 0.0,
|
|
|
'avg_ensemble_confidence': 0.0
|
|
|
}
|
|
|
|
|
|
return {
|
|
|
'avg_agreement': float(np.mean([
|
|
|
r.agreement_score for r in self.inference_history
|
|
|
])),
|
|
|
'avg_reliability_boost': float(np.mean([
|
|
|
r.reliability_boost for r in self.inference_history
|
|
|
])),
|
|
|
'avg_ensemble_confidence': float(np.mean([
|
|
|
r.ensemble_confidence for r in self.inference_history
|
|
|
]))
|
|
|
}
|
|
|
|