Spaces:
Build error
Build error
Upload 4 files
Browse files
- backend_telemetry_rank_adapter.py +0 -0
- edit_propagation_engine.py +398 -0
- ensemble_inference_manager.py +400 -0
- rank_feedback_generator.py +484 -0
backend_telemetry_rank_adapter.py
ADDED
|
File without changes
|
edit_propagation_engine.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Cross-Lingual Edit Propagation via Subspace Containment
|
| 4 |
+
Transfer high-resource corrections to low-resource languages using containment scores
|
| 5 |
+
|
| 6 |
+
Based on:
|
| 7 |
+
Zhang, Y., et al. (2024). "Deep Hierarchical Learning with Nested Subspace Networks."
|
| 8 |
+
arXiv preprint. NSN framework for hierarchical representation learning.
|
| 9 |
+
"""
|
| 10 |
+
import numpy as np
|
| 11 |
+
from typing import Dict, List, Optional, Tuple
|
| 12 |
+
from dataclasses import dataclass
|
| 13 |
+
import logging
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@dataclass
class ContainmentScore:
    """Subspace containment analysis result for one (source, target, rank) triple."""
    source_lang: str           # high-resource language acting as the container
    target_lang: str           # language whose subspace is tested for containment
    rank: int                  # NSN rank (subspace dimension) used for the analysis
    containment_score: float   # 0-1, how much target is contained in source
    overlap_dimension: int     # Dimension of overlap (effective shared rank)
    confidence: float          # confidence in the estimate, in [0, 1]
    propagation_recommended: bool  # True when score and confidence clear thresholds


@dataclass
class PropagationResult:
    """Result of edit propagation from a source to a target language."""
    source_lang: str
    target_lang: str
    rank: int
    edit_vector: np.ndarray        # original edit in the source language
    propagated_vector: np.ndarray  # transferred edit (zeros when propagation failed)
    containment_score: float
    success: bool
    quality_score: float           # Predicted quality after propagation, in [0, 1]
    propagation_path: List[str]    # Languages in propagation chain


class EditPropagationEngine:
    """
    Transfer edits from high-resource to low-resource languages using
    subspace containment analysis.

    Dashboard Extension:
    - Heatmap of containment scores across language pairs
    - Flow arrows showing edit propagation paths
    """

    def __init__(self):
        # Unit-norm 256-d embedding per language; the first `rank` components
        # serve as that language's rank-truncated subspace.
        self.language_embeddings = self._initialize_language_embeddings()
        # Memoized containment analyses keyed by (source_lang, target_lang, rank).
        self.containment_cache: Dict[Tuple[str, str, int], ContainmentScore] = {}
        # Chronological log of every propagation attempt, including failures.
        self.propagation_history: List[PropagationResult] = []

    def _initialize_language_embeddings(self) -> Dict[str, np.ndarray]:
        """Initialize language subspace embeddings.

        Returns:
            Dict mapping language name to a unit-norm 256-d vector. Embeddings
            are simulated with a fixed seed for reproducibility (in practice
            they would be learned from data).
        """
        np.random.seed(42)

        languages = {
            # High-resource languages (larger subspaces)
            'english': np.random.randn(256),
            'chinese': np.random.randn(256),
            'spanish': np.random.randn(256),
            'french': np.random.randn(256),
            'german': np.random.randn(256),

            # Medium-resource languages
            'russian': np.random.randn(256),
            'arabic': np.random.randn(256),
            'japanese': np.random.randn(256),
            'korean': np.random.randn(256),
            'portuguese': np.random.randn(256),

            # Low-resource languages (smaller subspaces)
            'indonesian': np.random.randn(256),
            'vietnamese': np.random.randn(256),
            'thai': np.random.randn(256),
            'swahili': np.random.randn(256),
            'yoruba': np.random.randn(256)
        }

        # Normalize embeddings to unit length.
        for lang in languages:
            languages[lang] = languages[lang] / np.linalg.norm(languages[lang])

        return languages

    def evaluate_subspace_containment(
        self,
        source_lang: str,
        target_lang: str,
        rank: int
    ) -> ContainmentScore:
        """
        Evaluate how much target language subspace is contained in source.

        Args:
            source_lang: High-resource source language
            target_lang: Low-resource target language
            rank: NSN rank for analysis

        Returns:
            ContainmentScore with containment metrics (cached per triple)
        """
        cache_key = (source_lang, target_lang, rank)
        if cache_key in self.containment_cache:
            return self.containment_cache[cache_key]

        # Get language embeddings
        source_emb = self.language_embeddings.get(source_lang)
        target_emb = self.language_embeddings.get(target_lang)

        if source_emb is None or target_emb is None:
            logger.warning(f"Unknown language: {source_lang} or {target_lang}")
            return ContainmentScore(
                source_lang=source_lang,
                target_lang=target_lang,
                rank=rank,
                containment_score=0.0,
                overlap_dimension=0,
                confidence=0.0,
                propagation_recommended=False
            )

        # Truncate to rank dimension
        source_subspace = source_emb[:rank]
        target_subspace = target_emb[:rank]

        # FIX: only the full 256-d embeddings are unit-norm; the truncated
        # sub-vectors are not, so a raw dot product is NOT a cosine similarity
        # and is biased toward 0 (i.e. containment toward 0.5) at low ranks.
        # Renormalize the truncated vectors before taking the dot product,
        # guarding against degenerate zero-norm truncations.
        src_norm = float(np.linalg.norm(source_subspace))
        tgt_norm = float(np.linalg.norm(target_subspace))
        if src_norm < 1e-12 or tgt_norm < 1e-12:
            containment = 0.0
        else:
            cosine = float(np.dot(source_subspace, target_subspace)) / (src_norm * tgt_norm)
            containment = (cosine + 1.0) / 2.0  # Normalize to [0, 1]
        containment = float(np.clip(containment, 0.0, 1.0))

        # Overlap dimension: effective rank of shared subspace
        overlap_dim = int(rank * containment)

        # Confidence based on rank and language resource levels
        confidence = self._compute_containment_confidence(
            source_lang, target_lang, rank, containment
        )

        # Recommend propagation if containment > 0.75 and confidence > 0.7
        propagation_recommended = containment > 0.75 and confidence > 0.7

        result = ContainmentScore(
            source_lang=source_lang,
            target_lang=target_lang,
            rank=rank,
            containment_score=containment,
            overlap_dimension=overlap_dim,
            confidence=confidence,
            propagation_recommended=propagation_recommended
        )

        self.containment_cache[cache_key] = result
        return result

    def _compute_containment_confidence(
        self,
        source_lang: str,
        target_lang: str,
        rank: int,
        containment: float
    ) -> float:
        """Compute confidence in a containment score, in [0, 1].

        Confidence is higher for higher ranks (more dimensions analyzed),
        higher containment scores, and related language families.
        """
        rank_factor = min(rank / 128.0, 1.0)
        containment_factor = containment

        # Language family bonus (simplified: Western European group only)
        family_bonus = 0.0
        if (source_lang in ['english', 'german', 'french', 'spanish'] and
                target_lang in ['english', 'german', 'french', 'spanish']):
            family_bonus = 0.1

        confidence = 0.5 * rank_factor + 0.4 * containment_factor + family_bonus
        return float(np.clip(confidence, 0.0, 1.0))

    def propagate_edit(
        self,
        source_lang: str,
        target_lang: str,
        rank: int,
        edit_vector: np.ndarray
    ) -> PropagationResult:
        """
        Propagate edit from source to target language.

        Args:
            source_lang: Source language
            target_lang: Target language
            rank: NSN rank
            edit_vector: Edit vector in source language

        Returns:
            PropagationResult with propagated edit; on a non-recommended pair
            the result has success=False and a zero propagated vector.
        """
        # Evaluate containment first; it gates the whole transfer.
        containment = self.evaluate_subspace_containment(
            source_lang, target_lang, rank
        )

        if not containment.propagation_recommended:
            logger.warning(
                f"Propagation not recommended: {source_lang} β {target_lang} "
                f"(containment: {containment.containment_score:.3f})"
            )

            result = PropagationResult(
                source_lang=source_lang,
                target_lang=target_lang,
                rank=rank,
                edit_vector=edit_vector,
                propagated_vector=np.zeros_like(edit_vector),
                containment_score=containment.containment_score,
                success=False,
                quality_score=0.0,
                propagation_path=[source_lang, target_lang]
            )

            self.propagation_history.append(result)
            return result

        # Propagate edit via subspace projection
        propagated_vector = self._transfer_edit(
            edit_vector, source_lang, target_lang, rank
        )

        # Compute quality score
        quality_score = self._compute_propagation_quality(
            edit_vector, propagated_vector, containment.containment_score
        )

        result = PropagationResult(
            source_lang=source_lang,
            target_lang=target_lang,
            rank=rank,
            edit_vector=edit_vector,
            propagated_vector=propagated_vector,
            containment_score=containment.containment_score,
            success=True,
            quality_score=quality_score,
            propagation_path=[source_lang, target_lang]
        )

        self.propagation_history.append(result)
        logger.info(
            f"Propagated edit: {source_lang} β {target_lang} "
            f"(quality: {quality_score:.3f})"
        )

        return result

    def _transfer_edit(
        self,
        edit_vector: np.ndarray,
        source_lang: str,
        target_lang: str,
        rank: int
    ) -> np.ndarray:
        """Transfer edit vector from source to target language.

        Simplified transfer: the edit is scaled by the cosine similarity of
        the two rank-truncated language subspaces.
        """
        source_emb = self.language_embeddings[source_lang]
        target_emb = self.language_embeddings[target_lang]

        source_subspace = source_emb[:rank]
        target_subspace = target_emb[:rank]

        # FIX: use the same renormalized cosine as the containment analysis
        # (the raw dot product of non-unit truncations under-weights the
        # transfer at low ranks). Degenerate truncations transfer nothing.
        src_norm = float(np.linalg.norm(source_subspace))
        tgt_norm = float(np.linalg.norm(target_subspace))
        if src_norm < 1e-12 or tgt_norm < 1e-12:
            transfer_weight = 0.0
        else:
            transfer_weight = float(np.dot(source_subspace, target_subspace)) / (
                src_norm * tgt_norm
            )

        # Apply transfer
        return edit_vector * transfer_weight

    def _compute_propagation_quality(
        self,
        original: np.ndarray,
        propagated: np.ndarray,
        containment: float
    ) -> float:
        """Compute quality of a propagated edit, in [0, 1].

        Quality blends the containment score, the direction similarity of the
        two vectors, and how well the edit magnitude was preserved.
        """
        orig_norm = float(np.linalg.norm(original))
        prop_norm = float(np.linalg.norm(propagated))

        # FIX: guard the ORIGINAL vector too — the old code only guarded the
        # propagated one and divided by zero on an all-zero input edit.
        if orig_norm < 1e-6 or prop_norm < 1e-6:
            return 0.0

        # Cosine similarity, normalized to [0, 1]
        similarity = float(np.dot(original, propagated)) / (orig_norm * prop_norm)
        similarity = (similarity + 1.0) / 2.0

        # Magnitude preservation (1.0 means the norm was kept exactly)
        mag_ratio = prop_norm / orig_norm
        mag_score = 1.0 - abs(1.0 - mag_ratio)

        # Combined quality
        quality = 0.5 * containment + 0.3 * similarity + 0.2 * mag_score

        return float(np.clip(quality, 0.0, 1.0))

    def compute_containment_heatmap(
        self,
        languages: List[str],
        rank: int
    ) -> np.ndarray:
        """
        Compute containment heatmap for dashboard visualization.

        Args:
            languages: List of languages to analyze
            rank: NSN rank

        Returns:
            Heatmap matrix (languages x languages); diagonal is 1.0.
        """
        n = len(languages)
        heatmap = np.zeros((n, n))

        for i, source in enumerate(languages):
            for j, target in enumerate(languages):
                if i == j:
                    heatmap[i, j] = 1.0
                else:
                    containment = self.evaluate_subspace_containment(
                        source, target, rank
                    )
                    heatmap[i, j] = containment.containment_score

        return heatmap

    def find_propagation_paths(
        self,
        source_lang: str,
        target_langs: List[str],
        rank: int,
        min_containment: float = 0.75
    ) -> Dict[str, List[str]]:
        """
        Find optimal propagation paths from source to multiple targets.

        Tries the direct pair first; otherwise searches for the best single
        intermediate language (product of the two hop scores must still meet
        ``min_containment``).

        Returns:
            Dict mapping target language to propagation path ([] if none viable)
        """
        paths = {}

        for target in target_langs:
            # Direct path
            direct_containment = self.evaluate_subspace_containment(
                source_lang, target, rank
            )

            if direct_containment.containment_score >= min_containment:
                paths[target] = [source_lang, target]
            else:
                # Try indirect path through one intermediate language
                best_path = None
                best_score = 0.0

                for intermediate in self.language_embeddings.keys():
                    if intermediate in [source_lang, target]:
                        continue

                    c1 = self.evaluate_subspace_containment(
                        source_lang, intermediate, rank
                    )
                    c2 = self.evaluate_subspace_containment(
                        intermediate, target, rank
                    )

                    # Two-hop quality is the product of per-hop containments.
                    combined_score = c1.containment_score * c2.containment_score

                    if combined_score > best_score and combined_score >= min_containment:
                        best_score = combined_score
                        best_path = [source_lang, intermediate, target]

                paths[target] = best_path if best_path else []  # [] = no viable path

        return paths
ensemble_inference_manager.py
ADDED
|
@@ -0,0 +1,400 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Ensemble Inference Across Backends
|
| 4 |
+
Run edits across multiple backends and compute agreement scores
|
| 5 |
+
|
| 6 |
+
"""
|
| 7 |
+
import numpy as np
|
| 8 |
+
from typing import Dict, List, Optional, Tuple
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
import logging
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@dataclass
|
| 16 |
+
class BackendResult:
|
| 17 |
+
"""Result from a single backend"""
|
| 18 |
+
backend_id: str
|
| 19 |
+
edit_vector: np.ndarray
|
| 20 |
+
output: np.ndarray
|
| 21 |
+
confidence: float
|
| 22 |
+
latency: float # seconds
|
| 23 |
+
success: bool
|
| 24 |
+
error_message: Optional[str] = None
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
|
| 28 |
+
class EnsembleResult:
|
| 29 |
+
"""Result from ensemble inference"""
|
| 30 |
+
edit_vector: np.ndarray
|
| 31 |
+
backend_results: List[BackendResult]
|
| 32 |
+
consensus_output: np.ndarray
|
| 33 |
+
agreement_score: float
|
| 34 |
+
reliability_boost: float
|
| 35 |
+
agreement_matrix: np.ndarray
|
| 36 |
+
best_backend: str
|
| 37 |
+
ensemble_confidence: float
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class EnsembleInferenceManager:
|
| 41 |
+
"""
|
| 42 |
+
Run edits across multiple quantum backends and compute agreement scores.
|
| 43 |
+
|
| 44 |
+
Dashboard Extension:
|
| 45 |
+
- Agreement matrix across backends
|
| 46 |
+
- Reliability boost from ensemble consensus
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
+
def __init__(self):
|
| 50 |
+
self.backend_configs = self._initialize_backend_configs()
|
| 51 |
+
self.inference_history: List[EnsembleResult] = []
|
| 52 |
+
|
| 53 |
+
def _initialize_backend_configs(self) -> Dict[str, Dict]:
|
| 54 |
+
"""Initialize backend configurations"""
|
| 55 |
+
return {
|
| 56 |
+
'ibm_manila': {
|
| 57 |
+
'qubits': 5,
|
| 58 |
+
'error_rate': 0.08,
|
| 59 |
+
'gate_fidelity': 0.92,
|
| 60 |
+
'coherence_time': 30.0,
|
| 61 |
+
'base_latency': 0.05
|
| 62 |
+
},
|
| 63 |
+
'ibm_washington': {
|
| 64 |
+
'qubits': 127,
|
| 65 |
+
'error_rate': 0.02,
|
| 66 |
+
'gate_fidelity': 0.98,
|
| 67 |
+
'coherence_time': 120.0,
|
| 68 |
+
'base_latency': 0.15
|
| 69 |
+
},
|
| 70 |
+
'russian_simulator': {
|
| 71 |
+
'qubits': 256,
|
| 72 |
+
'error_rate': 0.001,
|
| 73 |
+
'gate_fidelity': 0.999,
|
| 74 |
+
'coherence_time': 1000.0,
|
| 75 |
+
'base_latency': 0.30
|
| 76 |
+
},
|
| 77 |
+
'ibm_kyoto': {
|
| 78 |
+
'qubits': 127,
|
| 79 |
+
'error_rate': 0.025,
|
| 80 |
+
'gate_fidelity': 0.975,
|
| 81 |
+
'coherence_time': 100.0,
|
| 82 |
+
'base_latency': 0.12
|
| 83 |
+
},
|
| 84 |
+
'google_sycamore': {
|
| 85 |
+
'qubits': 53,
|
| 86 |
+
'error_rate': 0.015,
|
| 87 |
+
'gate_fidelity': 0.985,
|
| 88 |
+
'coherence_time': 80.0,
|
| 89 |
+
'base_latency': 0.08
|
| 90 |
+
}
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
def run_ensemble_inference(
|
| 94 |
+
self,
|
| 95 |
+
edit_vector: np.ndarray,
|
| 96 |
+
backend_list: List[str]
|
| 97 |
+
) -> EnsembleResult:
|
| 98 |
+
"""
|
| 99 |
+
Run inference across multiple backends and compute ensemble result.
|
| 100 |
+
|
| 101 |
+
Args:
|
| 102 |
+
edit_vector: Edit vector to apply
|
| 103 |
+
backend_list: List of backend IDs (e.g., ['ibm_manila', 'ibm_washington'])
|
| 104 |
+
|
| 105 |
+
Returns:
|
| 106 |
+
EnsembleResult with consensus and agreement metrics
|
| 107 |
+
"""
|
| 108 |
+
# Run inference on each backend
|
| 109 |
+
backend_results = []
|
| 110 |
+
|
| 111 |
+
for backend_id in backend_list:
|
| 112 |
+
result = self._run_single_backend(backend_id, edit_vector)
|
| 113 |
+
backend_results.append(result)
|
| 114 |
+
|
| 115 |
+
# Compute agreement matrix
|
| 116 |
+
agreement_matrix = self._compute_agreement_matrix(backend_results)
|
| 117 |
+
|
| 118 |
+
# Compute consensus output
|
| 119 |
+
consensus_output = self._compute_consensus(backend_results)
|
| 120 |
+
|
| 121 |
+
# Compute overall agreement score
|
| 122 |
+
agreement_score = self._compute_overall_agreement(agreement_matrix)
|
| 123 |
+
|
| 124 |
+
# Compute reliability boost
|
| 125 |
+
reliability_boost = self._compute_reliability_boost(
|
| 126 |
+
backend_results, agreement_score
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
# Find best backend
|
| 130 |
+
best_backend = self._select_best_backend(backend_results)
|
| 131 |
+
|
| 132 |
+
# Compute ensemble confidence
|
| 133 |
+
ensemble_confidence = self._compute_ensemble_confidence(
|
| 134 |
+
backend_results, agreement_score
|
| 135 |
+
)
|
| 136 |
+
|
| 137 |
+
result = EnsembleResult(
|
| 138 |
+
edit_vector=edit_vector,
|
| 139 |
+
backend_results=backend_results,
|
| 140 |
+
consensus_output=consensus_output,
|
| 141 |
+
agreement_score=agreement_score,
|
| 142 |
+
reliability_boost=reliability_boost,
|
| 143 |
+
agreement_matrix=agreement_matrix,
|
| 144 |
+
best_backend=best_backend,
|
| 145 |
+
ensemble_confidence=ensemble_confidence
|
| 146 |
+
)
|
| 147 |
+
|
| 148 |
+
self.inference_history.append(result)
|
| 149 |
+
|
| 150 |
+
logger.info(
|
| 151 |
+
f"Ensemble inference complete: {len(backend_list)} backends, "
|
| 152 |
+
f"agreement: {agreement_score:.3f}, boost: {reliability_boost:.3f}"
|
| 153 |
+
)
|
| 154 |
+
|
| 155 |
+
return result
|
| 156 |
+
|
| 157 |
+
def _run_single_backend(
|
| 158 |
+
self, backend_id: str, edit_vector: np.ndarray
|
| 159 |
+
) -> BackendResult:
|
| 160 |
+
"""Run inference on a single backend"""
|
| 161 |
+
config = self.backend_configs.get(backend_id)
|
| 162 |
+
|
| 163 |
+
if config is None:
|
| 164 |
+
logger.warning(f"Unknown backend: {backend_id}")
|
| 165 |
+
return BackendResult(
|
| 166 |
+
backend_id=backend_id,
|
| 167 |
+
edit_vector=edit_vector,
|
| 168 |
+
output=np.zeros_like(edit_vector),
|
| 169 |
+
confidence=0.0,
|
| 170 |
+
latency=0.0,
|
| 171 |
+
success=False,
|
| 172 |
+
error_message=f"Unknown backend: {backend_id}"
|
| 173 |
+
)
|
| 174 |
+
|
| 175 |
+
# Simulate inference with backend-specific noise
|
| 176 |
+
noise_level = config['error_rate']
|
| 177 |
+
noise = np.random.randn(*edit_vector.shape) * noise_level
|
| 178 |
+
|
| 179 |
+
output = edit_vector + noise
|
| 180 |
+
|
| 181 |
+
# Confidence based on gate fidelity
|
| 182 |
+
confidence = config['gate_fidelity']
|
| 183 |
+
|
| 184 |
+
# Latency based on backend and vector size
|
| 185 |
+
latency = config['base_latency'] * (1 + len(edit_vector) / 1000.0)
|
| 186 |
+
|
| 187 |
+
return BackendResult(
|
| 188 |
+
backend_id=backend_id,
|
| 189 |
+
edit_vector=edit_vector,
|
| 190 |
+
output=output,
|
| 191 |
+
confidence=confidence,
|
| 192 |
+
latency=latency,
|
| 193 |
+
success=True
|
| 194 |
+
)
|
| 195 |
+
|
| 196 |
+
def _compute_agreement_matrix(
|
| 197 |
+
self, results: List[BackendResult]
|
| 198 |
+
) -> np.ndarray:
|
| 199 |
+
"""Compute pairwise agreement matrix between backends"""
|
| 200 |
+
n = len(results)
|
| 201 |
+
agreement_matrix = np.zeros((n, n))
|
| 202 |
+
|
| 203 |
+
for i in range(n):
|
| 204 |
+
for j in range(n):
|
| 205 |
+
if i == j:
|
| 206 |
+
agreement_matrix[i, j] = 1.0
|
| 207 |
+
else:
|
| 208 |
+
# Cosine similarity between outputs
|
| 209 |
+
output_i = results[i].output
|
| 210 |
+
output_j = results[j].output
|
| 211 |
+
|
| 212 |
+
if np.linalg.norm(output_i) < 1e-6 or np.linalg.norm(output_j) < 1e-6:
|
| 213 |
+
agreement_matrix[i, j] = 0.0
|
| 214 |
+
else:
|
| 215 |
+
similarity = np.dot(output_i, output_j) / (
|
| 216 |
+
np.linalg.norm(output_i) * np.linalg.norm(output_j)
|
| 217 |
+
)
|
| 218 |
+
# Normalize to [0, 1]
|
| 219 |
+
agreement_matrix[i, j] = (similarity + 1.0) / 2.0
|
| 220 |
+
|
| 221 |
+
return agreement_matrix
|
| 222 |
+
|
| 223 |
+
def _compute_consensus(
|
| 224 |
+
self, results: List[BackendResult]
|
| 225 |
+
) -> np.ndarray:
|
| 226 |
+
"""Compute consensus output from all backends"""
|
| 227 |
+
successful_results = [r for r in results if r.success]
|
| 228 |
+
|
| 229 |
+
if not successful_results:
|
| 230 |
+
return np.zeros_like(results[0].edit_vector)
|
| 231 |
+
|
| 232 |
+
# Weighted average by confidence
|
| 233 |
+
total_confidence = sum(r.confidence for r in successful_results)
|
| 234 |
+
|
| 235 |
+
if total_confidence < 1e-6:
|
| 236 |
+
# Unweighted average
|
| 237 |
+
outputs = [r.output for r in successful_results]
|
| 238 |
+
return np.mean(outputs, axis=0)
|
| 239 |
+
|
| 240 |
+
# Confidence-weighted average
|
| 241 |
+
consensus = np.zeros_like(successful_results[0].output)
|
| 242 |
+
|
| 243 |
+
for result in successful_results:
|
| 244 |
+
weight = result.confidence / total_confidence
|
| 245 |
+
consensus += weight * result.output
|
| 246 |
+
|
| 247 |
+
return consensus
|
| 248 |
+
|
| 249 |
+
def _compute_overall_agreement(self, agreement_matrix: np.ndarray) -> float:
|
| 250 |
+
"""Compute overall agreement score from matrix"""
|
| 251 |
+
# Average of off-diagonal elements
|
| 252 |
+
n = agreement_matrix.shape[0]
|
| 253 |
+
|
| 254 |
+
if n <= 1:
|
| 255 |
+
return 1.0
|
| 256 |
+
|
| 257 |
+
# Sum off-diagonal elements
|
| 258 |
+
total = 0.0
|
| 259 |
+
count = 0
|
| 260 |
+
|
| 261 |
+
for i in range(n):
|
| 262 |
+
for j in range(n):
|
| 263 |
+
if i != j:
|
| 264 |
+
total += agreement_matrix[i, j]
|
| 265 |
+
count += 1
|
| 266 |
+
|
| 267 |
+
return total / count if count > 0 else 0.0
|
| 268 |
+
|
| 269 |
+
def _compute_reliability_boost(
|
| 270 |
+
self, results: List[BackendResult], agreement_score: float
|
| 271 |
+
) -> float:
|
| 272 |
+
"""
|
| 273 |
+
Compute reliability boost from ensemble consensus.
|
| 274 |
+
|
| 275 |
+
Boost is higher when:
|
| 276 |
+
- More backends agree
|
| 277 |
+
- Individual backends have high confidence
|
| 278 |
+
- Agreement score is high
|
| 279 |
+
"""
|
| 280 |
+
if not results:
|
| 281 |
+
return 0.0
|
| 282 |
+
|
| 283 |
+
# Average individual confidence
|
| 284 |
+
avg_confidence = np.mean([r.confidence for r in results if r.success])
|
| 285 |
+
|
| 286 |
+
# Ensemble size factor
|
| 287 |
+
ensemble_factor = min(len(results) / 5.0, 1.0)
|
| 288 |
+
|
| 289 |
+
# Boost formula
|
| 290 |
+
boost = (
|
| 291 |
+
0.4 * agreement_score +
|
| 292 |
+
0.3 * avg_confidence +
|
| 293 |
+
0.3 * ensemble_factor
|
| 294 |
+
)
|
| 295 |
+
|
| 296 |
+
return float(np.clip(boost, 0.0, 1.0))
|
| 297 |
+
|
| 298 |
+
def _select_best_backend(self, results: List[BackendResult]) -> str:
|
| 299 |
+
"""Select best backend based on confidence and success"""
|
| 300 |
+
successful_results = [r for r in results if r.success]
|
| 301 |
+
|
| 302 |
+
if not successful_results:
|
| 303 |
+
return results[0].backend_id if results else "none"
|
| 304 |
+
|
| 305 |
+
# Score by confidence and inverse latency
|
| 306 |
+
scores = {}
|
| 307 |
+
|
| 308 |
+
for result in successful_results:
|
| 309 |
+
scores[result.backend_id] = (
|
| 310 |
+
0.7 * result.confidence +
|
| 311 |
+
0.3 * (1.0 / (1.0 + result.latency))
|
| 312 |
+
)
|
| 313 |
+
|
| 314 |
+
return max(scores, key=scores.get)
|
| 315 |
+
|
| 316 |
+
def _compute_ensemble_confidence(
|
| 317 |
+
self, results: List[BackendResult], agreement_score: float
|
| 318 |
+
) -> float:
|
| 319 |
+
"""Compute overall ensemble confidence"""
|
| 320 |
+
if not results:
|
| 321 |
+
return 0.0
|
| 322 |
+
|
| 323 |
+
# Combine individual confidences with agreement
|
| 324 |
+
avg_confidence = np.mean([r.confidence for r in results if r.success])
|
| 325 |
+
|
| 326 |
+
# Ensemble confidence is boosted by agreement
|
| 327 |
+
ensemble_confidence = 0.6 * avg_confidence + 0.4 * agreement_score
|
| 328 |
+
|
| 329 |
+
return float(np.clip(ensemble_confidence, 0.0, 1.0))
|
| 330 |
+
|
| 331 |
+
def compare_backends(
|
| 332 |
+
self, edit_vectors: List[np.ndarray]
|
| 333 |
+
) -> Dict[str, Dict[str, float]]:
|
| 334 |
+
"""
|
| 335 |
+
Compare all backends across multiple edit vectors.
|
| 336 |
+
|
| 337 |
+
Returns:
|
| 338 |
+
Dict mapping backend_id to performance metrics
|
| 339 |
+
"""
|
| 340 |
+
backend_stats = {
|
| 341 |
+
backend_id: {
|
| 342 |
+
'avg_confidence': [],
|
| 343 |
+
'avg_latency': [],
|
| 344 |
+
'success_rate': []
|
| 345 |
+
}
|
| 346 |
+
for backend_id in self.backend_configs.keys()
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
for edit_vector in edit_vectors:
|
| 350 |
+
for backend_id in self.backend_configs.keys():
|
| 351 |
+
result = self._run_single_backend(backend_id, edit_vector)
|
| 352 |
+
|
| 353 |
+
backend_stats[backend_id]['avg_confidence'].append(result.confidence)
|
| 354 |
+
backend_stats[backend_id]['avg_latency'].append(result.latency)
|
| 355 |
+
backend_stats[backend_id]['success_rate'].append(1.0 if result.success else 0.0)
|
| 356 |
+
|
| 357 |
+
# Compute averages
|
| 358 |
+
comparison = {}
|
| 359 |
+
|
| 360 |
+
for backend_id, stats in backend_stats.items():
|
| 361 |
+
comparison[backend_id] = {
|
| 362 |
+
'avg_confidence': float(np.mean(stats['avg_confidence'])),
|
| 363 |
+
'avg_latency': float(np.mean(stats['avg_latency'])),
|
| 364 |
+
'success_rate': float(np.mean(stats['success_rate']))
|
| 365 |
+
}
|
| 366 |
+
|
| 367 |
+
return comparison
|
| 368 |
+
|
| 369 |
+
def get_agreement_heatmap(
    self, backend_list: List[str], edit_vector: np.ndarray
) -> Tuple[np.ndarray, List[str]]:
    """
    Produce the data needed to render an agreement heatmap.

    Runs a full ensemble inference over the requested backends and
    exposes its pairwise agreement matrix alongside the backend labels.

    Args:
        backend_list: Backend identifiers (row/column order of the matrix).
        edit_vector: Edit vector to run the ensemble on.

    Returns:
        Tuple of (agreement_matrix, backend_labels).
    """
    ensemble_result = self.run_ensemble_inference(edit_vector, backend_list)
    return ensemble_result.agreement_matrix, backend_list
|
| 380 |
+
|
| 381 |
+
def compute_reliability_metrics(self) -> Dict[str, float]:
    """
    Summarize reliability over all recorded ensemble inferences.

    Returns:
        Dict with 'avg_agreement', 'avg_reliability_boost' and
        'avg_ensemble_confidence'; all zeros when no history exists.
    """
    history = self.inference_history
    if not history:
        return {
            'avg_agreement': 0.0,
            'avg_reliability_boost': 0.0,
            'avg_ensemble_confidence': 0.0,
        }

    def _mean_of(attr: str) -> float:
        # Average a single attribute over every recorded inference.
        return float(np.mean([getattr(entry, attr) for entry in history]))

    return {
        'avg_agreement': _mean_of('agreement_score'),
        'avg_reliability_boost': _mean_of('reliability_boost'),
        'avg_ensemble_confidence': _mean_of('ensemble_confidence'),
    }
|
rank_feedback_generator.py
ADDED
|
@@ -0,0 +1,484 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Contributor-Aware Rank Feedback Loop
|
| 4 |
+
Recommend optimal ranks based on contributor history and efficiency
|
| 5 |
+
|
| 6 |
+
Based on:
|
| 7 |
+
Zhang, Y., et al. (2024). "Deep Hierarchical Learning with Nested Subspace Networks."
|
| 8 |
+
arXiv preprint. NSN framework for hierarchical representation learning.
|
| 9 |
+
"""
|
| 10 |
+
import logging
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@dataclass
class SubmissionRecord:
    """Record of a single contributor submission to the leaderboard."""
    contributor_id: str  # Unique contributor identifier
    language: str        # Target language of the edit submission
    rank: int            # Adapter rank used for the submission
    accuracy: float      # Measured edit accuracy
    flops: float         # Compute cost of the submission (FLOPs)
    uncertainty: float   # Model uncertainty estimate for the edit
    timestamp: str       # ISO-8601 submission time
    efficiency: float    # accuracy / flops
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@dataclass
class RankRecommendation:
    """Rank recommendation for contributor."""
    contributor_id: str                      # Contributor this recommendation applies to
    recommended_rank: int                    # Suggested adapter rank
    confidence: float                        # Confidence in the recommendation, in [0, 1]
    rationale: str                           # Human-readable explanation
    unexplored_pairs: List[Tuple[int, str]]  # (rank, language) pairs not yet tried
    efficiency_prediction: float             # Predicted efficiency at the recommended rank
    personalized_badge: str                  # Gamification badge label
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class RankFeedbackGenerator:
    """
    Recommend optimal ranks based on contributor history and efficiency.

    Leaderboard Extension:
    - Personalized rank badges
    - Suggestion panel for unexplored rank-language pairs
    """

    def __init__(self):
        # Per-contributor submission log, keyed by contributor_id.
        self.submission_history: Dict[str, List[SubmissionRecord]] = {}
        # Adapter ranks contributors may submit at.
        self.rank_options = [8, 16, 32, 64, 128, 256]
        # Supported target languages; the last five are treated as
        # low-resource elsewhere in this class.
        self.language_options = [
            'english', 'chinese', 'spanish', 'french', 'german',
            'russian', 'arabic', 'japanese', 'korean', 'portuguese',
            'indonesian', 'vietnamese', 'thai', 'swahili', 'yoruba'
        ]
|
| 60 |
+
|
| 61 |
+
def record_submission(
    self,
    contributor_id: str,
    language: str,
    rank: int,
    accuracy: float,
    flops: float,
    uncertainty: float,
    timestamp: Optional[str] = None
) -> None:
    """
    Record a contributor submission in the history log.

    Args:
        contributor_id: Unique contributor identifier.
        language: Target language of the submission.
        rank: Adapter rank used.
        accuracy: Measured edit accuracy.
        flops: Compute cost; must be > 0 for a meaningful efficiency score.
        uncertainty: Model uncertainty estimate.
        timestamp: ISO-8601 time string; defaults to the current time.
    """
    if timestamp is None:
        from datetime import datetime
        timestamp = datetime.now().isoformat()

    # Efficiency is accuracy per FLOP; guard non-positive cost.
    efficiency = accuracy / flops if flops > 0 else 0.0

    record = SubmissionRecord(
        contributor_id=contributor_id,
        language=language,
        rank=rank,
        accuracy=accuracy,
        flops=flops,
        uncertainty=uncertainty,
        timestamp=timestamp,
        efficiency=efficiency
    )

    # setdefault replaces the separate membership check + assignment.
    self.submission_history.setdefault(contributor_id, []).append(record)
    logger.info(
        f"Recorded submission: {contributor_id} - {language} @ rank {rank} "
        f"(accuracy: {accuracy:.3f}, efficiency: {efficiency:.2e})"
    )
|
| 97 |
+
|
| 98 |
+
def recommend_rank(
    self,
    contributor_id: str,
    target_language: Optional[str] = None
) -> RankRecommendation:
    """
    Recommend an optimal rank based on contributor history.

    Args:
        contributor_id: Contributor identifier.
        target_language: Optional language to specialize for; falls back
            to the full history when the contributor has no submissions
            in that language.

    Returns:
        RankRecommendation with personalized suggestions.
    """
    history = self.submission_history.get(contributor_id, [])

    # New contributor: no data to analyze, return a safe default.
    if not history:
        return RankRecommendation(
            contributor_id=contributor_id,
            recommended_rank=32,
            confidence=0.5,
            rationale="Starting recommendation for new contributor",
            unexplored_pairs=self._get_unexplored_pairs(contributor_id),
            efficiency_prediction=0.0,
            personalized_badge="π Newcomer"
        )

    if target_language is not None:
        # Language-specific recommendation when data exists for it.
        per_language = [s for s in history if s.language == target_language]
        if per_language:
            return self._recommend_from_history(
                contributor_id, per_language, target_language
            )

    # No language filter (or no data for it): use the full history.
    return self._recommend_from_history(contributor_id, history)
|
| 138 |
+
|
| 139 |
+
def _recommend_from_history(
    self,
    contributor_id: str,
    submissions: List[SubmissionRecord],
    target_language: Optional[str] = None
) -> RankRecommendation:
    """Build a recommendation from a (possibly language-filtered) history."""
    # Best single submission by efficiency; used in the rationale when
    # the recommended rank itself has no data yet.
    top_submission = max(submissions, key=lambda rec: rec.efficiency)

    per_rank_stats = self._analyze_rank_performance(submissions)
    chosen_rank = self._select_optimal_rank(per_rank_stats)
    confidence = self._compute_recommendation_confidence(
        submissions, chosen_rank
    )
    rationale = self._generate_rationale(
        submissions, chosen_rank, top_submission
    )
    unexplored = self._get_unexplored_pairs(contributor_id)
    predicted_efficiency = self._predict_efficiency(
        submissions, chosen_rank
    )
    badge = self._assign_badge(submissions)

    return RankRecommendation(
        contributor_id=contributor_id,
        recommended_rank=chosen_rank,
        confidence=confidence,
        rationale=rationale,
        unexplored_pairs=unexplored[:5],  # Top 5 suggestions
        efficiency_prediction=predicted_efficiency,
        personalized_badge=badge
    )
|
| 185 |
+
|
| 186 |
+
def _analyze_rank_performance(
|
| 187 |
+
self, submissions: List[SubmissionRecord]
|
| 188 |
+
) -> Dict[int, Dict[str, float]]:
|
| 189 |
+
"""Analyze performance at each rank"""
|
| 190 |
+
rank_stats = {}
|
| 191 |
+
|
| 192 |
+
for rank in self.rank_options:
|
| 193 |
+
rank_subs = [s for s in submissions if s.rank == rank]
|
| 194 |
+
|
| 195 |
+
if rank_subs:
|
| 196 |
+
rank_stats[rank] = {
|
| 197 |
+
'avg_accuracy': np.mean([s.accuracy for s in rank_subs]),
|
| 198 |
+
'avg_efficiency': np.mean([s.efficiency for s in rank_subs]),
|
| 199 |
+
'avg_uncertainty': np.mean([s.uncertainty for s in rank_subs]),
|
| 200 |
+
'count': len(rank_subs)
|
| 201 |
+
}
|
| 202 |
+
else:
|
| 203 |
+
rank_stats[rank] = {
|
| 204 |
+
'avg_accuracy': 0.0,
|
| 205 |
+
'avg_efficiency': 0.0,
|
| 206 |
+
'avg_uncertainty': 1.0,
|
| 207 |
+
'count': 0
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
return rank_stats
|
| 211 |
+
|
| 212 |
+
def _select_optimal_rank(
|
| 213 |
+
self, rank_performance: Dict[int, Dict[str, float]]
|
| 214 |
+
) -> int:
|
| 215 |
+
"""Select optimal rank based on performance"""
|
| 216 |
+
# Score each rank by efficiency and accuracy
|
| 217 |
+
scores = {}
|
| 218 |
+
|
| 219 |
+
for rank, stats in rank_performance.items():
|
| 220 |
+
if stats['count'] == 0:
|
| 221 |
+
scores[rank] = 0.0
|
| 222 |
+
else:
|
| 223 |
+
# Weighted score: 60% efficiency, 40% accuracy
|
| 224 |
+
scores[rank] = (
|
| 225 |
+
0.6 * stats['avg_efficiency'] * 1e8 + # Scale efficiency
|
| 226 |
+
0.4 * stats['avg_accuracy']
|
| 227 |
+
)
|
| 228 |
+
|
| 229 |
+
# Return rank with highest score
|
| 230 |
+
if not scores or max(scores.values()) == 0:
|
| 231 |
+
return 32 # Default
|
| 232 |
+
|
| 233 |
+
return max(scores, key=scores.get)
|
| 234 |
+
|
| 235 |
+
def _compute_recommendation_confidence(
|
| 236 |
+
self, submissions: List[SubmissionRecord], recommended_rank: int
|
| 237 |
+
) -> float:
|
| 238 |
+
"""Compute confidence in recommendation"""
|
| 239 |
+
# Confidence based on:
|
| 240 |
+
# - Number of submissions at recommended rank
|
| 241 |
+
# - Consistency of performance
|
| 242 |
+
# - Total submission count
|
| 243 |
+
|
| 244 |
+
rank_subs = [s for s in submissions if s.rank == recommended_rank]
|
| 245 |
+
|
| 246 |
+
if not rank_subs:
|
| 247 |
+
return 0.3 # Low confidence for untested rank
|
| 248 |
+
|
| 249 |
+
# Sample size factor
|
| 250 |
+
sample_factor = min(len(rank_subs) / 10.0, 1.0)
|
| 251 |
+
|
| 252 |
+
# Consistency factor (low variance in efficiency)
|
| 253 |
+
efficiencies = [s.efficiency for s in rank_subs]
|
| 254 |
+
if len(efficiencies) > 1:
|
| 255 |
+
consistency = 1.0 - min(np.std(efficiencies) / np.mean(efficiencies), 1.0)
|
| 256 |
+
else:
|
| 257 |
+
consistency = 0.5
|
| 258 |
+
|
| 259 |
+
# Experience factor
|
| 260 |
+
experience = min(len(submissions) / 20.0, 1.0)
|
| 261 |
+
|
| 262 |
+
confidence = 0.4 * sample_factor + 0.3 * consistency + 0.3 * experience
|
| 263 |
+
|
| 264 |
+
return float(np.clip(confidence, 0.0, 1.0))
|
| 265 |
+
|
| 266 |
+
def _generate_rationale(
|
| 267 |
+
self,
|
| 268 |
+
submissions: List[SubmissionRecord],
|
| 269 |
+
recommended_rank: int,
|
| 270 |
+
best_submission: SubmissionRecord
|
| 271 |
+
) -> str:
|
| 272 |
+
"""Generate human-readable rationale"""
|
| 273 |
+
rank_subs = [s for s in submissions if s.rank == recommended_rank]
|
| 274 |
+
|
| 275 |
+
if not rank_subs:
|
| 276 |
+
return (
|
| 277 |
+
f"Rank {recommended_rank} recommended based on interpolation "
|
| 278 |
+
f"from your best performance at rank {best_submission.rank} "
|
| 279 |
+
f"(efficiency: {best_submission.efficiency:.2e})"
|
| 280 |
+
)
|
| 281 |
+
|
| 282 |
+
avg_accuracy = np.mean([s.accuracy for s in rank_subs])
|
| 283 |
+
avg_efficiency = np.mean([s.efficiency for s in rank_subs])
|
| 284 |
+
|
| 285 |
+
return (
|
| 286 |
+
f"Rank {recommended_rank} shows best efficiency ({avg_efficiency:.2e}) "
|
| 287 |
+
f"with {len(rank_subs)} submissions averaging {avg_accuracy:.3f} accuracy. "
|
| 288 |
+
f"This balances compute cost and performance for your editing style."
|
| 289 |
+
)
|
| 290 |
+
|
| 291 |
+
def _get_unexplored_pairs(
|
| 292 |
+
self, contributor_id: str
|
| 293 |
+
) -> List[Tuple[int, str]]:
|
| 294 |
+
"""Get unexplored rank-language pairs"""
|
| 295 |
+
submissions = self.submission_history.get(contributor_id, [])
|
| 296 |
+
|
| 297 |
+
explored = set((s.rank, s.language) for s in submissions)
|
| 298 |
+
|
| 299 |
+
all_pairs = [
|
| 300 |
+
(rank, lang)
|
| 301 |
+
for rank in self.rank_options
|
| 302 |
+
for lang in self.language_options
|
| 303 |
+
]
|
| 304 |
+
|
| 305 |
+
unexplored = [pair for pair in all_pairs if pair not in explored]
|
| 306 |
+
|
| 307 |
+
# Prioritize by potential value
|
| 308 |
+
# Prefer: medium ranks, diverse languages
|
| 309 |
+
def priority_score(pair):
|
| 310 |
+
rank, lang = pair
|
| 311 |
+
rank_score = 1.0 - abs(rank - 64) / 128.0 # Prefer rank 64
|
| 312 |
+
|
| 313 |
+
# Prefer low-resource languages (more impact)
|
| 314 |
+
low_resource = ['indonesian', 'vietnamese', 'thai', 'swahili', 'yoruba']
|
| 315 |
+
lang_score = 1.5 if lang in low_resource else 1.0
|
| 316 |
+
|
| 317 |
+
return rank_score * lang_score
|
| 318 |
+
|
| 319 |
+
unexplored.sort(key=priority_score, reverse=True)
|
| 320 |
+
|
| 321 |
+
return unexplored
|
| 322 |
+
|
| 323 |
+
def _predict_efficiency(
|
| 324 |
+
self, submissions: List[SubmissionRecord], rank: int
|
| 325 |
+
) -> float:
|
| 326 |
+
"""Predict efficiency at given rank"""
|
| 327 |
+
# Simple linear interpolation from existing data
|
| 328 |
+
rank_subs = [s for s in submissions if s.rank == rank]
|
| 329 |
+
|
| 330 |
+
if rank_subs:
|
| 331 |
+
return np.mean([s.efficiency for s in rank_subs])
|
| 332 |
+
|
| 333 |
+
# Interpolate from nearby ranks
|
| 334 |
+
nearby_ranks = sorted([s.rank for s in submissions])
|
| 335 |
+
|
| 336 |
+
if not nearby_ranks:
|
| 337 |
+
return 0.0
|
| 338 |
+
|
| 339 |
+
# Find closest ranks
|
| 340 |
+
lower = [r for r in nearby_ranks if r < rank]
|
| 341 |
+
upper = [r for r in nearby_ranks if r > rank]
|
| 342 |
+
|
| 343 |
+
if lower and upper:
|
| 344 |
+
lower_rank = max(lower)
|
| 345 |
+
upper_rank = min(upper)
|
| 346 |
+
|
| 347 |
+
lower_eff = np.mean([
|
| 348 |
+
s.efficiency for s in submissions if s.rank == lower_rank
|
| 349 |
+
])
|
| 350 |
+
upper_eff = np.mean([
|
| 351 |
+
s.efficiency for s in submissions if s.rank == upper_rank
|
| 352 |
+
])
|
| 353 |
+
|
| 354 |
+
# Linear interpolation
|
| 355 |
+
weight = (rank - lower_rank) / (upper_rank - lower_rank)
|
| 356 |
+
return lower_eff * (1 - weight) + upper_eff * weight
|
| 357 |
+
|
| 358 |
+
# Use closest available rank
|
| 359 |
+
closest_rank = min(nearby_ranks, key=lambda r: abs(r - rank))
|
| 360 |
+
return np.mean([s.efficiency for s in submissions if s.rank == closest_rank])
|
| 361 |
+
|
| 362 |
+
def _assign_badge(self, submissions: List[SubmissionRecord]) -> str:
|
| 363 |
+
"""Assign personalized badge based on performance"""
|
| 364 |
+
if not submissions:
|
| 365 |
+
return "π Newcomer"
|
| 366 |
+
|
| 367 |
+
# Analyze submission characteristics
|
| 368 |
+
total_subs = len(submissions)
|
| 369 |
+
unique_langs = len(set(s.language for s in submissions))
|
| 370 |
+
unique_ranks = len(set(s.rank for s in submissions))
|
| 371 |
+
avg_accuracy = np.mean([s.accuracy for s in submissions])
|
| 372 |
+
avg_efficiency = np.mean([s.efficiency for s in submissions])
|
| 373 |
+
|
| 374 |
+
# Badge criteria
|
| 375 |
+
if total_subs >= 50 and unique_langs >= 10:
|
| 376 |
+
return "π Master Contributor"
|
| 377 |
+
elif avg_efficiency > 1e-7:
|
| 378 |
+
return "β‘ Efficiency Expert"
|
| 379 |
+
elif avg_accuracy > 0.95:
|
| 380 |
+
return "π― Accuracy Champion"
|
| 381 |
+
elif unique_ranks >= 5:
|
| 382 |
+
return "π¬ Rank Explorer"
|
| 383 |
+
elif unique_langs >= 8:
|
| 384 |
+
return "π Multilingual Specialist"
|
| 385 |
+
elif total_subs >= 20:
|
| 386 |
+
return "πͺ Active Contributor"
|
| 387 |
+
elif total_subs >= 10:
|
| 388 |
+
return "π Rising Star"
|
| 389 |
+
else:
|
| 390 |
+
return "π Getting Started"
|
| 391 |
+
|
| 392 |
+
def generate_feedback_panel(
    self, contributor_id: str
) -> Dict[str, Any]:
    """
    Generate a comprehensive feedback panel for the dashboard.

    Args:
        contributor_id: Contributor to build the panel for.

    Returns:
        Dict with 'recommendation' (RankRecommendation), 'stats'
        (summary numbers; empty for new contributors) and 'suggestions'
        (actionable strings). Note: annotation fixed from the builtin
        ``any`` to ``typing.Any``.
    """
    submissions = self.submission_history.get(contributor_id, [])
    recommendation = self.recommend_rank(contributor_id)

    # New contributor: no stats yet, return onboarding suggestions.
    if not submissions:
        return {
            'recommendation': recommendation,
            'stats': {},
            'suggestions': [
                "Start with rank 32 for balanced performance",
                "Try high-resource languages (English, Chinese) first",
                "Focus on accuracy before optimizing efficiency"
            ]
        }

    # Compute statistics
    stats = {
        'total_submissions': len(submissions),
        'unique_languages': len(set(s.language for s in submissions)),
        'unique_ranks': len(set(s.rank for s in submissions)),
        'avg_accuracy': float(np.mean([s.accuracy for s in submissions])),
        'avg_efficiency': float(np.mean([s.efficiency for s in submissions])),
        'best_accuracy': float(max(s.accuracy for s in submissions)),
        'best_efficiency': float(max(s.efficiency for s in submissions))
    }

    suggestions = self._generate_suggestions(submissions, recommendation)

    return {
        'recommendation': recommendation,
        'stats': stats,
        'suggestions': suggestions
    }
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
def _generate_suggestions(
|
| 437 |
+
self,
|
| 438 |
+
submissions: List[SubmissionRecord],
|
| 439 |
+
recommendation: RankRecommendation
|
| 440 |
+
) -> List[str]:
|
| 441 |
+
"""Generate actionable suggestions"""
|
| 442 |
+
suggestions = []
|
| 443 |
+
|
| 444 |
+
# Analyze gaps
|
| 445 |
+
tested_ranks = set(s.rank for s in submissions)
|
| 446 |
+
tested_langs = set(s.language for s in submissions)
|
| 447 |
+
|
| 448 |
+
# Rank diversity
|
| 449 |
+
if len(tested_ranks) < 3:
|
| 450 |
+
suggestions.append(
|
| 451 |
+
f"Try exploring more ranks - you've only tested {len(tested_ranks)} so far"
|
| 452 |
+
)
|
| 453 |
+
|
| 454 |
+
# Language diversity
|
| 455 |
+
low_resource = ['indonesian', 'vietnamese', 'thai', 'swahili', 'yoruba']
|
| 456 |
+
tested_low_resource = [l for l in tested_langs if l in low_resource]
|
| 457 |
+
|
| 458 |
+
if len(tested_low_resource) < 2:
|
| 459 |
+
suggestions.append(
|
| 460 |
+
"Consider testing low-resource languages for higher impact"
|
| 461 |
+
)
|
| 462 |
+
|
| 463 |
+
# Efficiency optimization
|
| 464 |
+
avg_efficiency = np.mean([s.efficiency for s in submissions])
|
| 465 |
+
if avg_efficiency < 5e-8:
|
| 466 |
+
suggestions.append(
|
| 467 |
+
"Focus on efficiency - try lower ranks to reduce FLOPs"
|
| 468 |
+
)
|
| 469 |
+
|
| 470 |
+
# Accuracy improvement
|
| 471 |
+
avg_accuracy = np.mean([s.accuracy for s in submissions])
|
| 472 |
+
if avg_accuracy < 0.85:
|
| 473 |
+
suggestions.append(
|
| 474 |
+
"Accuracy could be improved - try higher ranks or refine your edits"
|
| 475 |
+
)
|
| 476 |
+
|
| 477 |
+
# Unexplored pairs
|
| 478 |
+
if recommendation.unexplored_pairs:
|
| 479 |
+
top_pair = recommendation.unexplored_pairs[0]
|
| 480 |
+
suggestions.append(
|
| 481 |
+
f"High-value opportunity: Try rank {top_pair[0]} with {top_pair[1]}"
|
| 482 |
+
)
|
| 483 |
+
|
| 484 |
+
return suggestions[:5] # Top 5 suggestions
|