Nurcholish committed · verified
Commit db27299 · 1 Parent(s): 6b08db7

Upload 4 files
backend_telemetry_rank_adapter.py ADDED
File without changes
edit_propagation_engine.py ADDED
@@ -0,0 +1,398 @@
# -*- coding: utf-8 -*-
"""
Cross-Lingual Edit Propagation via Subspace Containment
Transfer high-resource corrections to low-resource languages using containment scores

Based on:
Zhang, Y., et al. (2024). "Deep Hierarchical Learning with Nested Subspace Networks."
arXiv preprint. NSN framework for hierarchical representation learning.
"""
import numpy as np
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass
import logging

logger = logging.getLogger(__name__)


@dataclass
class ContainmentScore:
    """Subspace containment analysis result"""
    source_lang: str
    target_lang: str
    rank: int
    containment_score: float  # 0-1, how much target is contained in source
    overlap_dimension: int  # Dimension of overlap
    confidence: float
    propagation_recommended: bool


@dataclass
class PropagationResult:
    """Result of edit propagation"""
    source_lang: str
    target_lang: str
    rank: int
    edit_vector: np.ndarray
    propagated_vector: np.ndarray
    containment_score: float
    success: bool
    quality_score: float  # Predicted quality after propagation
    propagation_path: List[str]  # Languages in propagation chain


class EditPropagationEngine:
    """
    Transfer edits from high-resource to low-resource languages using
    subspace containment analysis.

    Dashboard Extension:
    - Heatmap of containment scores across language pairs
    - Flow arrows showing edit propagation paths
    """

    def __init__(self):
        self.language_embeddings = self._initialize_language_embeddings()
        self.containment_cache: Dict[Tuple[str, str, int], ContainmentScore] = {}
        self.propagation_history: List[PropagationResult] = []

    def _initialize_language_embeddings(self) -> Dict[str, np.ndarray]:
        """Initialize language subspace embeddings"""
        # Simulated language embeddings (in practice, learned from data)
        np.random.seed(42)

        languages = {
            # High-resource languages (larger subspaces)
            'english': np.random.randn(256),
            'chinese': np.random.randn(256),
            'spanish': np.random.randn(256),
            'french': np.random.randn(256),
            'german': np.random.randn(256),

            # Medium-resource languages
            'russian': np.random.randn(256),
            'arabic': np.random.randn(256),
            'japanese': np.random.randn(256),
            'korean': np.random.randn(256),
            'portuguese': np.random.randn(256),

            # Low-resource languages (smaller subspaces)
            'indonesian': np.random.randn(256),
            'vietnamese': np.random.randn(256),
            'thai': np.random.randn(256),
            'swahili': np.random.randn(256),
            'yoruba': np.random.randn(256)
        }

        # Normalize embeddings
        for lang in languages:
            languages[lang] = languages[lang] / np.linalg.norm(languages[lang])

        return languages

    def evaluate_subspace_containment(
        self,
        source_lang: str,
        target_lang: str,
        rank: int
    ) -> ContainmentScore:
        """
        Evaluate how much target language subspace is contained in source.

        Args:
            source_lang: High-resource source language
            target_lang: Low-resource target language
            rank: NSN rank for analysis

        Returns:
            ContainmentScore with containment metrics
        """
        cache_key = (source_lang, target_lang, rank)
        if cache_key in self.containment_cache:
            return self.containment_cache[cache_key]

        # Get language embeddings
        source_emb = self.language_embeddings.get(source_lang)
        target_emb = self.language_embeddings.get(target_lang)

        if source_emb is None or target_emb is None:
            logger.warning(f"Unknown language: {source_lang} or {target_lang}")
            return ContainmentScore(
                source_lang=source_lang,
                target_lang=target_lang,
                rank=rank,
                containment_score=0.0,
                overlap_dimension=0,
                confidence=0.0,
                propagation_recommended=False
            )

        # Compute containment via projection
        # Truncate to rank dimension
        source_subspace = source_emb[:rank]
        target_subspace = target_emb[:rank]

        # Containment score: cosine similarity in rank-dimensional subspace
        containment = float(np.dot(source_subspace, target_subspace))
        containment = (containment + 1.0) / 2.0  # Normalize to [0, 1]

        # Overlap dimension: effective rank of shared subspace
        overlap_dim = int(rank * containment)

        # Confidence based on rank and language resource levels
        confidence = self._compute_containment_confidence(
            source_lang, target_lang, rank, containment
        )

        # Recommend propagation if containment > 0.75 and confidence > 0.7
        propagation_recommended = containment > 0.75 and confidence > 0.7

        result = ContainmentScore(
            source_lang=source_lang,
            target_lang=target_lang,
            rank=rank,
            containment_score=containment,
            overlap_dimension=overlap_dim,
            confidence=confidence,
            propagation_recommended=propagation_recommended
        )

        self.containment_cache[cache_key] = result
        return result

    def _compute_containment_confidence(
        self,
        source_lang: str,
        target_lang: str,
        rank: int,
        containment: float
    ) -> float:
        """Compute confidence in containment score"""
        # Higher confidence for:
        # - Higher ranks (more dimensions to analyze)
        # - Higher containment scores
        # - Related language families

        rank_factor = min(rank / 128.0, 1.0)
        containment_factor = containment

        # Language family bonus (simplified)
        family_bonus = 0.0
        if (source_lang in ['english', 'german', 'french', 'spanish'] and
                target_lang in ['english', 'german', 'french', 'spanish']):
            family_bonus = 0.1

        confidence = 0.5 * rank_factor + 0.4 * containment_factor + family_bonus
        return float(np.clip(confidence, 0.0, 1.0))

    def propagate_edit(
        self,
        source_lang: str,
        target_lang: str,
        rank: int,
        edit_vector: np.ndarray
    ) -> PropagationResult:
        """
        Propagate edit from source to target language.

        Args:
            source_lang: Source language
            target_lang: Target language
            rank: NSN rank
            edit_vector: Edit vector in source language

        Returns:
            PropagationResult with propagated edit
        """
        # Evaluate containment
        containment = self.evaluate_subspace_containment(
            source_lang, target_lang, rank
        )

        if not containment.propagation_recommended:
            logger.warning(
                f"Propagation not recommended: {source_lang} → {target_lang} "
                f"(containment: {containment.containment_score:.3f})"
            )

            result = PropagationResult(
                source_lang=source_lang,
                target_lang=target_lang,
                rank=rank,
                edit_vector=edit_vector,
                propagated_vector=np.zeros_like(edit_vector),
                containment_score=containment.containment_score,
                success=False,
                quality_score=0.0,
                propagation_path=[source_lang, target_lang]
            )

            self.propagation_history.append(result)
            return result

        # Propagate edit via subspace projection
        propagated_vector = self._transfer_edit(
            edit_vector, source_lang, target_lang, rank
        )

        # Compute quality score
        quality_score = self._compute_propagation_quality(
            edit_vector, propagated_vector, containment.containment_score
        )

        result = PropagationResult(
            source_lang=source_lang,
            target_lang=target_lang,
            rank=rank,
            edit_vector=edit_vector,
            propagated_vector=propagated_vector,
            containment_score=containment.containment_score,
            success=True,
            quality_score=quality_score,
            propagation_path=[source_lang, target_lang]
        )

        self.propagation_history.append(result)
        logger.info(
            f"Propagated edit: {source_lang} → {target_lang} "
            f"(quality: {quality_score:.3f})"
        )

        return result

    def _transfer_edit(
        self,
        edit_vector: np.ndarray,
        source_lang: str,
        target_lang: str,
        rank: int
    ) -> np.ndarray:
        """Transfer edit vector from source to target language"""
        # Get language embeddings
        source_emb = self.language_embeddings[source_lang]
        target_emb = self.language_embeddings[target_lang]

        # Project edit onto shared subspace
        # Simplified: weighted combination based on containment
        source_subspace = source_emb[:rank]
        target_subspace = target_emb[:rank]

        # Compute transfer matrix (simplified)
        transfer_weight = np.dot(source_subspace, target_subspace)

        # Apply transfer
        propagated = edit_vector * transfer_weight

        return propagated

    def _compute_propagation_quality(
        self,
        original: np.ndarray,
        propagated: np.ndarray,
        containment: float
    ) -> float:
        """Compute quality of propagated edit"""
        # Quality based on:
        # - Containment score
        # - Vector similarity
        # - Magnitude preservation

        if np.linalg.norm(propagated) < 1e-6:
            return 0.0

        # Cosine similarity
        similarity = np.dot(original, propagated) / (
            np.linalg.norm(original) * np.linalg.norm(propagated)
        )
        similarity = (similarity + 1.0) / 2.0  # Normalize to [0, 1]

        # Magnitude preservation
        mag_ratio = np.linalg.norm(propagated) / np.linalg.norm(original)
        mag_score = 1.0 - abs(1.0 - mag_ratio)

        # Combined quality
        quality = 0.5 * containment + 0.3 * similarity + 0.2 * mag_score

        return float(np.clip(quality, 0.0, 1.0))

    def compute_containment_heatmap(
        self,
        languages: List[str],
        rank: int
    ) -> np.ndarray:
        """
        Compute containment heatmap for dashboard visualization.

        Args:
            languages: List of languages to analyze
            rank: NSN rank

        Returns:
            Heatmap matrix (languages x languages)
        """
        n = len(languages)
        heatmap = np.zeros((n, n))

        for i, source in enumerate(languages):
            for j, target in enumerate(languages):
                if i == j:
                    heatmap[i, j] = 1.0
                else:
                    containment = self.evaluate_subspace_containment(
                        source, target, rank
                    )
                    heatmap[i, j] = containment.containment_score

        return heatmap

    def find_propagation_paths(
        self,
        source_lang: str,
        target_langs: List[str],
        rank: int,
        min_containment: float = 0.75
    ) -> Dict[str, List[str]]:
        """
        Find optimal propagation paths from source to multiple targets.

        Returns:
            Dict mapping target language to propagation path
        """
        paths = {}

        for target in target_langs:
            # Direct path
            direct_containment = self.evaluate_subspace_containment(
                source_lang, target, rank
            )

            if direct_containment.containment_score >= min_containment:
                paths[target] = [source_lang, target]
            else:
                # Try indirect path through intermediate language
                best_path = None
                best_score = 0.0

                for intermediate in self.language_embeddings.keys():
                    if intermediate in [source_lang, target]:
                        continue

                    c1 = self.evaluate_subspace_containment(
                        source_lang, intermediate, rank
                    )
                    c2 = self.evaluate_subspace_containment(
                        intermediate, target, rank
                    )

                    combined_score = c1.containment_score * c2.containment_score

                    if combined_score > best_score and combined_score >= min_containment:
                        best_score = combined_score
                        best_path = [source_lang, intermediate, target]

                if best_path:
                    paths[target] = best_path
                else:
                    paths[target] = []  # No viable path

        return paths
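A minimal usage sketch for EditPropagationEngine (not part of the uploaded files; it assumes the file is importable as the module edit_propagation_engine, and the language pair and random edit vector are purely illustrative):

# Usage sketch (illustrative, hypothetical values)
import numpy as np
from edit_propagation_engine import EditPropagationEngine

engine = EditPropagationEngine()

# Check how well Indonesian is contained in the English subspace at rank 64
score = engine.evaluate_subspace_containment('english', 'indonesian', rank=64)
print(score.containment_score, score.propagation_recommended)

# Propagate a stand-in edit vector; real edits would come from a model-editing pipeline
edit = np.random.randn(256)
result = engine.propagate_edit('english', 'indonesian', rank=64, edit_vector=edit)
print(result.success, result.quality_score, result.propagation_path)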
ensemble_inference_manager.py ADDED
@@ -0,0 +1,400 @@
# -*- coding: utf-8 -*-
"""
Ensemble Inference Across Backends
Run edits across multiple backends and compute agreement scores
"""
import numpy as np
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass
import logging

logger = logging.getLogger(__name__)


@dataclass
class BackendResult:
    """Result from a single backend"""
    backend_id: str
    edit_vector: np.ndarray
    output: np.ndarray
    confidence: float
    latency: float  # seconds
    success: bool
    error_message: Optional[str] = None


@dataclass
class EnsembleResult:
    """Result from ensemble inference"""
    edit_vector: np.ndarray
    backend_results: List[BackendResult]
    consensus_output: np.ndarray
    agreement_score: float
    reliability_boost: float
    agreement_matrix: np.ndarray
    best_backend: str
    ensemble_confidence: float


class EnsembleInferenceManager:
    """
    Run edits across multiple quantum backends and compute agreement scores.

    Dashboard Extension:
    - Agreement matrix across backends
    - Reliability boost from ensemble consensus
    """

    def __init__(self):
        self.backend_configs = self._initialize_backend_configs()
        self.inference_history: List[EnsembleResult] = []

    def _initialize_backend_configs(self) -> Dict[str, Dict]:
        """Initialize backend configurations"""
        return {
            'ibm_manila': {
                'qubits': 5,
                'error_rate': 0.08,
                'gate_fidelity': 0.92,
                'coherence_time': 30.0,
                'base_latency': 0.05
            },
            'ibm_washington': {
                'qubits': 127,
                'error_rate': 0.02,
                'gate_fidelity': 0.98,
                'coherence_time': 120.0,
                'base_latency': 0.15
            },
            'russian_simulator': {
                'qubits': 256,
                'error_rate': 0.001,
                'gate_fidelity': 0.999,
                'coherence_time': 1000.0,
                'base_latency': 0.30
            },
            'ibm_kyoto': {
                'qubits': 127,
                'error_rate': 0.025,
                'gate_fidelity': 0.975,
                'coherence_time': 100.0,
                'base_latency': 0.12
            },
            'google_sycamore': {
                'qubits': 53,
                'error_rate': 0.015,
                'gate_fidelity': 0.985,
                'coherence_time': 80.0,
                'base_latency': 0.08
            }
        }

    def run_ensemble_inference(
        self,
        edit_vector: np.ndarray,
        backend_list: List[str]
    ) -> EnsembleResult:
        """
        Run inference across multiple backends and compute ensemble result.

        Args:
            edit_vector: Edit vector to apply
            backend_list: List of backend IDs (e.g., ['ibm_manila', 'ibm_washington'])

        Returns:
            EnsembleResult with consensus and agreement metrics
        """
        # Run inference on each backend
        backend_results = []

        for backend_id in backend_list:
            result = self._run_single_backend(backend_id, edit_vector)
            backend_results.append(result)

        # Compute agreement matrix
        agreement_matrix = self._compute_agreement_matrix(backend_results)

        # Compute consensus output
        consensus_output = self._compute_consensus(backend_results)

        # Compute overall agreement score
        agreement_score = self._compute_overall_agreement(agreement_matrix)

        # Compute reliability boost
        reliability_boost = self._compute_reliability_boost(
            backend_results, agreement_score
        )

        # Find best backend
        best_backend = self._select_best_backend(backend_results)

        # Compute ensemble confidence
        ensemble_confidence = self._compute_ensemble_confidence(
            backend_results, agreement_score
        )

        result = EnsembleResult(
            edit_vector=edit_vector,
            backend_results=backend_results,
            consensus_output=consensus_output,
            agreement_score=agreement_score,
            reliability_boost=reliability_boost,
            agreement_matrix=agreement_matrix,
            best_backend=best_backend,
            ensemble_confidence=ensemble_confidence
        )

        self.inference_history.append(result)

        logger.info(
            f"Ensemble inference complete: {len(backend_list)} backends, "
            f"agreement: {agreement_score:.3f}, boost: {reliability_boost:.3f}"
        )

        return result

    def _run_single_backend(
        self, backend_id: str, edit_vector: np.ndarray
    ) -> BackendResult:
        """Run inference on a single backend"""
        config = self.backend_configs.get(backend_id)

        if config is None:
            logger.warning(f"Unknown backend: {backend_id}")
            return BackendResult(
                backend_id=backend_id,
                edit_vector=edit_vector,
                output=np.zeros_like(edit_vector),
                confidence=0.0,
                latency=0.0,
                success=False,
                error_message=f"Unknown backend: {backend_id}"
            )

        # Simulate inference with backend-specific noise
        noise_level = config['error_rate']
        noise = np.random.randn(*edit_vector.shape) * noise_level

        output = edit_vector + noise

        # Confidence based on gate fidelity
        confidence = config['gate_fidelity']

        # Latency based on backend and vector size
        latency = config['base_latency'] * (1 + len(edit_vector) / 1000.0)

        return BackendResult(
            backend_id=backend_id,
            edit_vector=edit_vector,
            output=output,
            confidence=confidence,
            latency=latency,
            success=True
        )

    def _compute_agreement_matrix(
        self, results: List[BackendResult]
    ) -> np.ndarray:
        """Compute pairwise agreement matrix between backends"""
        n = len(results)
        agreement_matrix = np.zeros((n, n))

        for i in range(n):
            for j in range(n):
                if i == j:
                    agreement_matrix[i, j] = 1.0
                else:
                    # Cosine similarity between outputs
                    output_i = results[i].output
                    output_j = results[j].output

                    if np.linalg.norm(output_i) < 1e-6 or np.linalg.norm(output_j) < 1e-6:
                        agreement_matrix[i, j] = 0.0
                    else:
                        similarity = np.dot(output_i, output_j) / (
                            np.linalg.norm(output_i) * np.linalg.norm(output_j)
                        )
                        # Normalize to [0, 1]
                        agreement_matrix[i, j] = (similarity + 1.0) / 2.0

        return agreement_matrix

    def _compute_consensus(
        self, results: List[BackendResult]
    ) -> np.ndarray:
        """Compute consensus output from all backends"""
        successful_results = [r for r in results if r.success]

        if not successful_results:
            return np.zeros_like(results[0].edit_vector)

        # Weighted average by confidence
        total_confidence = sum(r.confidence for r in successful_results)

        if total_confidence < 1e-6:
            # Unweighted average
            outputs = [r.output for r in successful_results]
            return np.mean(outputs, axis=0)

        # Confidence-weighted average
        consensus = np.zeros_like(successful_results[0].output)

        for result in successful_results:
            weight = result.confidence / total_confidence
            consensus += weight * result.output

        return consensus

    def _compute_overall_agreement(self, agreement_matrix: np.ndarray) -> float:
        """Compute overall agreement score from matrix"""
        # Average of off-diagonal elements
        n = agreement_matrix.shape[0]

        if n <= 1:
            return 1.0

        # Sum off-diagonal elements
        total = 0.0
        count = 0

        for i in range(n):
            for j in range(n):
                if i != j:
                    total += agreement_matrix[i, j]
                    count += 1

        return total / count if count > 0 else 0.0

    def _compute_reliability_boost(
        self, results: List[BackendResult], agreement_score: float
    ) -> float:
        """
        Compute reliability boost from ensemble consensus.

        Boost is higher when:
        - More backends agree
        - Individual backends have high confidence
        - Agreement score is high
        """
        successful_results = [r for r in results if r.success]

        if not successful_results:
            return 0.0  # guard: np.mean over an empty list would be NaN

        # Average individual confidence
        avg_confidence = np.mean([r.confidence for r in successful_results])

        # Ensemble size factor
        ensemble_factor = min(len(results) / 5.0, 1.0)

        # Boost formula
        boost = (
            0.4 * agreement_score +
            0.3 * avg_confidence +
            0.3 * ensemble_factor
        )

        return float(np.clip(boost, 0.0, 1.0))

    def _select_best_backend(self, results: List[BackendResult]) -> str:
        """Select best backend based on confidence and success"""
        successful_results = [r for r in results if r.success]

        if not successful_results:
            return results[0].backend_id if results else "none"

        # Score by confidence and inverse latency
        scores = {}

        for result in successful_results:
            scores[result.backend_id] = (
                0.7 * result.confidence +
                0.3 * (1.0 / (1.0 + result.latency))
            )

        return max(scores, key=scores.get)

    def _compute_ensemble_confidence(
        self, results: List[BackendResult], agreement_score: float
    ) -> float:
        """Compute overall ensemble confidence"""
        successful_results = [r for r in results if r.success]

        if not successful_results:
            return 0.0  # guard: np.mean over an empty list would be NaN

        # Combine individual confidences with agreement
        avg_confidence = np.mean([r.confidence for r in successful_results])

        # Ensemble confidence is boosted by agreement
        ensemble_confidence = 0.6 * avg_confidence + 0.4 * agreement_score

        return float(np.clip(ensemble_confidence, 0.0, 1.0))

    def compare_backends(
        self, edit_vectors: List[np.ndarray]
    ) -> Dict[str, Dict[str, float]]:
        """
        Compare all backends across multiple edit vectors.

        Returns:
            Dict mapping backend_id to performance metrics
        """
        backend_stats = {
            backend_id: {
                'avg_confidence': [],
                'avg_latency': [],
                'success_rate': []
            }
            for backend_id in self.backend_configs.keys()
        }

        for edit_vector in edit_vectors:
            for backend_id in self.backend_configs.keys():
                result = self._run_single_backend(backend_id, edit_vector)

                backend_stats[backend_id]['avg_confidence'].append(result.confidence)
                backend_stats[backend_id]['avg_latency'].append(result.latency)
                backend_stats[backend_id]['success_rate'].append(1.0 if result.success else 0.0)

        # Compute averages
        comparison = {}

        for backend_id, stats in backend_stats.items():
            comparison[backend_id] = {
                'avg_confidence': float(np.mean(stats['avg_confidence'])),
                'avg_latency': float(np.mean(stats['avg_latency'])),
                'success_rate': float(np.mean(stats['success_rate']))
            }

        return comparison

    def get_agreement_heatmap(
        self, backend_list: List[str], edit_vector: np.ndarray
    ) -> Tuple[np.ndarray, List[str]]:
        """
        Get agreement heatmap for visualization.

        Returns:
            Tuple of (agreement_matrix, backend_labels)
        """
        result = self.run_ensemble_inference(edit_vector, backend_list)
        return result.agreement_matrix, backend_list

    def compute_reliability_metrics(self) -> Dict[str, float]:
        """Compute overall reliability metrics from history"""
        if not self.inference_history:
            return {
                'avg_agreement': 0.0,
                'avg_reliability_boost': 0.0,
                'avg_ensemble_confidence': 0.0
            }

        return {
            'avg_agreement': float(np.mean([
                r.agreement_score for r in self.inference_history
            ])),
            'avg_reliability_boost': float(np.mean([
                r.reliability_boost for r in self.inference_history
            ])),
            'avg_ensemble_confidence': float(np.mean([
                r.ensemble_confidence for r in self.inference_history
            ]))
        }
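A minimal usage sketch for EnsembleInferenceManager (not part of the uploaded files; backend IDs are the ones defined in _initialize_backend_configs above, and the edit vector is a random stand-in):

# Usage sketch (illustrative, hypothetical values)
import numpy as np
from ensemble_inference_manager import EnsembleInferenceManager

manager = EnsembleInferenceManager()
edit = np.random.randn(128)  # stand-in edit vector

result = manager.run_ensemble_inference(
    edit, ['ibm_manila', 'ibm_washington', 'google_sycamore']
)
print(result.agreement_score, result.best_backend, result.ensemble_confidence)

# Aggregate reliability over everything run so far
print(manager.compute_reliability_metrics())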
rank_feedback_generator.py ADDED
@@ -0,0 +1,484 @@
# -*- coding: utf-8 -*-
"""
Contributor-Aware Rank Feedback Loop
Recommend optimal ranks based on contributor history and efficiency

Based on:
Zhang, Y., et al. (2024). "Deep Hierarchical Learning with Nested Subspace Networks."
arXiv preprint. NSN framework for hierarchical representation learning.
"""
import numpy as np
from typing import Any, Dict, List, Optional, Tuple
from dataclasses import dataclass
from datetime import datetime
import logging

logger = logging.getLogger(__name__)


@dataclass
class SubmissionRecord:
    """Record of a contributor submission"""
    contributor_id: str
    language: str
    rank: int
    accuracy: float
    flops: float
    uncertainty: float
    timestamp: str
    efficiency: float  # accuracy / flops


@dataclass
class RankRecommendation:
    """Rank recommendation for contributor"""
    contributor_id: str
    recommended_rank: int
    confidence: float
    rationale: str
    unexplored_pairs: List[Tuple[int, str]]  # (rank, language) pairs
    efficiency_prediction: float
    personalized_badge: str


class RankFeedbackGenerator:
    """
    Recommend optimal ranks based on contributor history and efficiency.

    Leaderboard Extension:
    - Personalized rank badges
    - Suggestion panel for unexplored rank-language pairs
    """

    def __init__(self):
        self.submission_history: Dict[str, List[SubmissionRecord]] = {}
        self.rank_options = [8, 16, 32, 64, 128, 256]
        self.language_options = [
            'english', 'chinese', 'spanish', 'french', 'german',
            'russian', 'arabic', 'japanese', 'korean', 'portuguese',
            'indonesian', 'vietnamese', 'thai', 'swahili', 'yoruba'
        ]

    def record_submission(
        self,
        contributor_id: str,
        language: str,
        rank: int,
        accuracy: float,
        flops: float,
        uncertainty: float,
        timestamp: Optional[str] = None
    ):
        """Record a contributor submission"""
        if timestamp is None:
            timestamp = datetime.now().isoformat()

        efficiency = accuracy / flops if flops > 0 else 0.0

        record = SubmissionRecord(
            contributor_id=contributor_id,
            language=language,
            rank=rank,
            accuracy=accuracy,
            flops=flops,
            uncertainty=uncertainty,
            timestamp=timestamp,
            efficiency=efficiency
        )

        if contributor_id not in self.submission_history:
            self.submission_history[contributor_id] = []

        self.submission_history[contributor_id].append(record)
        logger.info(
            f"Recorded submission: {contributor_id} - {language} @ rank {rank} "
            f"(accuracy: {accuracy:.3f}, efficiency: {efficiency:.2e})"
        )

    def recommend_rank(
        self,
        contributor_id: str,
        target_language: Optional[str] = None
    ) -> RankRecommendation:
        """
        Recommend optimal rank based on contributor history.

        Args:
            contributor_id: Contributor identifier
            target_language: Optional target language for recommendation

        Returns:
            RankRecommendation with personalized suggestions
        """
        submissions = self.submission_history.get(contributor_id, [])

        if not submissions:
            # New contributor: recommend starting rank
            return RankRecommendation(
                contributor_id=contributor_id,
                recommended_rank=32,
                confidence=0.5,
                rationale="Starting recommendation for new contributor",
                unexplored_pairs=self._get_unexplored_pairs(contributor_id)[:5],  # Top 5 suggestions
                efficiency_prediction=0.0,
                personalized_badge="🌟 Newcomer"
            )

        # Analyze submission history
        if target_language:
            # Language-specific recommendation
            lang_submissions = [s for s in submissions if s.language == target_language]
            if lang_submissions:
                return self._recommend_from_history(
                    contributor_id, lang_submissions, target_language
                )

        # General recommendation based on all submissions
        return self._recommend_from_history(contributor_id, submissions)

    def _recommend_from_history(
        self,
        contributor_id: str,
        submissions: List[SubmissionRecord],
        target_language: Optional[str] = None
    ) -> RankRecommendation:
        """Generate recommendation from submission history"""
        # Find best efficiency rank
        best_submission = max(submissions, key=lambda s: s.efficiency)

        # Analyze rank performance
        rank_performance = self._analyze_rank_performance(submissions)

        # Find optimal rank
        recommended_rank = self._select_optimal_rank(rank_performance)

        # Compute confidence
        confidence = self._compute_recommendation_confidence(
            submissions, recommended_rank
        )

        # Generate rationale
        rationale = self._generate_rationale(
            submissions, recommended_rank, best_submission
        )

        # Find unexplored pairs
        unexplored = self._get_unexplored_pairs(contributor_id)

        # Predict efficiency
        efficiency_prediction = self._predict_efficiency(
            submissions, recommended_rank
        )

        # Assign badge
        badge = self._assign_badge(submissions)

        return RankRecommendation(
            contributor_id=contributor_id,
            recommended_rank=recommended_rank,
            confidence=confidence,
            rationale=rationale,
            unexplored_pairs=unexplored[:5],  # Top 5 suggestions
            efficiency_prediction=efficiency_prediction,
            personalized_badge=badge
        )

    def _analyze_rank_performance(
        self, submissions: List[SubmissionRecord]
    ) -> Dict[int, Dict[str, float]]:
        """Analyze performance at each rank"""
        rank_stats = {}

        for rank in self.rank_options:
            rank_subs = [s for s in submissions if s.rank == rank]

            if rank_subs:
                rank_stats[rank] = {
                    'avg_accuracy': np.mean([s.accuracy for s in rank_subs]),
                    'avg_efficiency': np.mean([s.efficiency for s in rank_subs]),
                    'avg_uncertainty': np.mean([s.uncertainty for s in rank_subs]),
                    'count': len(rank_subs)
                }
            else:
                rank_stats[rank] = {
                    'avg_accuracy': 0.0,
                    'avg_efficiency': 0.0,
                    'avg_uncertainty': 1.0,
                    'count': 0
                }

        return rank_stats

    def _select_optimal_rank(
        self, rank_performance: Dict[int, Dict[str, float]]
    ) -> int:
        """Select optimal rank based on performance"""
        # Score each rank by efficiency and accuracy
        scores = {}

        for rank, stats in rank_performance.items():
            if stats['count'] == 0:
                scores[rank] = 0.0
            else:
                # Weighted score: 60% efficiency, 40% accuracy
                scores[rank] = (
                    0.6 * stats['avg_efficiency'] * 1e8 +  # Scale efficiency
                    0.4 * stats['avg_accuracy']
                )

        # Return rank with highest score
        if not scores or max(scores.values()) == 0:
            return 32  # Default

        return max(scores, key=scores.get)

    def _compute_recommendation_confidence(
        self, submissions: List[SubmissionRecord], recommended_rank: int
    ) -> float:
        """Compute confidence in recommendation"""
        # Confidence based on:
        # - Number of submissions at recommended rank
        # - Consistency of performance
        # - Total submission count

        rank_subs = [s for s in submissions if s.rank == recommended_rank]

        if not rank_subs:
            return 0.3  # Low confidence for untested rank

        # Sample size factor
        sample_factor = min(len(rank_subs) / 10.0, 1.0)

        # Consistency factor (low variance in efficiency)
        efficiencies = [s.efficiency for s in rank_subs]
        if len(efficiencies) > 1:
            consistency = 1.0 - min(np.std(efficiencies) / np.mean(efficiencies), 1.0)
        else:
            consistency = 0.5

        # Experience factor
        experience = min(len(submissions) / 20.0, 1.0)

        confidence = 0.4 * sample_factor + 0.3 * consistency + 0.3 * experience

        return float(np.clip(confidence, 0.0, 1.0))

    def _generate_rationale(
        self,
        submissions: List[SubmissionRecord],
        recommended_rank: int,
        best_submission: SubmissionRecord
    ) -> str:
        """Generate human-readable rationale"""
        rank_subs = [s for s in submissions if s.rank == recommended_rank]

        if not rank_subs:
            return (
                f"Rank {recommended_rank} recommended based on interpolation "
                f"from your best performance at rank {best_submission.rank} "
                f"(efficiency: {best_submission.efficiency:.2e})"
            )

        avg_accuracy = np.mean([s.accuracy for s in rank_subs])
        avg_efficiency = np.mean([s.efficiency for s in rank_subs])

        return (
            f"Rank {recommended_rank} shows best efficiency ({avg_efficiency:.2e}) "
            f"with {len(rank_subs)} submissions averaging {avg_accuracy:.3f} accuracy. "
            f"This balances compute cost and performance for your editing style."
        )

    def _get_unexplored_pairs(
        self, contributor_id: str
    ) -> List[Tuple[int, str]]:
        """Get unexplored rank-language pairs"""
        submissions = self.submission_history.get(contributor_id, [])

        explored = set((s.rank, s.language) for s in submissions)

        all_pairs = [
            (rank, lang)
            for rank in self.rank_options
            for lang in self.language_options
        ]

        unexplored = [pair for pair in all_pairs if pair not in explored]

        # Prioritize by potential value
        # Prefer: medium ranks, diverse languages
        def priority_score(pair):
            rank, lang = pair
            rank_score = 1.0 - abs(rank - 64) / 128.0  # Prefer rank 64

            # Prefer low-resource languages (more impact)
            low_resource = ['indonesian', 'vietnamese', 'thai', 'swahili', 'yoruba']
            lang_score = 1.5 if lang in low_resource else 1.0

            return rank_score * lang_score

        unexplored.sort(key=priority_score, reverse=True)

        return unexplored

    def _predict_efficiency(
        self, submissions: List[SubmissionRecord], rank: int
    ) -> float:
        """Predict efficiency at given rank"""
        # Simple linear interpolation from existing data
        rank_subs = [s for s in submissions if s.rank == rank]

        if rank_subs:
            return float(np.mean([s.efficiency for s in rank_subs]))

        # Interpolate from nearby ranks
        nearby_ranks = sorted([s.rank for s in submissions])

        if not nearby_ranks:
            return 0.0

        # Find closest ranks
        lower = [r for r in nearby_ranks if r < rank]
        upper = [r for r in nearby_ranks if r > rank]

        if lower and upper:
            lower_rank = max(lower)
            upper_rank = min(upper)

            lower_eff = np.mean([
                s.efficiency for s in submissions if s.rank == lower_rank
            ])
            upper_eff = np.mean([
                s.efficiency for s in submissions if s.rank == upper_rank
            ])

            # Linear interpolation
            weight = (rank - lower_rank) / (upper_rank - lower_rank)
            return float(lower_eff * (1 - weight) + upper_eff * weight)

        # Use closest available rank
        closest_rank = min(nearby_ranks, key=lambda r: abs(r - rank))
        return float(np.mean([s.efficiency for s in submissions if s.rank == closest_rank]))

    def _assign_badge(self, submissions: List[SubmissionRecord]) -> str:
        """Assign personalized badge based on performance"""
        if not submissions:
            return "🌟 Newcomer"

        # Analyze submission characteristics
        total_subs = len(submissions)
        unique_langs = len(set(s.language for s in submissions))
        unique_ranks = len(set(s.rank for s in submissions))
        avg_accuracy = np.mean([s.accuracy for s in submissions])
        avg_efficiency = np.mean([s.efficiency for s in submissions])

        # Badge criteria
        if total_subs >= 50 and unique_langs >= 10:
            return "🏆 Master Contributor"
        elif avg_efficiency > 1e-7:
            return "⚡ Efficiency Expert"
        elif avg_accuracy > 0.95:
            return "🎯 Accuracy Champion"
        elif unique_ranks >= 5:
            return "🔬 Rank Explorer"
        elif unique_langs >= 8:
            return "🌍 Multilingual Specialist"
        elif total_subs >= 20:
            return "💪 Active Contributor"
        elif total_subs >= 10:
            return "📈 Rising Star"
        else:
            return "🚀 Getting Started"

    def generate_feedback_panel(
        self, contributor_id: str
    ) -> Dict[str, Any]:
        """
        Generate comprehensive feedback panel for dashboard.

        Returns:
            Dict with recommendations, stats, and suggestions
        """
        submissions = self.submission_history.get(contributor_id, [])
        recommendation = self.recommend_rank(contributor_id)

        if not submissions:
            return {
                'recommendation': recommendation,
                'stats': {},
                'suggestions': [
                    "Start with rank 32 for balanced performance",
                    "Try high-resource languages (English, Chinese) first",
                    "Focus on accuracy before optimizing efficiency"
                ]
            }

        # Compute statistics
        stats = {
            'total_submissions': len(submissions),
            'unique_languages': len(set(s.language for s in submissions)),
            'unique_ranks': len(set(s.rank for s in submissions)),
            'avg_accuracy': float(np.mean([s.accuracy for s in submissions])),
            'avg_efficiency': float(np.mean([s.efficiency for s in submissions])),
            'best_accuracy': float(max(s.accuracy for s in submissions)),
            'best_efficiency': float(max(s.efficiency for s in submissions))
        }

        # Generate suggestions
        suggestions = self._generate_suggestions(submissions, recommendation)

        return {
            'recommendation': recommendation,
            'stats': stats,
            'suggestions': suggestions
        }

    def _generate_suggestions(
        self,
        submissions: List[SubmissionRecord],
        recommendation: RankRecommendation
    ) -> List[str]:
        """Generate actionable suggestions"""
        suggestions = []

        # Analyze gaps
        tested_ranks = set(s.rank for s in submissions)
        tested_langs = set(s.language for s in submissions)

        # Rank diversity
        if len(tested_ranks) < 3:
            suggestions.append(
                f"Try exploring more ranks - you've only tested {len(tested_ranks)} so far"
            )

        # Language diversity
        low_resource = ['indonesian', 'vietnamese', 'thai', 'swahili', 'yoruba']
        tested_low_resource = [lang for lang in tested_langs if lang in low_resource]

        if len(tested_low_resource) < 2:
            suggestions.append(
                "Consider testing low-resource languages for higher impact"
            )

        # Efficiency optimization
        avg_efficiency = np.mean([s.efficiency for s in submissions])
        if avg_efficiency < 5e-8:
            suggestions.append(
                "Focus on efficiency - try lower ranks to reduce FLOPs"
            )

        # Accuracy improvement
        avg_accuracy = np.mean([s.accuracy for s in submissions])
        if avg_accuracy < 0.85:
            suggestions.append(
                "Accuracy could be improved - try higher ranks or refine your edits"
            )

        # Unexplored pairs
        if recommendation.unexplored_pairs:
            top_pair = recommendation.unexplored_pairs[0]
            suggestions.append(
                f"High-value opportunity: Try rank {top_pair[0]} with {top_pair[1]}"
            )

        return suggestions[:5]  # Top 5 suggestions
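A minimal usage sketch for RankFeedbackGenerator (not part of the uploaded files; the contributor ID 'alice' and all metric values are made up for illustration):

# Usage sketch (illustrative, hypothetical values)
from rank_feedback_generator import RankFeedbackGenerator

gen = RankFeedbackGenerator()
gen.record_submission('alice', 'english', rank=32, accuracy=0.91,
                      flops=2.0e7, uncertainty=0.10)
gen.record_submission('alice', 'indonesian', rank=64, accuracy=0.88,
                      flops=6.5e7, uncertainty=0.15)

rec = gen.recommend_rank('alice')
print(rec.recommended_rank, rec.personalized_badge)
print(rec.rationale)

panel = gen.generate_feedback_panel('alice')
print(panel['stats'], panel['suggestions'])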