FerrellSyntheticIntelligence
Add deep cognition layer, curriculum runner, evaluation probe, updated loop and benchmark
653b8c1 | """ | |
| EvaluationProbe — Vitalis FSI | |
| Measures whether the system has learned to distinguish | |
| primitive speech acts after the curriculum. | |
| Speech acts measured: | |
| - question (interrogative) | |
| - instruction (imperative) | |
| - explanation (declarative/expository) | |
| No labeled audio required at runtime — | |
| uses helix prototype clustering for zero-shot evaluation. | |
| """ | |
| import numpy as np | |
| from pathlib import Path | |
| from typing import Dict, List, Tuple | |
| from src.dream_engine.helix_memory import HelixMemory | |
| from src.hdc_encoder.encoder import encode | |
| from src.audio_ear.feature_extractor import extract_features | |
class EvaluationProbe:
    """Probe how well helix memory separates primitive speech acts.

    The probe clusters every prototype stored in helix memory into
    ``N_CLUSTERS`` centroids, attaches one entry of ``LABELS`` to each
    centroid, and classifies audio files by their similarity to those
    centroids.

    The code relies on ``self.helix.entries`` being a sequence of 4-tuples
    whose second item is a prototype hypervector and whose third item is a
    usage count, and on ``self.helix.retrieve(hv, top_k=...)`` returning
    ``(prototype, <other>)`` pairs.
    """

    N_CLUSTERS = 3
    LABELS = ["question", "instruction", "explanation"]
    # Upper bound on k-means refinement passes in _build_centroids.
    _KMEANS_ITERATIONS = 6

    def __init__(self, helix_path: Path = None):
        """Open the helix memory to probe.

        Args:
            helix_path: Location of the helix memory file. When omitted,
                ``~/.vitalis_workspace/helix_memory.pkl`` is used.
        """
        self.helix_path = helix_path or (
            Path.home() / ".vitalis_workspace" / "helix_memory.pkl"
        )
        self.helix = HelixMemory(self.helix_path)

    def _build_centroids(self) -> Dict[str, np.ndarray]:
        """Cluster all stored helix prototypes into one centroid per label.

        Runs a seeded k-means using Hamming distance; each centroid is the
        element-wise sign of the summed members (ties resolved to +1).

        NOTE(review): labels are attached to clusters purely by position
        (``zip(LABELS, centroids)``), so nothing here guarantees that the
        centroid called "question" actually captures questions — confirm
        this is the intended zero-shot behaviour.

        Returns:
            Mapping from each label in ``LABELS`` to an int8 centroid.

        Raises:
            RuntimeError: If fewer than ``N_CLUSTERS`` prototypes are stored.
        """
        if len(self.helix.entries) < self.N_CLUSTERS:
            raise RuntimeError(
                f"Need at least {self.N_CLUSTERS} helix codes. "
                f"Run curriculum first."
            )

        all_protos = np.stack(
            [proto for _, proto, _, _ in self.helix.entries]
        ).astype(np.int8)

        # Seeded k-means for reproducibility
        rng = np.random.default_rng(42)
        idx = rng.choice(len(all_protos), self.N_CLUSTERS, replace=False)
        centroids = all_protos[idx].copy()

        previous_assigns = None
        for _ in range(self._KMEANS_ITERATIONS):
            # Hamming distance of every prototype to every centroid.
            dists = np.stack(
                [np.sum(all_protos != c, axis=1) for c in centroids],
                axis=1,
            )
            assigns = np.argmin(dists, axis=1)

            # Unchanged assignments would reproduce the same centroids,
            # so further passes cannot alter the result.
            if previous_assigns is not None and np.array_equal(
                assigns, previous_assigns
            ):
                break
            previous_assigns = assigns

            for i in range(self.N_CLUSTERS):
                mask = assigns == i
                if not np.any(mask):
                    # Empty cluster: keep its previous centroid.
                    continue
                # Widen before summing so int8 cannot overflow.
                summed = all_protos[mask].astype(np.int32).sum(axis=0)
                new_c = np.sign(summed).astype(np.int8)
                new_c[new_c == 0] = 1
                centroids[i] = new_c

        return dict(zip(self.LABELS, centroids))

    def _semantic_fingerprint(self, hv: np.ndarray) -> np.ndarray:
        """Denoise ``hv`` by bundling its nearest helix prototypes.

        Retrieves the top-3 prototypes for ``hv`` and bundles them by
        majority vote (element-wise sign of their sum, ties resolved to +1).

        Args:
            hv: Raw hypervector to clean up.

        Returns:
            The int8 bundled vector, or a copy of ``hv`` when helix memory
            returns no matches.
        """
        matches = self.helix.retrieve(hv, top_k=3)
        if not matches:
            return hv.copy()

        protos = [proto for proto, _ in matches]
        stacked = np.stack(protos).astype(np.int32).sum(axis=0)
        result = np.sign(stacked).astype(np.int8)
        result[result == 0] = 1
        return result

    def evaluate_file(
        self,
        wav_path: Path,
        true_label: str,
        centroids: Dict[str, np.ndarray],
    ) -> Tuple[str, float, bool]:
        """Classify one audio file against the given centroids.

        Args:
            wav_path: Audio file to evaluate.
            true_label: Expected label for the file.
            centroids: Label-to-centroid mapping, as produced by
                ``_build_centroids``.

        Returns:
            ``(predicted, confidence, correct)`` where ``confidence`` is the
            fraction of positions at which the file's fingerprint equals the
            winning centroid.
        """
        mfcc, prosody = extract_features(wav_path)
        raw_hv = encode(mfcc, prosody)
        semantic_hv = self._semantic_fingerprint(raw_hv)

        sims = {
            label: float(np.mean(semantic_hv == centroid))
            for label, centroid in centroids.items()
        }
        predicted = max(sims, key=sims.get)
        confidence = sims[predicted]
        correct = predicted == true_label
        return predicted, confidence, correct

    def evaluate_directory(self, probe_dir: Path) -> Dict:
        """Evaluate every wav file found under ``probe_dir``.

        Expected layout: ``probe_dir/<label>/<file>.wav`` where ``<label>``
        is one of ``LABELS``. Sub-directories with any other name are
        skipped: their files could never be predicted correctly and would
        previously crash the run with a ``KeyError``.

        Args:
            probe_dir: Root directory of the labelled probe set.

        Returns:
            A report dict. ``status`` is ``"probe_dir_not_found"`` when
            ``probe_dir`` is not a directory, ``"no_files_found"`` when no
            wav file sits under a known label, and ``"complete"`` otherwise
            (with overall and per-class accuracy plus per-file details).

        Raises:
            RuntimeError: Propagated from ``_build_centroids`` when helix
                memory holds too few prototypes.
        """
        probe_dir = Path(probe_dir)
        # is_dir() also rejects a regular file, which iterdir() would choke on.
        if not probe_dir.is_dir():
            return {"status": "probe_dir_not_found", "path": str(probe_dir)}

        centroids = self._build_centroids()
        results: Dict[str, List[dict]] = {label: [] for label in self.LABELS}

        # Sorted traversal keeps the order of "details" reproducible.
        for label_dir in sorted(probe_dir.iterdir()):
            label = label_dir.name
            if not label_dir.is_dir() or label not in results:
                continue
            for wav in sorted(label_dir.glob("*.wav")):
                pred, conf, is_correct = self.evaluate_file(
                    wav, label, centroids
                )
                results[label].append({
                    "file": wav.name,
                    "predicted": pred,
                    "confidence": round(conf, 4),
                    "correct": is_correct,
                })

        total = sum(len(items) for items in results.values())
        if total == 0:
            return {"status": "no_files_found"}

        correct = sum(
            int(r["correct"]) for items in results.values() for r in items
        )
        per_class_acc = {
            label: (
                round(sum(r["correct"] for r in items) / len(items), 4)
                if items
                else 0.0
            )
            for label, items in results.items()
        }
        return {
            "status": "complete",
            "overall_accuracy": round(correct / total, 4),
            "per_class": per_class_acc,
            "total_files": total,
            "helix_codes": len(self.helix.entries),
            "details": results,
        }

    def evaluate_helix_health(self) -> Dict:
        """Summarise helix memory diversity without needing audio files.

        Similarity between two prototypes is the fraction of positions at
        which they are equal; diversity is one minus the mean similarity
        over all distinct pairs.

        Returns:
            ``{"status": "insufficient_data"}`` when fewer than two
            prototypes are stored; otherwise a dict with ``status``
            (``"healthy"`` when diversity exceeds 0.1, else
            ``"low_diversity"``), the prototype count, diversity and average
            similarity (rounded to 4 places), the sum of usage counts, and
            the index of the entry with the highest usage count.
        """
        if len(self.helix.entries) < 2:
            return {"status": "insufficient_data"}

        protos = np.stack([p for _, p, _, _ in self.helix.entries])
        n = len(protos)
        # One row per prototype so each pair compares over all elements.
        protos = protos.reshape(n, -1)

        # Compare each prototype with all later ones in a single
        # vectorised step instead of a Python-level pair loop.
        pair_sims = np.concatenate([
            np.mean(protos[i] == protos[i + 1:], axis=1)
            for i in range(n - 1)
        ])
        avg_sim = float(pair_sims.mean()) if pair_sims.size else 0.0
        diversity = round(1.0 - avg_sim, 4)

        usage_counts = [cnt for _, _, cnt, _ in self.helix.entries]
        return {
            "status": "healthy" if diversity > 0.1 else "low_diversity",
            "helix_codes": n,
            "diversity_score": diversity,
            "avg_similarity": round(avg_sim, 4),
            "total_ingestions": sum(usage_counts),
            "most_used_code": int(np.argmax(usage_counts)),
        }