FerrellSyntheticIntelligence
Add deep cognition layer, curriculum runner, evaluation probe, updated loop and benchmark
653b8c1 | """ | |
| CurriculumRunner — Vitalis FSI | |
| The childhood listening phase. | |
| Vitalis ingests curated audio, builds hypervector representations, | |
| and feeds the DreamEngine for consolidation. | |
| No external APIs. No cloud. Fully sovereign. | |
| Audio files live locally. Processing is local. | |
| """ | |
import os
import time
from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np

from src.audio_ear.feature_extractor import extract_features
from src.hdc_encoder.encoder import encode
from src.dream_engine.helix_memory import HelixMemory
from src.dream_engine.consolidator import DreamEngine
from src.ide_kernel.ledger import ProjectLedger
class CurriculumRunner:
    """Drive the local audio "listening" curriculum.

    Discovers ``.wav`` files under ``audio_dir``, converts each one to a
    hypervector with ``extract_features`` + ``encode``, hands it to the
    ``DreamEngine`` and forces a dream pass every ``DREAM_EVERY_N`` processed
    segments. Each processed segment is also recorded in the project ledger.
    """

    # Not referenced anywhere in this class; presumably the intended length of
    # one audio segment in seconds -- TODO confirm against the feature extractor.
    SEGMENT_SECONDS = 30
    # A dream pass is forced after every DREAM_EVERY_N processed segments.
    DREAM_EVERY_N = 10

    def __init__(
        self,
        audio_dir: str,
        helix_path: Optional[Path] = None,
        workspace: Optional[str] = None,
    ):
        """Wire up memory, dream engine and ledger for one curriculum run.

        Args:
            audio_dir: Directory that is searched recursively for wav files.
            helix_path: Path passed to ``HelixMemory``. Defaults to
                ``~/.vitalis_workspace/helix_memory.pkl``.
            workspace: Directory passed to ``ProjectLedger``. Defaults to the
                current working directory.
        """
        self.audio_dir = Path(audio_dir)
        self.helix_path = helix_path or (
            Path.home() / ".vitalis_workspace" / "helix_memory.pkl"
        )
        self.workspace = workspace or os.getcwd()
        self.helix = HelixMemory(self.helix_path)
        self.dreamer = DreamEngine(self.helix, buffer_max=500)
        self.ledger = ProjectLedger(self.workspace)
        # Number of segments attempted so far (successful or not).
        self._processed = 0
        # Wall-clock time at which the most recent run() started.
        self._start_time = None

    def _discover_audio(self) -> List[Path]:
        """Return every wav file below ``audio_dir`` in sorted order.

        The extension match is case-insensitive (``.wav`` and ``.WAV`` both
        count) and only regular files are returned. Sorting makes the
        curriculum order reproducible regardless of filesystem listing order.
        If the directory does not exist it is created and an empty list is
        returned.
        """
        if not self.audio_dir.exists():
            print(f"[CURRICULUM] Audio dir not found: {self.audio_dir}")
            print("[CURRICULUM] Creating directory. Add .wav files to begin.")
            self.audio_dir.mkdir(parents=True, exist_ok=True)
            return []
        files = sorted(
            path
            for path in self.audio_dir.rglob("*")
            if path.is_file() and path.suffix.lower() == ".wav"
        )
        print(f"[CURRICULUM] Discovered {len(files)} audio files.")
        return files

    def _process_file(self, wav_path: Path, label: str = "unlabeled") -> bool:
        """Encode one wav file and ingest it into the dream engine.

        Args:
            wav_path: File handed to ``extract_features``.
            label: Stored in the ingest metadata under ``"label"``.

        Returns:
            True when the file was ingested, False when any step raised.
        """
        try:
            mfcc, prosody = extract_features(wav_path)
            hv = encode(mfcc, prosody)
            self.dreamer.ingest(hv, meta={
                "source": str(wav_path.name),
                "label": label,
                "timestamp": time.time(),
            })
            return True
        except Exception as e:
            # Deliberate best-effort: one unreadable file must not end the run.
            print(f"[CURRICULUM] Error processing {wav_path.name}: {e}")
            return False

    def run(self, total_hours: float = 12.0) -> dict:
        """Run the curriculum until ``total_hours`` of real time have elapsed.

        Files are visited round-robin. Every attempt is written to the ledger
        as either "Completed" or "Failed". A dream pass is forced every
        ``DREAM_EVERY_N`` attempts and once more at the end. If every file
        fails in one uninterrupted pass the run stops early, because cycling
        over the same unprocessable files until the time budget expires
        accomplishes nothing.

        Args:
            total_hours: Time budget in hours. A value <= 0 skips the loop.

        Returns:
            ``{"status": "no_audio", "processed": 0}`` when no files exist.
            Otherwise a dict with ``status`` (``"complete"``, or ``"stalled"``
            when the run stopped early on failures), ``processed``,
            ``successful``, ``helix_codes`` and ``elapsed_hours``.
        """
        files = self._discover_audio()
        if not files:
            print("[CURRICULUM] No audio files found. "
                  f"Add .wav files to {self.audio_dir} and rerun.")
            return {"status": "no_audio", "processed": 0}

        self._start_time = time.time()
        # Elapsed time is measured on the monotonic clock so that wall-clock
        # adjustments cannot shorten or extend the run.
        started = time.monotonic()
        elapsed_hours = 0.0
        file_index = 0
        success_count = 0
        consecutive_failures = 0
        status = "complete"
        print(f"[CURRICULUM] Starting {total_hours}h curriculum "
              f"with {len(files)} files.")

        while elapsed_hours < total_hours:
            wav = files[file_index % len(files)]
            # Infer label from parent directory name
            label = wav.parent.name
            if self._process_file(wav, label):
                success_count += 1
                consecutive_failures = 0
                outcome = "Completed"
            else:
                consecutive_failures += 1
                outcome = "Failed"
            self._processed += 1

            # Log the real outcome so the ledger never claims a failed
            # segment was completed.
            self.ledger.update_state(
                f"curriculum_segment_{self._processed}",
                f"{outcome} — {wav.name}"
            )

            # Force a dream pass on a fixed cadence of attempted segments.
            if self._processed % self.DREAM_EVERY_N == 0:
                self.dreamer.dream(force=True)
                # Refresh first so the progress line is not one step stale.
                elapsed_hours = (time.monotonic() - started) / 3600
                print(f"[CURRICULUM] {self._processed} segments processed. "
                      f"Elapsed: {elapsed_hours:.2f}h")

            file_index += 1
            elapsed_hours = (time.monotonic() - started) / 3600

            if consecutive_failures >= len(files):
                print("[CURRICULUM] Every file failed in a full pass. "
                      "Stopping early.")
                status = "stalled"
                break

        # Final dream
        self.dreamer.dream(force=True)
        result = {
            "status": status,
            "processed": self._processed,
            "successful": success_count,
            "helix_codes": len(self.helix.entries),
            "elapsed_hours": round(elapsed_hours, 3),
        }
        print(f"[CURRICULUM] Complete. {result}")
        return result
def run_curriculum(
    audio_dir: str,
    helix_path: Optional[Path] = None,
    total_hours: float = 12.0,
    workspace: Optional[str] = None,
) -> dict:
    """Build a ``CurriculumRunner`` and run it once.

    Args:
        audio_dir: Directory searched for wav files.
        helix_path: Forwarded to ``CurriculumRunner``; None selects its default.
        total_hours: Time budget in hours, forwarded to ``CurriculumRunner.run``.
        workspace: Forwarded to ``CurriculumRunner``; None selects its default.
            Added last so existing positional callers are unaffected.

    Returns:
        The result dict produced by ``CurriculumRunner.run``.
    """
    runner = CurriculumRunner(
        audio_dir=audio_dir,
        helix_path=helix_path,
        workspace=workspace,
    )
    return runner.run(total_hours=total_hours)