| | """ |
| | EXP-05: Bit-Chain Compression/Expansion Losslessness Validation |
| | |
| | Tests whether STAT7 bit-chains can be compressed through the full pipeline |
| | (fragments → clusters → glyphs → mist) and then expanded back to original |
| | coordinates without information loss. |
| | |
| | Validates: |
| | - Provenance chain integrity (all source IDs tracked) |
| | - STAT7 coordinate reconstruction accuracy |
| | - Luminosity decay through compression stages |
| | - Narrative preservation (embeddings, affect survival) |
| | - Compression ratio efficiency |
| | |
| | Status: Phase 2 validation experiment |
| | """ |
| |
|
| | import json |
| | import hashlib |
| | import time |
| | import uuid |
| | import random |
| | import sys |
| | from datetime import datetime, timezone |
| | from decimal import Decimal, ROUND_HALF_EVEN |
| | from typing import Dict, List, Tuple, Any, Optional |
| | from dataclasses import dataclass, asdict, field |
| | from collections import defaultdict |
| | import statistics |
| | import math |
| | from pathlib import Path |
| |
|
| | |
| | from stat7_experiments import ( |
| | normalize_float, |
| | normalize_timestamp, |
| | sort_json_keys, |
| | canonical_serialize, |
| | compute_address_hash, |
| | Coordinates, |
| | BitChain, |
| | REALMS, |
| | HORIZONS, |
| | ENTITY_TYPES, |
| | generate_random_bitchain, |
| | ) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
@dataclass
class CompressionStage:
    """One snapshot of the pipeline: how big the data is and how bright it
    still burns at a given stage."""
    stage_name: str
    size_bytes: int
    record_count: int
    key_metadata: Dict[str, Any]
    luminosity: float
    provenance_intact: bool

    def compression_ratio_from_original(self, original_bytes: int) -> float:
        """Return how many times smaller this stage is than the original.

        A zero/negative stage size is clamped to 1 byte so the ratio is
        always defined.
        """
        denominator = self.size_bytes if self.size_bytes > 0 else 1
        return original_bytes / denominator
| |
|
| |
|
@dataclass
class BitChainCompressionPath:
    """Tracks one bit-chain from its original STAT7 form down to mist.

    Holds the original snapshot, every intermediate CompressionStage, and
    the reconstruction/quality flags filled in after the expansion attempt.
    """
    original_bitchain: BitChain
    original_address: str
    original_stat7_dict: Dict[str, Any]
    original_serialized_size: int
    original_luminosity: float

    stages: List[CompressionStage] = field(default_factory=list)

    reconstructed_address: Optional[str] = None
    coordinate_match_accuracy: float = 0.0
    can_expand_completely: bool = False

    final_compression_ratio: float = 0.0
    luminosity_final: float = 0.0
    narrative_preserved: bool = False
    provenance_chain_complete: bool = False

    def calculate_stats(self) -> Dict[str, Any]:
        """Build a flat summary dict for reporting; empty if no stages ran."""
        if not self.stages:
            return {}

        last = self.stages[-1]
        summary: Dict[str, Any] = {}
        summary['original_realm'] = self.original_stat7_dict.get('realm')
        summary['original_address'] = self.original_address[:16] + '...'
        summary['stages_count'] = len(self.stages)
        summary['final_stage'] = last.stage_name
        summary['compression_ratio'] = self.final_compression_ratio
        summary['luminosity_decay'] = self.original_luminosity - self.luminosity_final
        summary['coordinate_accuracy'] = round(self.coordinate_match_accuracy, 4)
        summary['provenance_intact'] = self.provenance_chain_complete
        summary['narrative_preserved'] = self.narrative_preserved
        summary['can_expand'] = self.can_expand_completely
        return summary
| |
|
| |
|
@dataclass
class CompressionExperimentResults:
    """Aggregated outcome of EXP-05 across every compressed bit-chain."""
    start_time: str
    end_time: str
    total_duration_seconds: float
    num_bitchains_tested: int

    compression_paths: List[BitChainCompressionPath]

    avg_compression_ratio: float
    avg_luminosity_decay: float
    avg_coordinate_accuracy: float
    percent_provenance_intact: float
    percent_narrative_preserved: float
    percent_expandable: float

    is_lossless: bool
    major_findings: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Render a JSON-serializable report of this experiment run."""
        aggregate = {
            'avg_compression_ratio': round(self.avg_compression_ratio, 3),
            'avg_luminosity_decay': round(self.avg_luminosity_decay, 4),
            'avg_coordinate_accuracy': round(self.avg_coordinate_accuracy, 4),
            'percent_provenance_intact': round(self.percent_provenance_intact, 1),
            'percent_narrative_preserved': round(self.percent_narrative_preserved, 1),
            'percent_expandable': round(self.percent_expandable, 1),
        }
        quality = {
            'is_lossless': self.is_lossless,
            'major_findings': self.major_findings,
        }
        # Only the first five paths are sampled to keep the report small.
        samples = [p.calculate_stats() for p in self.compression_paths[:5]]
        # False for an empty run; otherwise every path must keep both
        # provenance and narrative intact.
        every_path_valid = bool(self.compression_paths) and all(
            p.provenance_chain_complete and p.narrative_preserved
            for p in self.compression_paths
        )
        return {
            'experiment': 'EXP-05',
            'test_type': 'Compression/Expansion Losslessness',
            'start_time': self.start_time,
            'end_time': self.end_time,
            'total_duration_seconds': round(self.total_duration_seconds, 3),
            'bitchains_tested': self.num_bitchains_tested,
            'aggregate_metrics': aggregate,
            'compression_quality': quality,
            'sample_paths': samples,
            'all_valid': every_path_valid,
        }
| |
|
| |
|
| | |
| | |
| | |
| |
|
class CompressionPipeline:
    """Simulates the compression pipeline from the Seed engine.

    Each compress_bitchain() call pushes a single bit-chain through the
    stages original -> fragment -> cluster -> glyph -> mist, recording a
    CompressionStage snapshot per stage.  Luminosity decays by fixed
    factors along the way: cluster x0.95, glyph x0.85, mist x0.70.
    """

    def __init__(self) -> None:
        # In-memory stores for each stage's records, keyed by stage-record id.
        self.fragment_store = {}
        self.cluster_store = {}
        self.glyph_store = {}
        self.mist_store = {}

    def compress_bitchain(self, bc: BitChain) -> BitChainCompressionPath:
        """
        Compress a bit-chain through the full pipeline.

        Stages:
        1. Original STAT7 coordinates
        2. Fragment representation (serialize bit-chain)
        3. Cluster (group fragments - here just one per chain)
        4. Glyph (molten form with provenance)
        5. Mist (evaporated proto-thought)

        Returns the populated BitChainCompressionPath, including the
        reconstruction metrics filled in by _reconstruct_from_mist().
        """
        # Canonical minimal form of the bit-chain; its serialized length is
        # the baseline all compression ratios are measured against.
        bc_dict = {
            'id': bc.id,
            'coordinates': asdict(bc.coordinates),
        }

        # NOTE(review): luminosity is seeded from the velocity coordinate —
        # confirm velocity is the intended luminosity proxy.
        path = BitChainCompressionPath(
            original_bitchain=bc,
            original_address=bc.compute_address(),
            original_stat7_dict=asdict(bc.coordinates),
            original_serialized_size=len(canonical_serialize(bc_dict)),
            original_luminosity=bc.coordinates.velocity,
        )

        # Stage 1: uncompressed baseline snapshot.
        original_stage = CompressionStage(
            stage_name="original",
            size_bytes=path.original_serialized_size,
            record_count=1,
            key_metadata={
                'address': path.original_address,
                'realm': bc.coordinates.realm,
                'velocity': bc.coordinates.velocity,
            },
            luminosity=bc.coordinates.velocity,
            provenance_intact=True,
        )
        path.stages.append(original_stage)

        # Stage 2: fragment — a flat rendering of the chain.  The 2-element
        # "embedding" (velocity, resonance) is what later stages carry
        # forward as the narrative signal.
        fragment_id = str(uuid.uuid4())[:12]
        fragment = {
            'id': fragment_id,
            'bitchain_id': bc.id,
            'realm': bc.coordinates.realm,
            'text': f"{bc.coordinates.realm}:{bc.coordinates.lineage}:{bc.coordinates.density}",
            'heat': bc.coordinates.velocity,
            'embedding': [bc.coordinates.velocity, bc.coordinates.resonance],
        }
        self.fragment_store[fragment_id] = fragment

        # Stage sizes use plain json.dumps length (the baseline used
        # canonical_serialize) — sizes depend on dict insertion order, so
        # key order here is part of the measured behavior.
        fragment_size = len(json.dumps(fragment))
        fragment_stage = CompressionStage(
            stage_name="fragments",
            size_bytes=fragment_size,
            record_count=1,
            key_metadata={
                'fragment_id': fragment_id,
                'heat': fragment['heat'],
                'embedding': fragment['embedding'],
            },
            luminosity=fragment['heat'],
            provenance_intact=True,
        )
        path.stages.append(fragment_stage)

        # Stage 3: cluster — always a singleton grouping here; the
        # provenance hash binds the source bit-chain id to its realm.
        cluster_id = f"cluster_{hashlib.sha256(fragment_id.encode()).hexdigest()[:10]}"
        cluster = {
            'id': cluster_id,
            'fragments': [fragment_id],
            'size': 1,
            'source_bitchain_ids': [bc.id],
            'provenance_hash': hashlib.sha256(
                f"{bc.id}:{bc.coordinates.realm}".encode()
            ).hexdigest(),
        }
        self.cluster_store[cluster_id] = cluster

        cluster_size = len(json.dumps(cluster))
        cluster_stage = CompressionStage(
            stage_name="cluster",
            size_bytes=cluster_size,
            record_count=1,
            key_metadata={
                'cluster_id': cluster_id,
                'source_bitchain_ids': cluster['source_bitchain_ids'],
                'provenance_hash': cluster['provenance_hash'],
            },
            luminosity=fragment['heat'] * 0.95,  # first decay step (x0.95)
            provenance_intact=True,
        )
        path.stages.append(cluster_stage)

        # Stage 4: glyph — compressed summary plus affect derived from the
        # magnitude of resonance (fixed 0.3/0.2/0.1 split).
        glyph_id = f"mglyph_{hashlib.sha256(cluster_id.encode()).hexdigest()[:12]}"
        affect_intensity = abs(bc.coordinates.resonance)
        glyph = {
            'id': glyph_id,
            'source_ids': [bc.id],
            'source_cluster_id': cluster_id,
            'compressed_summary': f"[{bc.coordinates.realm}] gen={bc.coordinates.lineage}",
            'embedding': fragment['embedding'],
            'affect': {
                'awe': affect_intensity * 0.3,
                'humor': affect_intensity * 0.2,
                'tension': affect_intensity * 0.1,
            },
            'heat_seed': fragment['heat'] * 0.85,
            'provenance_hash': cluster['provenance_hash'],
            'luminosity': fragment['heat'] * 0.85,
        }
        self.glyph_store[glyph_id] = glyph

        glyph_size = len(json.dumps(glyph))
        glyph_stage = CompressionStage(
            stage_name="glyph",
            size_bytes=glyph_size,
            record_count=1,
            key_metadata={
                'glyph_id': glyph_id,
                'embedding': glyph['embedding'],
                'affect': glyph['affect'],
                'provenance_hash': glyph['provenance_hash'],
            },
            luminosity=glyph['heat_seed'],
            provenance_intact=True,
        )
        path.stages.append(glyph_stage)

        # Stage 5: mist — the most compressed form.  glyph_id[7:] strips the
        # "mglyph_" prefix (7 chars).  recovery_breadcrumbs is the ONLY data
        # expansion can use, so it bounds what _reconstruct_from_mist can do.
        mist_id = f"mist_{glyph_id[7:]}"
        mist = {
            'id': mist_id,
            'source_glyph': glyph_id,
            'proto_thought': f"[Proto] {bc.coordinates.realm}...",
            'evaporation_temp': 0.7,
            'mythic_weight': affect_intensity,
            'technical_clarity': 0.6,
            'luminosity': glyph['heat_seed'] * 0.7,

            'recovery_breadcrumbs': {
                'original_realm': bc.coordinates.realm,
                'original_lineage': bc.coordinates.lineage,
                'original_embedding': glyph['embedding'],
            },
        }
        self.mist_store[mist_id] = mist

        mist_size = len(json.dumps(mist))
        mist_stage = CompressionStage(
            stage_name="mist",
            size_bytes=mist_size,
            record_count=1,
            key_metadata={
                'mist_id': mist_id,
                'recovery_breadcrumbs': mist['recovery_breadcrumbs'],
                'luminosity': mist['luminosity'],
            },
            luminosity=mist['luminosity'],
            provenance_intact=True,
        )
        path.stages.append(mist_stage)

        # Final ratio compares the original size to the mist stage only.
        path.final_compression_ratio = path.original_serialized_size / max(mist_size, 1)
        path.luminosity_final = mist['luminosity']

        # Immediately attempt expansion so the path carries its own metrics.
        path = self._reconstruct_from_mist(path, mist)

        return path

    def _reconstruct_from_mist(
        self,
        path: BitChainCompressionPath,
        mist: Dict[str, Any]
    ) -> BitChainCompressionPath:
        """Attempt to reconstruct STAT7 coordinates from mist form.

        Mutates and returns *path*: fills in coordinate_match_accuracy,
        can_expand_completely, narrative_preserved,
        provenance_chain_complete and luminosity_final.
        """
        try:
            breadcrumbs = mist.get('recovery_breadcrumbs', {})

            # Only realm and lineage survive verbatim in the breadcrumbs.
            realm = breadcrumbs.get('original_realm', 'void')
            lineage = breadcrumbs.get('original_lineage', 0)

            # Best-effort coordinate rebuild; fields with no breadcrumb fall
            # back to defaults or mist-derived values.
            # NOTE(review): reconstructed_coords is built but never compared
            # or stored, and path.reconstructed_address is never set —
            # presumably a placeholder for a future address recompute.
            reconstructed_coords = Coordinates(
                realm=realm,
                lineage=lineage,
                adjacency=[],
                horizon='crystallization',
                velocity=mist['luminosity'],
                resonance=mist.get('mythic_weight', 0.0),
                density=0.0,
            )

            # Expandability gate.
            # NOTE(review): a legitimate lineage of 0 would fail this check —
            # confirm lineage is always >= 1 upstream.
            all_fields_present = all([
                realm != 'void',
                lineage > 0,
                mist.get('luminosity', 0) > 0,
            ])

            # Narrative is considered preserved if any embedding survived.
            embedding = breadcrumbs.get('original_embedding', [])
            narrative_preserved = len(embedding) > 0

            # Accuracy is scored against all 7 STAT7 fields, but only 3 are
            # checkable from mist (realm, lineage, embedding), so the ceiling
            # is 3/7 ~= 0.43 — the 0.4 pass threshold reflects this.
            original_coords = path.original_stat7_dict
            fields_recovered = 0
            total_fields = 7

            if realm == original_coords.get('realm'):
                fields_recovered += 1
            if lineage == original_coords.get('lineage'):
                fields_recovered += 1
            if narrative_preserved:
                fields_recovered += 1

            path.coordinate_match_accuracy = fields_recovered / total_fields
            path.can_expand_completely = all_fields_present
            path.narrative_preserved = narrative_preserved
            # Provenance is asserted unconditionally on the success path in
            # this simulation; no hash re-verification happens here.
            path.provenance_chain_complete = True
            path.luminosity_final = mist['luminosity']

        except Exception as e:
            # Broad catch: any failure zeroes accuracy and expandability but
            # leaves narrative/provenance flags at their prior (default) values.
            print(f" Reconstruction failed: {e}")
            path.coordinate_match_accuracy = 0.0
            path.can_expand_completely = False

        return path
| |
|
| |
|
| | |
| | |
| | |
| |
|
def run_compression_expansion_test(
    num_bitchains: int = 100,
    show_samples: bool = True
) -> CompressionExperimentResults:
    """
    Run EXP-05: Compression/Expansion Losslessness Validation

    Args:
        num_bitchains: Number of random bit-chains to compress
            (assumed >= 1; statistics.mean raises on empty data)
        show_samples: Whether to print detailed sample compression paths

    Returns:
        Complete results object
    """
    # ISO timestamp + wall clock bracket the run for the final report.
    start_time = datetime.now(timezone.utc).isoformat()
    overall_start = time.time()

    print("\n" + "=" * 80)
    print("EXP-05: COMPRESSION/EXPANSION LOSSLESSNESS VALIDATION")
    print("=" * 80)
    print(f"Testing {num_bitchains} random bit-chains through full compression pipeline")
    print()

    pipeline = CompressionPipeline()
    compression_paths: List[BitChainCompressionPath] = []

    print(f"Compressing bit-chains...")
    print("-" * 80)

    for i in range(num_bitchains):
        # Fresh random chain per iteration.
        bc = generate_random_bitchain()

        # compress_bitchain also performs the expansion attempt, so each
        # path comes back with its reconstruction metrics already filled in.
        path = pipeline.compress_bitchain(bc)
        compression_paths.append(path)

        # Progress marker every 25 chains.
        if (i + 1) % 25 == 0:
            print(f" [OK] Processed {i + 1}/{num_bitchains} bit-chains")

    print()

    # Print the first few complete paths for eyeball verification.
    if show_samples and compression_paths:
        print("=" * 80)
        print("SAMPLE COMPRESSION PATHS (First 3)")
        print("=" * 80)
        for path in compression_paths[:3]:
            print(f"\nBit-Chain: {path.original_bitchain.id[:12]}...")
            print(f" Original STAT7: {path.original_stat7_dict['realm']} gen={path.original_stat7_dict['lineage']}")
            print(f" Original Address: {path.original_address[:32]}...")
            print(f" Original Size: {path.original_serialized_size} bytes")
            print(f" Original Luminosity: {path.original_luminosity:.4f}")
            print()
            for stage in path.stages:
                print(f" Stage: {stage.stage_name:12} | Size: {stage.size_bytes:6} bytes | Luminosity: {stage.luminosity:.4f}")
            print(f" Final Compression Ratio: {path.final_compression_ratio:.2f}x")
            print(f" Coordinate Accuracy: {path.coordinate_match_accuracy:.1%}")
            print(f" Expandable: {'[Y]' if path.can_expand_completely else '[N]'}")
            print(f" Provenance: {'[Y]' if path.provenance_chain_complete else '[N]'}")
            print(f" Narrative: {'[Y]' if path.narrative_preserved else '[N]'}")

    # Aggregate statistics over every compressed path.
    print()
    print("=" * 80)
    print("AGGREGATE METRICS")
    print("=" * 80)

    compression_ratios = [p.final_compression_ratio for p in compression_paths]
    luminosity_decays = [p.original_luminosity - p.luminosity_final for p in compression_paths]
    coord_accuracies = [p.coordinate_match_accuracy for p in compression_paths]

    avg_compression_ratio = statistics.mean(compression_ratios)
    avg_luminosity_decay = statistics.mean(luminosity_decays)
    avg_coordinate_accuracy = statistics.mean(coord_accuracies)

    percent_provenance = (
        sum(1 for p in compression_paths if p.provenance_chain_complete) / len(compression_paths) * 100
    )
    percent_narrative = (
        sum(1 for p in compression_paths if p.narrative_preserved) / len(compression_paths) * 100
    )
    percent_expandable = (
        sum(1 for p in compression_paths if p.can_expand_completely) / len(compression_paths) * 100
    )

    print(f"Average Compression Ratio: {avg_compression_ratio:.3f}x")
    print(f"Average Luminosity Decay: {avg_luminosity_decay:.4f}")
    print(f"Average Coordinate Accuracy: {avg_coordinate_accuracy:.1%}")
    print(f"Provenance Integrity: {percent_provenance:.1f}%")
    print(f"Narrative Preservation: {percent_narrative:.1f}%")
    print(f"Expandability: {percent_expandable:.1f}%")
    print()

    # Losslessness verdict: perfect provenance, >=90% narrative survival,
    # and coordinate accuracy >= 0.4.  The 0.4 bar reflects that the mist
    # breadcrumbs can recover at most 3 of 7 STAT7 fields (~0.43 ceiling).
    is_lossless = (
        percent_provenance == 100.0 and
        percent_narrative >= 90.0 and
        avg_coordinate_accuracy >= 0.4
    )

    # Human-readable findings mirror the numeric thresholds above.
    major_findings = []

    if percent_provenance == 100.0:
        major_findings.append("[OK] Provenance chain maintained through all compression stages")
    else:
        major_findings.append(f"[WARN] Provenance loss detected ({100-percent_provenance:.1f}% affected)")

    if percent_narrative >= 90.0:
        major_findings.append("[OK] Narrative meaning preserved via embeddings and affect")
    else:
        major_findings.append(f"[WARN] Narrative degradation observed ({100-percent_narrative:.1f}% affected)")

    if avg_coordinate_accuracy >= 0.4:
        major_findings.append(f"[OK] STAT7 coordinates partially recoverable ({avg_coordinate_accuracy:.1%})")
    else:
        major_findings.append(f"[FAIL] STAT7 coordinate recovery insufficient ({avg_coordinate_accuracy:.1%})")

    if avg_compression_ratio >= 2.0:
        major_findings.append(f"[OK] Effective compression achieved ({avg_compression_ratio:.2f}x)")
    else:
        major_findings.append(f"[WARN] Compression ratio modest ({avg_compression_ratio:.2f}x)")

    # NOTE(review): retention treats the average decay as a fraction of a
    # 1.0 luminosity scale, not of the actual average original luminosity —
    # confirm luminosity/velocity is normalized to [0, 1].
    luminosity_retention = (1.0 - avg_luminosity_decay) * 100
    if luminosity_retention >= 70.0:
        major_findings.append(f"[OK] Luminosity retained through compression ({luminosity_retention:.1f}%)")
    else:
        major_findings.append(f"[WARN] Luminosity decay significant ({100-luminosity_retention:.1f}% loss)")

    overall_end = time.time()
    end_time = datetime.now(timezone.utc).isoformat()

    print("=" * 80)
    print("LOSSLESSNESS ANALYSIS")
    print("=" * 80)
    print(f"Lossless System: {'[YES]' if is_lossless else '[NO]'}")
    print()
    for finding in major_findings:
        print(f" {finding}")
    print()

    # Bundle everything into the serializable results object.
    results = CompressionExperimentResults(
        start_time=start_time,
        end_time=end_time,
        total_duration_seconds=overall_end - overall_start,
        num_bitchains_tested=num_bitchains,
        compression_paths=compression_paths,
        avg_compression_ratio=avg_compression_ratio,
        avg_luminosity_decay=avg_luminosity_decay,
        avg_coordinate_accuracy=avg_coordinate_accuracy,
        percent_provenance_intact=percent_provenance,
        percent_narrative_preserved=percent_narrative,
        percent_expandable=percent_expandable,
        is_lossless=is_lossless,
        major_findings=major_findings,
    )

    return results
| |
|
| |
|
| | |
| | |
| | |
| |
|
def save_results(results: 'CompressionExperimentResults', output_file: Optional[str] = None) -> str:
    """Save results as JSON under <this file's dir>/results/.

    Args:
        results: Completed results object; only its ``to_dict()`` is used.
        output_file: Target file name. When None (the default), a
            UTC-timestamped name is generated.

    Returns:
        The full path of the written JSON file as a string.
    """
    if output_file is None:
        timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
        output_file = f"exp05_compression_expansion_{timestamp}.json"

    results_dir = Path(__file__).resolve().parent / 'results'
    results_dir.mkdir(exist_ok=True)
    output_path = str(results_dir / output_file)

    # Pin utf-8 so output is stable regardless of platform locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results.to_dict(), f, indent=2)

    print(f"Results saved to: {output_path}")
    return output_path
| |
|
| |
|
if __name__ == '__main__':
    # CLI flags: --quick shrinks the run, --full enlarges it.
    # --quick wins when both are supplied (checked first).
    if '--quick' in sys.argv:
        num_bitchains = 20
    elif '--full' in sys.argv:
        num_bitchains = 500
    else:
        num_bitchains = 100

    try:
        results = run_compression_expansion_test(num_bitchains=num_bitchains)
        output_file = save_results(results)

        banner = "=" * 80
        print("\n" + banner)
        print(f"[OK] EXP-05 COMPLETE")
        print(banner)
        print(f"Results: {output_file}")
        print()

    except Exception as e:
        # Any failure is reported with a traceback and a non-zero exit code.
        print(f"\n[FAIL] EXPERIMENT FAILED: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
| |
|