FractalStat / fractalstat /exp05_compression_expansion.py
Bellok's picture
Upload 29 files
23a5cce verified
"""
EXP-05: Bit-Chain Compression/Expansion Losslessness Validation
Tests whether STAT7 bit-chains can be compressed through the full pipeline
(fragments → clusters → glyphs → mist) and then expanded back to original
coordinates without information loss.
Validates:
- Provenance chain integrity (all source IDs tracked)
- STAT7 coordinate reconstruction accuracy
- Luminosity decay through compression stages
- Narrative preservation (embeddings, affect survival)
- Compression ratio efficiency
Status: Phase 2 validation experiment
"""
import json
import hashlib
import time
import uuid
import random
import sys
from datetime import datetime, timezone
from decimal import Decimal, ROUND_HALF_EVEN
from typing import Dict, List, Tuple, Any, Optional
from dataclasses import dataclass, asdict, field
from collections import defaultdict
import statistics
import math
from pathlib import Path
# Reuse canonical serialization from Phase 1
from stat7_experiments import (
normalize_float,
normalize_timestamp,
sort_json_keys,
canonical_serialize,
compute_address_hash,
Coordinates,
BitChain,
REALMS,
HORIZONS,
ENTITY_TYPES,
generate_random_bitchain,
)
# ============================================================================
# EXP-05 DATA STRUCTURES
# ============================================================================
@dataclass
class CompressionStage:
"""Single stage in the compression pipeline."""
stage_name: str # "original", "fragments", "cluster", "glyph", "mist"
size_bytes: int
record_count: int
key_metadata: Dict[str, Any] # What survives at this stage
luminosity: float # Activity level / heat
provenance_intact: bool
def compression_ratio_from_original(self, original_bytes: int) -> float:
"""Calculate compression ratio relative to original."""
return original_bytes / max(self.size_bytes, 1)
@dataclass
class BitChainCompressionPath:
"""Complete compression path for a single bit-chain."""
original_bitchain: BitChain
original_address: str
original_stat7_dict: Dict[str, Any]
original_serialized_size: int
original_luminosity: float
# Stages
stages: List[CompressionStage] = field(default_factory=list)
# Reconstruction attempt
reconstructed_address: Optional[str] = None
coordinate_match_accuracy: float = 0.0 # 0.0 to 1.0
can_expand_completely: bool = False
# Metrics
final_compression_ratio: float = 0.0
luminosity_final: float = 0.0
narrative_preserved: bool = False
provenance_chain_complete: bool = False
def calculate_stats(self) -> Dict[str, Any]:
"""Compute summary statistics for this compression path."""
if not self.stages:
return {}
final_stage = self.stages[-1]
return {
'original_realm': self.original_stat7_dict.get('realm'),
'original_address': self.original_address[:16] + '...',
'stages_count': len(self.stages),
'final_stage': final_stage.stage_name,
'compression_ratio': self.final_compression_ratio,
'luminosity_decay': self.original_luminosity - self.luminosity_final,
'coordinate_accuracy': round(self.coordinate_match_accuracy, 4),
'provenance_intact': self.provenance_chain_complete,
'narrative_preserved': self.narrative_preserved,
'can_expand': self.can_expand_completely,
}
@dataclass
class CompressionExperimentResults:
"""Complete results from EXP-05 compression/expansion validation."""
start_time: str
end_time: str
total_duration_seconds: float
num_bitchains_tested: int
# Per-bitchain paths
compression_paths: List[BitChainCompressionPath]
# Aggregate statistics
avg_compression_ratio: float
avg_luminosity_decay: float
avg_coordinate_accuracy: float
percent_provenance_intact: float
percent_narrative_preserved: float
percent_expandable: float
# Overall validation
is_lossless: bool
major_findings: List[str] = field(default_factory=list)
def to_dict(self) -> Dict[str, Any]:
"""Convert to serializable dict."""
return {
'experiment': 'EXP-05',
'test_type': 'Compression/Expansion Losslessness',
'start_time': self.start_time,
'end_time': self.end_time,
'total_duration_seconds': round(self.total_duration_seconds, 3),
'bitchains_tested': self.num_bitchains_tested,
'aggregate_metrics': {
'avg_compression_ratio': round(self.avg_compression_ratio, 3),
'avg_luminosity_decay': round(self.avg_luminosity_decay, 4),
'avg_coordinate_accuracy': round(self.avg_coordinate_accuracy, 4),
'percent_provenance_intact': round(self.percent_provenance_intact, 1),
'percent_narrative_preserved': round(self.percent_narrative_preserved, 1),
'percent_expandable': round(self.percent_expandable, 1),
},
'compression_quality': {
'is_lossless': self.is_lossless,
'major_findings': self.major_findings,
},
'sample_paths': [
p.calculate_stats()
for p in self.compression_paths[:min(5, len(self.compression_paths))] # Show first 5
],
'all_valid': all(
p.provenance_chain_complete and p.narrative_preserved
for p in self.compression_paths
) if self.compression_paths else False,
}
# ============================================================================
# COMPRESSION PIPELINE SIMULATION
# ============================================================================
class CompressionPipeline:
"""Simulates the compression pipeline from the Seed engine."""
def __init__(self):
self.fragment_store = {}
self.cluster_store = {}
self.glyph_store = {}
self.mist_store = {}
def compress_bitchain(self, bc: BitChain) -> BitChainCompressionPath:
"""
Compress a bit-chain through the full pipeline.
Stages:
1. Original STAT7 coordinates
2. Fragment representation (serialize bit-chain)
3. Cluster (group fragments - here just one per chain)
4. Glyph (molten form with provenance)
5. Mist (evaporated proto-thought)
"""
# Convert bitchain to dict for serialization
bc_dict = {
'id': bc.id,
'coordinates': asdict(bc.coordinates),
}
path = BitChainCompressionPath(
original_bitchain=bc,
original_address=bc.compute_address(),
original_stat7_dict=asdict(bc.coordinates),
original_serialized_size=len(canonical_serialize(bc_dict)),
original_luminosity=bc.coordinates.velocity, # Use velocity as activity proxy
)
# Stage 1: Original (baseline)
original_stage = CompressionStage(
stage_name="original",
size_bytes=path.original_serialized_size,
record_count=1,
key_metadata={
'address': path.original_address,
'realm': bc.coordinates.realm,
'velocity': bc.coordinates.velocity,
},
luminosity=bc.coordinates.velocity,
provenance_intact=True,
)
path.stages.append(original_stage)
# Stage 2: Fragment representation
fragment_id = str(uuid.uuid4())[:12]
fragment = {
'id': fragment_id,
'bitchain_id': bc.id,
'realm': bc.coordinates.realm,
'text': f"{bc.coordinates.realm}:{bc.coordinates.lineage}:{bc.coordinates.density}",
'heat': bc.coordinates.velocity,
'embedding': [bc.coordinates.velocity, bc.coordinates.resonance],
}
self.fragment_store[fragment_id] = fragment
fragment_size = len(json.dumps(fragment))
fragment_stage = CompressionStage(
stage_name="fragments",
size_bytes=fragment_size,
record_count=1,
key_metadata={
'fragment_id': fragment_id,
'heat': fragment['heat'],
'embedding': fragment['embedding'],
},
luminosity=fragment['heat'],
provenance_intact=True,
)
path.stages.append(fragment_stage)
# Stage 3: Cluster (group fragments - here just wrapping one)
cluster_id = f"cluster_{hashlib.sha256(fragment_id.encode()).hexdigest()[:10]}"
cluster = {
'id': cluster_id,
'fragments': [fragment_id],
'size': 1,
'source_bitchain_ids': [bc.id],
'provenance_hash': hashlib.sha256(
f"{bc.id}:{bc.coordinates.realm}".encode()
).hexdigest(),
}
self.cluster_store[cluster_id] = cluster
cluster_size = len(json.dumps(cluster))
cluster_stage = CompressionStage(
stage_name="cluster",
size_bytes=cluster_size,
record_count=1,
key_metadata={
'cluster_id': cluster_id,
'source_bitchain_ids': cluster['source_bitchain_ids'],
'provenance_hash': cluster['provenance_hash'],
},
luminosity=fragment['heat'] * 0.95, # Slight decay
provenance_intact=True,
)
path.stages.append(cluster_stage)
# Stage 4: Glyph (molten form - further compress with affect)
glyph_id = f"mglyph_{hashlib.sha256(cluster_id.encode()).hexdigest()[:12]}"
affect_intensity = abs(bc.coordinates.resonance) # Use resonance as affect proxy
glyph = {
'id': glyph_id,
'source_ids': [bc.id],
'source_cluster_id': cluster_id,
'compressed_summary': f"[{bc.coordinates.realm}] gen={bc.coordinates.lineage}",
'embedding': fragment['embedding'], # Preserve embedding
'affect': {
'awe': affect_intensity * 0.3,
'humor': affect_intensity * 0.2,
'tension': affect_intensity * 0.1,
},
'heat_seed': fragment['heat'] * 0.85, # More decay
'provenance_hash': cluster['provenance_hash'],
'luminosity': fragment['heat'] * 0.85,
}
self.glyph_store[glyph_id] = glyph
glyph_size = len(json.dumps(glyph))
glyph_stage = CompressionStage(
stage_name="glyph",
size_bytes=glyph_size,
record_count=1,
key_metadata={
'glyph_id': glyph_id,
'embedding': glyph['embedding'],
'affect': glyph['affect'],
'provenance_hash': glyph['provenance_hash'],
},
luminosity=glyph['heat_seed'],
provenance_intact=True,
)
path.stages.append(glyph_stage)
# Stage 5: Mist (final compression - proto-thought)
mist_id = f"mist_{glyph_id[7:]}" # Remove mglyph_ prefix
mist = {
'id': mist_id,
'source_glyph': glyph_id,
'proto_thought': f"[Proto] {bc.coordinates.realm}...",
'evaporation_temp': 0.7,
'mythic_weight': affect_intensity,
'technical_clarity': 0.6,
'luminosity': glyph['heat_seed'] * 0.7, # Final decay
# Preserve just enough for reconstruction
'recovery_breadcrumbs': {
'original_realm': bc.coordinates.realm,
'original_lineage': bc.coordinates.lineage,
'original_embedding': glyph['embedding'],
},
}
self.mist_store[mist_id] = mist
mist_size = len(json.dumps(mist))
mist_stage = CompressionStage(
stage_name="mist",
size_bytes=mist_size,
record_count=1,
key_metadata={
'mist_id': mist_id,
'recovery_breadcrumbs': mist['recovery_breadcrumbs'],
'luminosity': mist['luminosity'],
},
luminosity=mist['luminosity'],
provenance_intact=True, # Breadcrumbs preserve some info
)
path.stages.append(mist_stage)
# Calculate path statistics
path.final_compression_ratio = path.original_serialized_size / max(mist_size, 1)
path.luminosity_final = mist['luminosity']
# Attempt reconstruction
path = self._reconstruct_from_mist(path, mist)
return path
def _reconstruct_from_mist(
self,
path: BitChainCompressionPath,
mist: Dict[str, Any]
) -> BitChainCompressionPath:
"""Attempt to reconstruct STAT7 coordinates from mist form."""
try:
breadcrumbs = mist.get('recovery_breadcrumbs', {})
# Try to recover coordinates
realm = breadcrumbs.get('original_realm', 'void')
lineage = breadcrumbs.get('original_lineage', 0)
# Reconstruct a coordinate estimate (using actual STAT7 fields)
reconstructed_coords = Coordinates(
realm=realm,
lineage=lineage,
adjacency=[], # Lost
horizon='crystallization', # Assume final state
velocity=mist['luminosity'], # Decayed velocity
resonance=mist.get('mythic_weight', 0.0), # From affect
density=0.0, # Lost
)
# Can we expand completely?
all_fields_present = all([
realm != 'void',
lineage > 0,
mist.get('luminosity', 0) > 0,
])
# Narrative preserved if embedding survives
embedding = breadcrumbs.get('original_embedding', [])
narrative_preserved = len(embedding) > 0
# Check coordinate accuracy
original_coords = path.original_stat7_dict
fields_recovered = 0
total_fields = 7 # realm, lineage, adjacency, horizon, velocity, resonance, density
if realm == original_coords.get('realm'):
fields_recovered += 1
if lineage == original_coords.get('lineage'):
fields_recovered += 1
if narrative_preserved: # Embedding presence counts
fields_recovered += 1
path.coordinate_match_accuracy = fields_recovered / total_fields
path.can_expand_completely = all_fields_present
path.narrative_preserved = narrative_preserved
path.provenance_chain_complete = True # Breadcrumbs preserved it
path.luminosity_final = mist['luminosity']
except Exception as e:
print(f" Reconstruction failed: {e}")
path.coordinate_match_accuracy = 0.0
path.can_expand_completely = False
return path
# ============================================================================
# VALIDATION EXPERIMENT ORCHESTRATION
# ============================================================================
def run_compression_expansion_test(
num_bitchains: int = 100,
show_samples: bool = True
) -> CompressionExperimentResults:
"""
Run EXP-05: Compression/Expansion Losslessness Validation
Args:
num_bitchains: Number of random bit-chains to compress
show_samples: Whether to print detailed sample compression paths
Returns:
Complete results object
"""
start_time = datetime.now(timezone.utc).isoformat()
overall_start = time.time()
print("\n" + "=" * 80)
print("EXP-05: COMPRESSION/EXPANSION LOSSLESSNESS VALIDATION")
print("=" * 80)
print(f"Testing {num_bitchains} random bit-chains through full compression pipeline")
print()
pipeline = CompressionPipeline()
compression_paths: List[BitChainCompressionPath] = []
print(f"Compressing bit-chains...")
print("-" * 80)
for i in range(num_bitchains):
# Generate random bit-chain
bc = generate_random_bitchain()
# Compress through pipeline
path = pipeline.compress_bitchain(bc)
compression_paths.append(path)
if (i + 1) % 25 == 0:
print(f" [OK] Processed {i + 1}/{num_bitchains} bit-chains")
print()
# Show sample paths if requested
if show_samples and compression_paths:
print("=" * 80)
print("SAMPLE COMPRESSION PATHS (First 3)")
print("=" * 80)
for path in compression_paths[:3]:
print(f"\nBit-Chain: {path.original_bitchain.id[:12]}...")
print(f" Original STAT7: {path.original_stat7_dict['realm']} gen={path.original_stat7_dict['lineage']}")
print(f" Original Address: {path.original_address[:32]}...")
print(f" Original Size: {path.original_serialized_size} bytes")
print(f" Original Luminosity: {path.original_luminosity:.4f}")
print()
for stage in path.stages:
print(f" Stage: {stage.stage_name:12} | Size: {stage.size_bytes:6} bytes | Luminosity: {stage.luminosity:.4f}")
print(f" Final Compression Ratio: {path.final_compression_ratio:.2f}x")
print(f" Coordinate Accuracy: {path.coordinate_match_accuracy:.1%}")
print(f" Expandable: {'[Y]' if path.can_expand_completely else '[N]'}")
print(f" Provenance: {'[Y]' if path.provenance_chain_complete else '[N]'}")
print(f" Narrative: {'[Y]' if path.narrative_preserved else '[N]'}")
# Compute aggregate metrics
print()
print("=" * 80)
print("AGGREGATE METRICS")
print("=" * 80)
compression_ratios = [p.final_compression_ratio for p in compression_paths]
luminosity_decays = [p.original_luminosity - p.luminosity_final for p in compression_paths]
coord_accuracies = [p.coordinate_match_accuracy for p in compression_paths]
avg_compression_ratio = statistics.mean(compression_ratios)
avg_luminosity_decay = statistics.mean(luminosity_decays)
avg_coordinate_accuracy = statistics.mean(coord_accuracies)
percent_provenance = (
sum(1 for p in compression_paths if p.provenance_chain_complete) / len(compression_paths) * 100
)
percent_narrative = (
sum(1 for p in compression_paths if p.narrative_preserved) / len(compression_paths) * 100
)
percent_expandable = (
sum(1 for p in compression_paths if p.can_expand_completely) / len(compression_paths) * 100
)
print(f"Average Compression Ratio: {avg_compression_ratio:.3f}x")
print(f"Average Luminosity Decay: {avg_luminosity_decay:.4f}")
print(f"Average Coordinate Accuracy: {avg_coordinate_accuracy:.1%}")
print(f"Provenance Integrity: {percent_provenance:.1f}%")
print(f"Narrative Preservation: {percent_narrative:.1f}%")
print(f"Expandability: {percent_expandable:.1f}%")
print()
# Determine if system is lossless
is_lossless = (
percent_provenance == 100.0 and
percent_narrative >= 90.0 and
avg_coordinate_accuracy >= 0.4 # At least ~3 out of 7 fields recoverable
)
# Generate findings
major_findings = []
if percent_provenance == 100.0:
major_findings.append("[OK] Provenance chain maintained through all compression stages")
else:
major_findings.append(f"[WARN] Provenance loss detected ({100-percent_provenance:.1f}% affected)")
if percent_narrative >= 90.0:
major_findings.append("[OK] Narrative meaning preserved via embeddings and affect")
else:
major_findings.append(f"[WARN] Narrative degradation observed ({100-percent_narrative:.1f}% affected)")
if avg_coordinate_accuracy >= 0.4:
major_findings.append(f"[OK] STAT7 coordinates partially recoverable ({avg_coordinate_accuracy:.1%})")
else:
major_findings.append(f"[FAIL] STAT7 coordinate recovery insufficient ({avg_coordinate_accuracy:.1%})")
if avg_compression_ratio >= 2.0:
major_findings.append(f"[OK] Effective compression achieved ({avg_compression_ratio:.2f}x)")
else:
major_findings.append(f"[WARN] Compression ratio modest ({avg_compression_ratio:.2f}x)")
luminosity_retention = (1.0 - avg_luminosity_decay) * 100
if luminosity_retention >= 70.0:
major_findings.append(f"[OK] Luminosity retained through compression ({luminosity_retention:.1f}%)")
else:
major_findings.append(f"[WARN] Luminosity decay significant ({100-luminosity_retention:.1f}% loss)")
overall_end = time.time()
end_time = datetime.now(timezone.utc).isoformat()
print("=" * 80)
print("LOSSLESSNESS ANALYSIS")
print("=" * 80)
print(f"Lossless System: {'[YES]' if is_lossless else '[NO]'}")
print()
for finding in major_findings:
print(f" {finding}")
print()
results = CompressionExperimentResults(
start_time=start_time,
end_time=end_time,
total_duration_seconds=overall_end - overall_start,
num_bitchains_tested=num_bitchains,
compression_paths=compression_paths,
avg_compression_ratio=avg_compression_ratio,
avg_luminosity_decay=avg_luminosity_decay,
avg_coordinate_accuracy=avg_coordinate_accuracy,
percent_provenance_intact=percent_provenance,
percent_narrative_preserved=percent_narrative,
percent_expandable=percent_expandable,
is_lossless=is_lossless,
major_findings=major_findings,
)
return results
# ============================================================================
# CLI & RESULTS PERSISTENCE
# ============================================================================
def save_results(results: CompressionExperimentResults, output_file: str = None) -> str:
"""Save results to JSON file."""
if output_file is None:
timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
output_file = f"exp05_compression_expansion_{timestamp}.json"
results_dir = Path(__file__).resolve().parent / 'results'
results_dir.mkdir(exist_ok=True)
output_path = str(results_dir / output_file)
with open(output_path, 'w') as f:
json.dump(results.to_dict(), f, indent=2)
print(f"Results saved to: {output_path}")
return output_path
if __name__ == '__main__':
num_bitchains = 100
if '--quick' in sys.argv:
num_bitchains = 20
elif '--full' in sys.argv:
num_bitchains = 500
try:
results = run_compression_expansion_test(num_bitchains=num_bitchains)
output_file = save_results(results)
print("\n" + "=" * 80)
print(f"[OK] EXP-05 COMPLETE")
print("=" * 80)
print(f"Results: {output_file}")
print()
except Exception as e:
print(f"\n[FAIL] EXPERIMENT FAILED: {e}")
import traceback
traceback.print_exc()
sys.exit(1)