jmontp commited on
Commit
43ec583
·
1 Parent(s): d1a6270

Updated to new data and multivariate lib api

Browse files
__pycache__/shared_styling.cpython-312.pyc DELETED
Binary file (32.5 kB)
 
cached_data/precalculated_stats.pkl.gz CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:030a03b477c02ead69f0b0c83bfab2133d7f8bb9e2f81ab3ae09cb21b27fbd93
3
- size 5573257
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be3355088c6913cdb552802f2e3aa139abed2a3a79441895d31d08ff5f9cd23a
3
+ size 25443689
config.py CHANGED
@@ -1,18 +1,22 @@
1
  # Configuration file for dashboard and preprocessing scripts
2
 
 
 
 
 
3
  AVAILABLE_SENSORS = [
 
4
  'hip_angle_s_r', 'hip_angle_s_l', 'hip_vel_s_r', 'hip_vel_s_l',
5
  'knee_angle_s_r', 'knee_angle_s_l', 'knee_vel_s_r', 'knee_vel_s_l',
6
  'ankle_angle_s_r', 'ankle_angle_s_l', 'ankle_vel_s_r', 'ankle_vel_s_l',
7
  'foot_angle_s_r', 'foot_angle_s_l', 'foot_vel_s_r', 'foot_vel_s_l',
8
- # Add segment angles and velocities
9
- 'pelvis_angle_s_r', 'pelvis_angle_s_l', 'pelvis_vel_s_r', 'pelvis_vel_s_l',
10
- 'hip_angle_f_r', 'hip_angle_f_l', 'hip_vel_f_r', 'hip_vel_f_l',
11
- 'knee_angle_f_r', 'knee_angle_f_l', 'knee_vel_f_r', 'knee_vel_f_l',
12
- 'ankle_angle_f_r', 'ankle_angle_f_l', 'ankle_vel_f_r', 'ankle_vel_f_l',
13
- 'hip_angle_t_r', 'hip_angle_t_l', 'hip_vel_t_r', 'hip_vel_t_l',
14
- 'knee_angle_t_r', 'knee_angle_t_l', 'knee_vel_t_r', 'knee_vel_t_l',
15
- 'ankle_angle_t_r', 'ankle_angle_t_l', 'ankle_vel_t_r', 'ankle_vel_t_l'
16
  ]
17
 
18
  AVAILABLE_TASKS = ['decline_walking', 'level_walking', 'incline_walking',
@@ -26,24 +30,63 @@ ANALYSIS_ABSTRACTION_LEVELS = ['High', 'Medium/Low']
26
 
27
  # Task configurations for pre-calculation and analysis
28
  LOW_LEVEL_TASKS = [
29
- ('stair_descent', None, None),
30
- ('stair_ascent', None, None),
31
- ('sit_to_stand', None, None),
 
 
 
 
 
 
 
 
 
 
 
32
  ('level_walking', 0.0, 0.8),
33
  ('level_walking', 0.0, 1.0),
34
  ('level_walking', 0.0, 1.2),
35
- # Gtech variants
 
 
 
36
  ('level_walking', 0.0, 0.6),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  ('level_walking', 0.0, 1.8),
 
 
 
38
  ('incline_walking', 5.0, 0.8),
39
  ('incline_walking', 5.0, 1.0),
40
  ('incline_walking', 5.0, 1.2),
41
- ('decline_walking', -5.0, 0.8),
42
- ('decline_walking', -5.0, 1.0),
43
- ('decline_walking', -5.0, 1.2),
44
  ('incline_walking', 10.0, 0.8),
45
  ('incline_walking', 10.0, 1.0),
46
  ('incline_walking', 10.0, 1.2),
 
 
 
 
 
47
  ('decline_walking', -10.0, 0.8),
48
  ('decline_walking', -10.0, 1.0),
49
  ('decline_walking', -10.0, 1.2),
 
1
  # Configuration file for dashboard and preprocessing scripts
2
 
3
+ # Data source control
4
+ USE_FILTERED_DATA = True # If True: use *_filtered.parquet files (standard speeds only)
5
+ # If False: use original .parquet files (all speeds, more data)
6
+
7
  AVAILABLE_SENSORS = [
8
+ # Core sagittal plane sensors (available in both UMich and GTech)
9
  'hip_angle_s_r', 'hip_angle_s_l', 'hip_vel_s_r', 'hip_vel_s_l',
10
  'knee_angle_s_r', 'knee_angle_s_l', 'knee_vel_s_r', 'knee_vel_s_l',
11
  'ankle_angle_s_r', 'ankle_angle_s_l', 'ankle_vel_s_r', 'ankle_vel_s_l',
12
  'foot_angle_s_r', 'foot_angle_s_l', 'foot_vel_s_r', 'foot_vel_s_l',
13
+ # Pelvis and trunk angles (available in both datasets)
14
+ 'pelvis_angle_s_r', 'pelvis_angle_s_l',
15
+ # Calculated segment sensors (computed by loader.py)
16
+ 'shank_angle_s_r', 'shank_angle_s_l', 'shank_vel_s_r', 'shank_vel_s_l',
17
+ 'thigh_angle_s_r', 'thigh_angle_s_l', 'thigh_vel_s_r', 'thigh_vel_s_l',
18
+ # Ground reaction forces (keeping only vertical right GRF)
19
+ 'grf_y_r',
 
20
  ]
21
 
22
  AVAILABLE_TASKS = ['decline_walking', 'level_walking', 'incline_walking',
 
30
 
31
  # Task configurations for pre-calculation and analysis
32
  LOW_LEVEL_TASKS = [
33
+ # Stair tasks - using incline field for height in mm, speed is None (self-selected)
34
+ # Original GTech stairs (102mm)
35
+ ('stair_descent', 102.0, None),
36
+ ('stair_ascent', 102.0, None),
37
+
38
+ # Additional stair heights (new datasets)
39
+ ('stair_descent', 127.0, None),
40
+ ('stair_ascent', 127.0, None),
41
+ ('stair_descent', 152.0, None),
42
+ ('stair_ascent', 152.0, None),
43
+ ('stair_descent', 178.0, None),
44
+ ('stair_ascent', 178.0, None),
45
+
46
+ # Level walking - UMich steady-state speeds
47
  ('level_walking', 0.0, 0.8),
48
  ('level_walking', 0.0, 1.0),
49
  ('level_walking', 0.0, 1.2),
50
+
51
+ # Level walking - All GTech speeds
52
+ ('level_walking', 0.0, 0.5),
53
+ ('level_walking', 0.0, 0.55),
54
  ('level_walking', 0.0, 0.6),
55
+ ('level_walking', 0.0, 0.65),
56
+ ('level_walking', 0.0, 0.7),
57
+ ('level_walking', 0.0, 0.75),
58
+ ('level_walking', 0.0, 0.85),
59
+ ('level_walking', 0.0, 0.9),
60
+ ('level_walking', 0.0, 0.95),
61
+ ('level_walking', 0.0, 1.05),
62
+ ('level_walking', 0.0, 1.1),
63
+ ('level_walking', 0.0, 1.15),
64
+ ('level_walking', 0.0, 1.25),
65
+ ('level_walking', 0.0, 1.3),
66
+ ('level_walking', 0.0, 1.35),
67
+ ('level_walking', 0.0, 1.4),
68
+ ('level_walking', 0.0, 1.45),
69
+ ('level_walking', 0.0, 1.5),
70
+ ('level_walking', 0.0, 1.55),
71
+ ('level_walking', 0.0, 1.6),
72
+ ('level_walking', 0.0, 1.65),
73
+ ('level_walking', 0.0, 1.7),
74
+ ('level_walking', 0.0, 1.75),
75
  ('level_walking', 0.0, 1.8),
76
+ ('level_walking', 0.0, 1.85),
77
+
78
+ # Incline walking - UMich speeds
79
  ('incline_walking', 5.0, 0.8),
80
  ('incline_walking', 5.0, 1.0),
81
  ('incline_walking', 5.0, 1.2),
 
 
 
82
  ('incline_walking', 10.0, 0.8),
83
  ('incline_walking', 10.0, 1.0),
84
  ('incline_walking', 10.0, 1.2),
85
+
86
+ # Decline walking - UMich speeds
87
+ ('decline_walking', -5.0, 0.8),
88
+ ('decline_walking', -5.0, 1.0),
89
+ ('decline_walking', -5.0, 1.2),
90
  ('decline_walking', -10.0, 0.8),
91
  ('decline_walking', -10.0, 1.0),
92
  ('decline_walking', -10.0, 1.2),
gpu_overlap.py ADDED
@@ -0,0 +1,892 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
GPU-accelerated implementation of multivariate Gaussian overlap calculation using CuPy.
This provides massive speedup for large-scale analyses by processing all task pairs simultaneously.
"""

import numpy as np
import warnings
from typing import Optional

# Try to import CuPy for GPU acceleration.
# GPU_AVAILABLE: CuPy importable; GPU_READY: a CUDA device actually responds.
try:
    import cupy as cp
    GPU_AVAILABLE = True
    print("✅ CuPy GPU acceleration available")
except ImportError:
    GPU_AVAILABLE = False
    cp = None
    print("⚠️ CuPy not available. Install with: pip install cupy-cuda12x")

# Check for CUDA availability: CuPy can be installed without a working driver.
if GPU_AVAILABLE:
    try:
        # Probing Device(0) raises if no CUDA runtime/driver/device is present.
        device = cp.cuda.Device(0)
        GPU_READY = True
        print(f"🚀 GPU ready: Device {device.id} (RTX detected)")
    except Exception:
        # BUG FIX: was a bare `except:`, which would also swallow
        # SystemExit/KeyboardInterrupt raised during the probe.
        GPU_READY = False
        GPU_AVAILABLE = False
        print("⚠️ CUDA not available, disabling GPU acceleration")
else:
    GPU_READY = False
35
+
36
def compute_overlap_batch_gpu(means1_batch, vars1_batch, means2_batch, vars2_batch,
                              tol=1e-12, biomechanical_filter=False):
    """
    GPU-accelerated batch overlap computation using CuPy.

    Processes all subjects simultaneously with full GPU vectorization, computing
    exp(-0.5 * sum_f (mu1 - mu2)^2 / (var1 + var2)) for every phase pair.

    Parameters:
        means1_batch: np.ndarray shape (n_subjects, n_phases, n_features)
        vars1_batch: np.ndarray shape (n_subjects, n_phases, n_features)
        means2_batch: np.ndarray shape (n_subjects, n_phases, n_features)
        vars2_batch: np.ndarray shape (n_subjects, n_phases, n_features)
        tol: float, tolerance for variance validity
        biomechanical_filter: bool, apply biomechanical filtering

    Returns:
        np.ndarray shape (n_subjects, n_phases, n_phases) - overlap values in [0, 1]

    Raises:
        RuntimeError: if CuPy/CUDA is not available.
    """
    if not GPU_AVAILABLE:
        raise RuntimeError("CuPy not available for GPU computation")

    n_subjects, n_phases, n_features = means1_batch.shape

    # Transfer to GPU - single transfer for all data (float32 halves memory/bandwidth)
    means1_gpu = cp.asarray(means1_batch, dtype=cp.float32)
    vars1_gpu = cp.asarray(vars1_batch, dtype=cp.float32)
    means2_gpu = cp.asarray(means2_batch, dtype=cp.float32)
    vars2_gpu = cp.asarray(vars2_batch, dtype=cp.float32)

    # BUG FIX: removed a dead cp.zeros((n_subjects, 150, 150)) pre-allocation
    # that was immediately overwritten by the cp.where below, and that
    # hard-coded 150 phases. The output shape now follows n_phases.

    # Broadcasting computes ALL phase pairs at once:
    #   means1: (n_subjects, n_phases, 1, F) for phase_i
    #   means2: (n_subjects, 1, n_phases, F) for phase_j
    means1_exp = means1_gpu[:, :, cp.newaxis, :]
    vars1_exp = vars1_gpu[:, :, cp.newaxis, :]
    means2_exp = means2_gpu[:, cp.newaxis, :, :]
    vars2_exp = vars2_gpu[:, cp.newaxis, :, :]

    # All pairwise differences and pooled variances: (n_subjects, n_phases, n_phases, F)
    diff = means1_exp - means2_exp
    var_sum = vars1_exp + vars2_exp

    # A phase pair is valid only if no feature is NaN and every pooled
    # variance exceeds tol (guards the division below).
    valid_mask = (~cp.isnan(diff).any(axis=3) &
                  ~cp.isnan(var_sum).any(axis=3) &
                  (var_sum > tol).all(axis=3))

    # Quadratic form; cp.where avoids division blow-ups on invalid entries
    quad_terms = cp.where(valid_mask[:, :, :, cp.newaxis],
                          diff * diff / var_sum,
                          0.0)

    # Sum over features -> (n_subjects, n_phases, n_phases)
    quad_sum = cp.sum(quad_terms, axis=3)

    # exp(-q/2) underflows float32 long before q/2 = 20; clamp those to 0
    safe_exp_mask = valid_mask & (quad_sum * 0.5 <= 20.0)
    overlap_batch_gpu = cp.where(safe_exp_mask,
                                 cp.exp(-0.5 * quad_sum),
                                 0.0)

    # Optional post-hoc filtering on the first (torque) feature
    if biomechanical_filter:
        overlap_batch_gpu = _apply_biomechanical_filter_gpu(
            overlap_batch_gpu, means1_gpu, vars1_gpu, means2_gpu, vars2_gpu, tol
        )

    # Transfer back to CPU - single transfer
    result = cp.asnumpy(overlap_batch_gpu).astype(np.float64)

    # Final clipping on CPU (filtering may nudge values past [0, 1] in float32)
    np.clip(result, 0.0, 1.0, out=result)
    return result
118
+
119
+
120
def _apply_biomechanical_filter_gpu(overlap_batch, means1_batch, vars1_batch,
                                    means2_batch, vars2_batch, tol):
    """Apply biomechanical filtering on GPU using vectorized operations.

    Post-processes the (n_subjects, 150, 150) overlap matrix using only the
    first feature (assumed to be torque — TODO confirm with callers):
      * pairs where both torques are negligible are forced to full overlap (1.0);
      * remaining pairs (neither negligible-negligible nor an amplitude
        conflict) have their dissimilarity scaled by a sign-mismatch
        probability and a mean-difference ramp penalty.
    Returns the modified overlap_batch (modified in place for the m2 cases).
    """
    # NOTE(review): n_subjects is assigned but never used below.
    n_subjects = overlap_batch.shape[0]
    negligible_threshold = 0.1   # torque magnitude treated as "no torque"
    ampable_threshold = 0.2      # torque magnitude treated as meaningful
    ci_factor = 1.96             # 95% confidence interval half-width factor

    # Only process first feature (torque) for biomechanical filtering
    means1_torque = means1_batch[:, :, 0]  # Shape: (n_subjects, 150)
    means2_torque = means2_batch[:, :, 0]
    vars1_torque = vars1_batch[:, :, 0]
    vars2_torque = vars2_batch[:, :, 0]

    # Vectorized std and 95% CI calculations per phase
    std1 = cp.sqrt(vars1_torque)
    std2 = cp.sqrt(vars2_torque)

    ci_lo1 = means1_torque - ci_factor * std1
    ci_hi1 = means1_torque + ci_factor * std1
    ci_lo2 = means2_torque - ci_factor * std2
    ci_hi2 = means2_torque + ci_factor * std2

    # "Negligible": the whole CI fits inside [-threshold, +threshold]
    negligible1 = ((ci_lo1 >= -negligible_threshold) &
                   (ci_hi1 <= negligible_threshold))  # Shape: (n_subjects, 150)
    negligible2 = ((ci_lo2 >= -negligible_threshold) &
                   (ci_hi2 <= negligible_threshold))
    # "Ampable": mean torque magnitude exceeds the amplitude threshold
    ampable1 = cp.abs(means1_torque) > ampable_threshold
    ampable2 = cp.abs(means2_torque) > ampable_threshold

    # Broadcast to phase pair dimensions using newaxis
    neg1_exp = negligible1[:, :, cp.newaxis]  # (n_subjects, 150, 1)
    neg2_exp = negligible2[:, cp.newaxis, :]  # (n_subjects, 1, 150)
    amp1_exp = ampable1[:, :, cp.newaxis]
    amp2_exp = ampable2[:, cp.newaxis, :]

    # Three-level filtering masks
    # Negligible-negligible: Both torques are negligible
    m0 = neg1_exp & neg2_exp  # Shape: (n_subjects, 150, 150)

    # Amplitude conflicts: One negligible, other ampable
    # NOTE(review): m1 pairs receive no adjustment below — m1 only serves to
    # exclude them from m2. Confirm whether leaving their raw overlap is intended.
    m1 = ((neg1_exp & amp2_exp) | (neg2_exp & amp1_exp))

    # Sign reversal cases: Neither negligible-negligible nor amplitude conflict
    m2 = ~(m0 | m1)

    # Apply negligible-negligible rule (set to 1.0)
    overlap_batch = cp.where(m0, 1.0, overlap_batch)

    # Apply sign reversal filtering for m2 cases
    if cp.any(m2):
        # Get indices where filtering is needed
        s_idx, i_idx, j_idx = cp.where(m2)

        if len(s_idx) > 0:
            # Guard against zero std before dividing
            std1_safe = cp.maximum(std1, tol)
            std2_safe = cp.maximum(std2, tol)
            # z-scores: distance of mean from zero in std units
            z1 = means1_torque / std1_safe
            z2 = means2_torque / std2_safe

            # Normal CDF approximation (vectorized)
            def norm_cdf_gpu(x):
                # Abramowitz and Stegun 26.2.17 polynomial approximation
                t = 1.0 / (1.0 + 0.2316419 * cp.abs(x))
                d = 0.3989423 * cp.exp(-x * x / 2.0)
                prob = d * t * (0.3193815 + t * (-0.3565638 +
                       t * (1.781478 + t * (-1.821256 + t * 1.330274))))
                return cp.where(x > 0, 1.0 - prob, prob)

            # P(torque > 0) per phase for each side
            Ppos1 = norm_cdf_gpu(z1)
            Ppos2 = norm_cdf_gpu(z2)

            # Sign-mismatch probability for selected indices:
            # P(sign1 != sign2) = P1(1-P2) + (1-P1)P2
            Pdiff_sign = (Ppos1[s_idx, i_idx] * (1.0 - Ppos2[s_idx, j_idx]) +
                          (1.0 - Ppos1[s_idx, i_idx]) * Ppos2[s_idx, j_idx])

            # Mean-difference penalty (vectorized ramp function)
            mean_diff = cp.abs(means1_torque[s_idx, i_idx] - means2_torque[s_idx, j_idx])
            s_thresh, e_thresh = 0.2, 0.5

            # Linear ramp: 0 below s_thresh, 1 above e_thresh
            penalty = cp.clip((mean_diff - s_thresh) / (e_thresh - s_thresh), 0.0, 1.0)

            # Combine penalties: take the stronger of the two
            Pdiff = cp.maximum(Pdiff_sign, penalty)

            # Apply penalty to overlaps: shrink overlap toward 0 by scaling
            # the dissimilarity (1 - overlap) up... wait, scaling (1-overlap)
            # by Pdiff <= 1 keeps overlap >= original; it pulls overlap
            # toward 1 when Pdiff is small and leaves it unchanged at Pdiff=1.
            current_overlaps = overlap_batch[s_idx, i_idx, j_idx]
            output_diff = 1.0 - current_overlaps
            scaled_output_diff = output_diff * Pdiff
            overlap_batch[s_idx, i_idx, j_idx] = 1.0 - scaled_output_diff

    return overlap_batch
215
+
216
+
217
def compute_overlap_batch_gpu_chunked(means1_batch, vars1_batch, means2_batch, vars2_batch,
                                      chunk_size=None, **kwargs):
    """
    Chunked GPU processing for very large datasets that don't fit in GPU memory.

    Splits the subject axis into chunks, runs compute_overlap_batch_gpu on each
    chunk, and concatenates the results. When chunk_size is None, it is
    estimated automatically from free device memory.

    Parameters:
        means1_batch, vars1_batch, means2_batch, vars2_batch:
            np.ndarray shape (n_subjects, n_phases, n_features)
        chunk_size: int or None — subjects per GPU call; None = auto-estimate
        **kwargs: forwarded to compute_overlap_batch_gpu (tol, biomechanical_filter)

    Returns:
        np.ndarray shape (n_subjects, n_phases, n_phases)

    Raises:
        RuntimeError: if CuPy/CUDA is not available.
    """
    if not GPU_AVAILABLE:
        raise RuntimeError("CuPy not available for GPU computation")

    n_subjects = means1_batch.shape[0]

    if chunk_size is None:
        # BUG FIX: previously used cp.get_default_memory_pool().free_bytes(),
        # which reports bytes cached *inside* CuPy's pool (≈0 before any
        # allocation has been made) — not free device memory — so the auto
        # chunk size collapsed to 1. Query the device directly instead.
        available_memory = cp.cuda.Device().mem_info[0]  # free bytes on device

        # Rough estimate: each subject needs ~150*150*4 bytes for overlap + input arrays
        bytes_per_subject = 150 * 150 * 4 * 6  # 6 arrays (means1, vars1, means2, vars2, overlap, temp)
        estimated_chunk_size = max(1, int(available_memory * 0.8 // bytes_per_subject))
        chunk_size = min(estimated_chunk_size, n_subjects)

        print(f"🔧 Auto-determined GPU chunk size: {chunk_size} subjects")

    if chunk_size >= n_subjects:
        # Process all at once
        return compute_overlap_batch_gpu(means1_batch, vars1_batch,
                                         means2_batch, vars2_batch, **kwargs)

    # Process in chunks along the subject axis
    results = []
    for i in range(0, n_subjects, chunk_size):
        end_idx = min(i + chunk_size, n_subjects)

        chunk_result = compute_overlap_batch_gpu(
            means1_batch[i:end_idx],
            vars1_batch[i:end_idx],
            means2_batch[i:end_idx],
            vars2_batch[i:end_idx],
            **kwargs
        )
        results.append(chunk_result)

    return np.concatenate(results, axis=0)
261
+
262
+
263
def benchmark_gpu_vs_cpu():
    """Benchmark GPU vs CPU performance on sample data.

    Generates random test batches, times compute_overlap_batch_gpu against the
    Numba CPU implementation (if importable), and prints the speedup and the
    maximum element-wise difference. Output is printed; nothing is returned.
    """
    if not GPU_AVAILABLE:
        print("GPU not available for benchmarking")
        return

    import time

    # Create test data
    n_subjects = 10
    n_features = 4

    print(f"🔧 Benchmarking with {n_subjects} subjects, {n_features} features...")

    means1 = np.random.randn(n_subjects, 150, n_features).astype(np.float32)
    vars1 = np.abs(np.random.randn(n_subjects, 150, n_features)).astype(np.float32) + 0.1
    means2 = np.random.randn(n_subjects, 150, n_features).astype(np.float32)
    vars2 = np.abs(np.random.randn(n_subjects, 150, n_features)).astype(np.float32) + 0.1

    # BUG FIX: the early return above guarantees GPU_AVAILABLE is True here,
    # so the original's repeated `if GPU_AVAILABLE:` guards (and their
    # unreachable `else: result_gpu = None` branches) have been removed.

    # Warm up GPU — first call pays one-time kernel/allocation costs
    _ = compute_overlap_batch_gpu(means1[:2], vars1[:2], means2[:2], vars2[:2])

    # Benchmark GPU
    start = time.time()
    result_gpu = compute_overlap_batch_gpu(means1, vars1, means2, vars2)
    gpu_time = time.time() - start
    print(f"🚀 GPU time: {gpu_time:.4f} seconds")

    # Benchmark CPU (Numba fallback)
    try:
        from .numba_overlap import compute_overlap_batch
        start = time.time()
        result_cpu = compute_overlap_batch(means1, vars1, means2, vars2)
        cpu_time = time.time() - start
        print(f"🔧 CPU time: {cpu_time:.4f} seconds")

        speedup = cpu_time / gpu_time
        print(f"📈 GPU Speedup: {speedup:.1f}x")

        # Check accuracy between the two implementations
        max_diff = np.max(np.abs(result_gpu.astype(np.float64) - result_cpu))
        print(f"🎯 Max difference: {max_diff:.2e}")

    except ImportError:
        print("❌ Numba not available for CPU comparison")
314
+
315
+
316
def compute_overlap_batch_gpu_mega(all_means1_batch, all_vars1_batch, all_means2_batch, all_vars2_batch,
                                   valid_mask, tol=1e-12, biomechanical_filter=False):
    """
    MEGA-BATCH GPU computation: Process ALL task pairs simultaneously.

    This is the ultimate "throw everything in" approach for maximum GPU utilization.
    Processes hundreds of task pairs × subjects × phase pairs in a single GPU call.

    Parameters:
        all_means1_batch: np.ndarray shape (n_task_pairs, n_subjects_max, 150, n_features)
        all_vars1_batch: np.ndarray shape (n_task_pairs, n_subjects_max, 150, n_features)
        all_means2_batch: np.ndarray shape (n_task_pairs, n_subjects_max, 150, n_features)
        all_vars2_batch: np.ndarray shape (n_task_pairs, n_subjects_max, 150, n_features)
        valid_mask: np.ndarray shape (n_task_pairs, n_subjects_max) - bool mask for valid subjects
        tol: float, tolerance for variance validity
        biomechanical_filter: bool, apply biomechanical filtering

    Returns:
        np.ndarray shape (n_task_pairs, n_subjects_max, 150, 150) - overlap values

    Raises:
        RuntimeError: if CuPy/CUDA is not available.
    """
    if not GPU_AVAILABLE:
        raise RuntimeError("CuPy not available for mega-batch GPU computation")

    n_task_pairs, n_subjects_max, n_phases, n_features = all_means1_batch.shape

    # NOTE: the progress messages assume the conventional 150-phase grid
    print(f"🚀 GPU Mega-batch: Processing {n_task_pairs} task pairs × {n_subjects_max} subjects × {150*150} phase pairs")
    print(f"📊 Total computations: {n_task_pairs * n_subjects_max * 150 * 150:,}")

    # Transfer ALL data to GPU in single transfer
    means1_gpu = cp.asarray(all_means1_batch, dtype=cp.float32)
    vars1_gpu = cp.asarray(all_vars1_batch, dtype=cp.float32)
    means2_gpu = cp.asarray(all_means2_batch, dtype=cp.float32)
    vars2_gpu = cp.asarray(all_vars2_batch, dtype=cp.float32)
    valid_gpu = cp.asarray(valid_mask, dtype=cp.bool_)

    # BUG FIX: removed a dead cp.zeros((n_task_pairs, n_subjects_max, 150, 150))
    # pre-allocation that was immediately overwritten by the cp.where below.
    # The output shape now follows n_phases via broadcasting.

    # MEGA BROADCASTING: Process ALL task pairs and subjects simultaneously.
    # (n_task_pairs, n_subjects_max, 150, 1, F) vs (n_task_pairs, n_subjects_max, 1, 150, F)
    means1_exp = means1_gpu[:, :, :, cp.newaxis, :]  # Add phase_j dimension
    vars1_exp = vars1_gpu[:, :, :, cp.newaxis, :]
    means2_exp = means2_gpu[:, :, cp.newaxis, :, :]  # Add phase_i dimension
    vars2_exp = vars2_gpu[:, :, cp.newaxis, :, :]

    # All differences and pooled variances at once:
    # Shape: (n_task_pairs, n_subjects_max, 150, 150, n_features)
    diff = means1_exp - means2_exp
    var_sum = vars1_exp + vars2_exp

    # Validity mask broadcast over all phase pairs
    subject_valid = valid_gpu[:, :, cp.newaxis, cp.newaxis]

    # NaN and variance validity for ALL data simultaneously
    nan_valid = (~cp.isnan(diff).any(axis=4) &
                 ~cp.isnan(var_sum).any(axis=4) &
                 (var_sum > tol).all(axis=4))

    # Combined validity mask: subject present AND data valid
    full_valid_mask = subject_valid & nan_valid

    # Quadratic form for ALL valid entries (guarded division)
    quad_terms = cp.where(full_valid_mask[:, :, :, :, cp.newaxis],
                          diff * diff / var_sum,
                          0.0)

    # Sum over features: (n_task_pairs, n_subjects_max, 150, 150)
    quad_sum = cp.sum(quad_terms, axis=4)

    # exp(-q/2) underflows float32 long before q/2 = 20; clamp those to 0
    safe_exp_mask = full_valid_mask & (quad_sum * 0.5 <= 20.0)
    overlap_batch_gpu = cp.where(safe_exp_mask,
                                 cp.exp(-0.5 * quad_sum),
                                 0.0)

    # Optional post-hoc filtering on the first (torque) feature
    if biomechanical_filter:
        overlap_batch_gpu = _apply_biomechanical_filter_gpu_mega(
            overlap_batch_gpu, means1_gpu, vars1_gpu, means2_gpu, vars2_gpu, valid_gpu, tol
        )

    # Transfer back to CPU - single transfer for ALL results
    print("📥 Transferring results from GPU...")
    result = cp.asnumpy(overlap_batch_gpu).astype(np.float64)

    # Final clipping (filtering may nudge values past [0, 1] in float32)
    np.clip(result, 0.0, 1.0, out=result)

    print(f"✅ Mega-batch GPU computation complete!")
    return result
409
+
410
+
411
def _apply_biomechanical_filter_gpu_mega(overlap_batch, means1_batch, vars1_batch,
                                         means2_batch, vars2_batch, valid_mask, tol):
    """Apply biomechanical filtering for mega-batch on GPU.

    Mega-batch analogue of _apply_biomechanical_filter_gpu, operating on the
    first feature (assumed torque — TODO confirm) of
    (n_task_pairs, n_subjects_max, 150, F) inputs.
    NOTE(review): unlike the per-batch variant, the m2 penalty here uses only
    the mean-difference ramp and omits the sign-mismatch probability term
    (deliberate simplification per the inline comment below).
    Returns the modified overlap_batch.
    """
    negligible_threshold = 0.1   # torque magnitude treated as "no torque"
    ampable_threshold = 0.2      # torque magnitude treated as meaningful
    ci_factor = 1.96             # 95% confidence interval half-width factor

    # NOTE(review): assigned but never used below.
    n_task_pairs, n_subjects_max = overlap_batch.shape[:2]

    # Only process first feature (torque) for biomechanical filtering
    means1_torque = means1_batch[:, :, :, 0]  # Shape: (n_task_pairs, n_subjects_max, 150)
    means2_torque = means2_batch[:, :, :, 0]
    vars1_torque = vars1_batch[:, :, :, 0]
    vars2_torque = vars2_batch[:, :, :, 0]

    # Vectorized std and 95% CI calculations for ALL task pairs
    std1 = cp.sqrt(vars1_torque)
    std2 = cp.sqrt(vars2_torque)

    ci_lo1 = means1_torque - ci_factor * std1
    ci_hi1 = means1_torque + ci_factor * std1
    ci_lo2 = means2_torque - ci_factor * std2
    ci_hi2 = means2_torque + ci_factor * std2

    # "Negligible": the whole CI fits inside [-threshold, +threshold]
    negligible1 = ((ci_lo1 >= -negligible_threshold) &
                   (ci_hi1 <= negligible_threshold))
    negligible2 = ((ci_lo2 >= -negligible_threshold) &
                   (ci_hi2 <= negligible_threshold))
    # "Ampable": mean torque magnitude exceeds the amplitude threshold
    ampable1 = cp.abs(means1_torque) > ampable_threshold
    ampable2 = cp.abs(means2_torque) > ampable_threshold

    # Broadcast to phase pair dimensions
    # Shape: (n_task_pairs, n_subjects_max, 150, 1)
    neg1_exp = negligible1[:, :, :, cp.newaxis]
    amp1_exp = ampable1[:, :, :, cp.newaxis]

    # Shape: (n_task_pairs, n_subjects_max, 1, 150)
    neg2_exp = negligible2[:, :, cp.newaxis, :]
    amp2_exp = ampable2[:, :, cp.newaxis, :]

    # Restrict every mask to subjects that actually exist in the padded batch
    valid_exp = valid_mask[:, :, cp.newaxis, cp.newaxis]

    # Three-level filtering masks for ALL task pairs
    m0 = (neg1_exp & neg2_exp) & valid_exp  # Negligible-negligible
    # NOTE(review): m1 pairs receive no adjustment — m1 only excludes them from m2.
    m1 = ((neg1_exp & amp2_exp) | (neg2_exp & amp1_exp)) & valid_exp  # Amplitude conflicts
    m2 = ~(m0 | m1) & valid_exp  # Sign reversal cases

    # Apply negligible-negligible rule (force full overlap)
    overlap_batch = cp.where(m0, 1.0, overlap_batch)

    # Apply sign reversal filtering for m2 cases (if any exist)
    if cp.any(m2):
        # For mega-batch, we'll use a simplified linear ramp for performance
        # (Full probability calculation would be too expensive for this scale)

        # Get phase indices for m2 cases
        t_idx, s_idx, i_idx, j_idx = cp.where(m2)

        if len(t_idx) > 0:
            # Mean-difference penalty (vectorized)
            mean_diff = cp.abs(means1_torque[t_idx, s_idx, i_idx] -
                               means2_torque[t_idx, s_idx, j_idx])

            # Linear ramp: 0 below s_thresh, 1 above e_thresh
            s_thresh, e_thresh = 0.2, 0.5
            penalty = cp.clip((mean_diff - s_thresh) / (e_thresh - s_thresh), 0.0, 1.0)

            # Scale the dissimilarity (1 - overlap) by the penalty; penalty=1
            # leaves the overlap unchanged, penalty=0 forces overlap to 1.0
            current_overlaps = overlap_batch[t_idx, s_idx, i_idx, j_idx]
            output_diff = 1.0 - current_overlaps
            scaled_output_diff = output_diff * penalty
            overlap_batch[t_idx, s_idx, i_idx, j_idx] = 1.0 - scaled_output_diff

    return overlap_batch
487
+
488
+
489
def estimate_mega_batch_memory(n_task_pairs, n_subjects_max, n_features):
    """
    Estimate GPU memory requirements (in GB) for mega-batch processing.

    Accounts for the 5D broadcasting performed during GPU computation:
    inputs of shape (n_task_pairs, n_subjects_max, 150, n_features) are
    broadcast to (n_task_pairs, n_subjects_max, 150, 150, n_features), and
    that 150x150 expansion is what dominates for large feature counts.
    """
    FLOAT32 = 4                                      # bytes per float32 element
    base = n_task_pairs * n_subjects_max * 150       # elements per phase column

    # Four float32 input arrays, pre-broadcasting.
    input_bytes = 4 * base * n_features * FLOAT32
    # One (..., 150, 150) float32 output array.
    output_bytes = base * 150 * FLOAT32
    # Broadcast 5D intermediates (diff, var_sum, quad_terms, ...):
    # conservatively assume four such tensors are alive at once.
    one_5d_bytes = base * 150 * n_features * FLOAT32
    intermediate_bytes = one_5d_bytes * 4

    return (input_bytes + output_bytes + intermediate_bytes) / (1024**3)
514
+
515
+
516
def get_available_gpu_memory_gb():
    """Get available GPU memory in GB.

    Queries free device memory via CuPy and applies a 70% safety margin.
    Returns 0.0 when CuPy is unavailable, at least 0.5 GB otherwise, and a
    5 GB fallback if the device query fails.
    """
    if not GPU_AVAILABLE:
        return 0.0

    try:
        # Device.mem_info is (free_bytes, total_bytes).
        # BUG FIX: removed unused total/used computations; only free is needed.
        free_bytes = cp.cuda.Device().mem_info[0]

        # Use 70% of free memory as safety margin
        available_gb = (free_bytes * 0.7) / (1024**3)

        return max(0.5, available_gb)  # Ensure at least 0.5GB for minimal chunking
    except Exception:
        # BUG FIX: was a bare `except:`; now only catches ordinary errors.
        # Fallback: assume 5GB available for RTX series
        return 5.0
535
+
536
+
537
def calculate_optimal_chunk_size(total_pairs, n_subjects_max, n_features, target_memory_gb=None):
    """Calculate optimal chunk size based on available GPU memory.

    Binary-searches for the largest number of task pairs whose estimated
    mega-batch footprint fits within target_memory_gb (auto-detected from the
    device when None). Returns 1 when no GPU is available.
    """
    if not GPU_AVAILABLE:
        return 1

    if target_memory_gb is None:
        target_memory_gb = get_available_gpu_memory_gb()

    # Binary search over [1, total_pairs] for the largest chunk that fits.
    lo, hi = 1, total_pairs
    best = 1
    while lo <= hi:
        candidate = (lo + hi) // 2
        if estimate_mega_batch_memory(candidate, n_subjects_max, n_features) <= target_memory_gb:
            best = candidate       # fits — try something larger
            lo = candidate + 1
        else:
            hi = candidate - 1     # too big — shrink

    # Ensure at least 1 task pair per chunk
    return max(1, best)
562
+
563
+
564
def get_available_ram_gb():
    """Get available system RAM in GB.

    Uses psutil when installed; otherwise falls back to a conservative
    16 GB assumption.
    """
    try:
        import psutil
    except ImportError:
        # Fallback: assume 16GB available (conservative)
        return 16.0
    return psutil.virtual_memory().available / (1024**3)
573
+
574
+
575
def calculate_ram_max_chunk_size(n_subjects_max, n_features, available_ram_gb):
    """Calculate maximum chunk size based on available RAM for numpy arrays.

    A chunk holds four float32 arrays (means1/vars1/means2/vars2), each of
    shape (chunk_size, n_subjects_max, 150, n_features); the budget is 70%
    of available_ram_gb. Always returns at least 1.
    """
    # Bytes consumed per task pair: 4 arrays x (n_subjects_max * 150 * n_features) x 4 bytes
    per_pair_bytes = 4 * n_subjects_max * 150 * n_features * 4

    # Spend at most 70% of the reported RAM as a safety margin
    ram_budget_bytes = available_ram_gb * 0.7 * (1024**3)

    return max(1, int(ram_budget_bytes / per_pair_bytes))
587
+
588
+
589
def calculate_optimal_chunk_size_dual_constraint(total_pairs, n_subjects_max, n_features):
    """
    Calculate optimal chunk size considering BOTH GPU memory and system RAM constraints.

    This prevents out-of-memory errors by respecting both:
    1. GPU memory limits (for CuPy processing)
    2. System RAM limits (for numpy array allocation)

    CRITICAL: For very large feature counts (>100), the 5D broadcasting becomes
    prohibitively expensive, so we use much more conservative estimates.

    Returns the minimum chunk size that satisfies both constraints (>= 1).
    """
    if not GPU_AVAILABLE:
        return 1

    # Get available memory for both constraints
    gpu_memory_gb = get_available_gpu_memory_gb()
    ram_memory_gb = get_available_ram_gb()

    # CRITICAL: For large feature counts, the 5D broadcasting dominates memory usage
    # We need to be much more conservative
    if n_features > 100:
        print(f"⚠️ Large feature count ({n_features}) detected - using conservative chunking")
        # For large features, memory usage scales roughly with features^2 due to broadcasting
        # Use a much smaller base and scale down aggressively
        feature_penalty = (n_features / 100) ** 1.5  # Exponential penalty
        conservative_gpu_memory = gpu_memory_gb / feature_penalty
        # NOTE(review): dividing by (feature_penalty * 0.5) makes the RAM budget
        # *larger* than ram_memory_gb whenever feature_penalty < 2 — confirm
        # this is the intended "RAM less affected" behavior.
        conservative_ram_memory = ram_memory_gb / (feature_penalty * 0.5)  # RAM less affected

        gpu_max_chunk = calculate_optimal_chunk_size(total_pairs, n_subjects_max, n_features, conservative_gpu_memory)
        ram_max_chunk = calculate_ram_max_chunk_size(n_subjects_max, n_features, conservative_ram_memory)
    else:
        # Normal calculation for reasonable feature counts
        gpu_max_chunk = calculate_optimal_chunk_size(total_pairs, n_subjects_max, n_features, gpu_memory_gb)
        ram_max_chunk = calculate_ram_max_chunk_size(n_subjects_max, n_features, ram_memory_gb)

    # Use the most restrictive constraint
    optimal_chunk = min(gpu_max_chunk, ram_max_chunk, total_pairs)

    print(f"🔧 Dual-constraint analysis:")
    print(f"   GPU memory: {gpu_memory_gb:.2f} GB → max {gpu_max_chunk} pairs")
    print(f"   RAM memory: {ram_memory_gb:.2f} GB → max {ram_max_chunk} pairs")
    print(f"   Using most restrictive: {optimal_chunk} pairs per chunk")

    # For very large feature counts, ensure we don't go too high
    if n_features > 100:
        # Cap at a reasonable maximum for large feature counts
        max_safe_chunk = max(1, int(50000 / n_features))  # Rough heuristic
        optimal_chunk = min(optimal_chunk, max_safe_chunk)
        if optimal_chunk == max_safe_chunk:
            print(f"   🔒 Capped at {optimal_chunk} pairs due to large feature count")

    return max(1, optimal_chunk)
643
+
644
+
645
def compute_overlap_batch_gpu_mega_chunked(all_means1_batch, all_vars1_batch, all_means2_batch, all_vars2_batch,
                                           valid_mask, tol=1e-12, biomechanical_filter=False, progress_callback=None):
    """
    Chunked mega-batch GPU computation: process task pairs in optimal chunks.

    Automatically determines chunk size from available GPU memory and system
    RAM, then processes task pairs chunk by chunk while keeping all subjects
    within each chunk for maximum efficiency.  Out-of-memory failures trigger
    progressively smaller chunks and finally a CPU fallback.

    Parameters:
        all_means1_batch: np.ndarray (n_task_pairs, n_subjects_max, 150, n_features)
        all_vars1_batch:  np.ndarray (n_task_pairs, n_subjects_max, 150, n_features)
        all_means2_batch: np.ndarray (n_task_pairs, n_subjects_max, 150, n_features)
        all_vars2_batch:  np.ndarray (n_task_pairs, n_subjects_max, 150, n_features)
        valid_mask: np.ndarray (n_task_pairs, n_subjects_max) bool mask of valid subjects
        tol: float, tolerance for variance validity
        biomechanical_filter: bool, apply biomechanical filtering
        progress_callback: callable taking a float in [0, 1], optional

    Returns:
        np.ndarray (n_task_pairs, n_subjects_max, 150, 150) of overlap values

    Raises:
        RuntimeError: if CuPy/CUDA is not available.
    """
    if not GPU_AVAILABLE:
        raise RuntimeError("CuPy not available for chunked mega-batch GPU computation")

    n_task_pairs, n_subjects_max, n_phases, n_features = all_means1_batch.shape

    # Calculate optimal chunk size using dual constraints (GPU + RAM)
    chunk_size = calculate_optimal_chunk_size_dual_constraint(n_task_pairs, n_subjects_max, n_features)

    print(f"🔧 Chunking Strategy:")
    print(f"   Total task pairs: {n_task_pairs:,}")
    print(f"   Optimal chunk size: {chunk_size:,} task pairs")
    print(f"   Number of chunks: {(n_task_pairs + chunk_size - 1) // chunk_size}")

    # Try single batch first, but catch out-of-memory errors
    if chunk_size >= n_task_pairs:
        print("🚀 Attempting single mega-batch processing...")
        try:
            return compute_overlap_batch_gpu_mega(
                all_means1_batch, all_vars1_batch, all_means2_batch, all_vars2_batch,
                valid_mask, tol, biomechanical_filter
            )
        except Exception as e:
            if "OutOfMemoryError" in str(type(e)) or "out of memory" in str(e).lower():
                print(f"⚠️ Single batch failed with memory error, forcing chunking...")
                # BUG FIX: `available_memory` was referenced here without ever
                # being defined, so this recovery path raised NameError instead
                # of retrying.  Query the GPU memory explicitly.
                available_memory = get_available_gpu_memory_gb()
                # Use at most 3 GB or 30% of available memory, whichever is smaller.
                conservative_memory = min(available_memory * 0.3, 3.0)
                chunk_size = calculate_optimal_chunk_size(n_task_pairs, n_subjects_max, n_features, conservative_memory)
                chunk_size = max(1, chunk_size // 2)  # Further reduce chunk size
                print(f"🔧 Fallback chunk size: {chunk_size} pairs (conservative estimate)")
            else:
                raise  # preserve the original traceback

    # Process in chunks
    print(f"🔄 Processing {n_task_pairs:,} task pairs in chunks of {chunk_size:,}...")
    results = []

    import time  # hoisted: was re-imported on every loop iteration

    for chunk_start in range(0, n_task_pairs, chunk_size):
        chunk_end = min(chunk_start + chunk_size, n_task_pairs)
        chunk_num = len(results) + 1
        total_chunks = (n_task_pairs + chunk_size - 1) // chunk_size

        print(f"🚀 Processing chunk {chunk_num}/{total_chunks} (task pairs {chunk_start}:{chunk_end})...")

        # Extract chunk data (numpy slicing yields views, so this is cheap)
        chunk_means1 = all_means1_batch[chunk_start:chunk_end]
        chunk_vars1 = all_vars1_batch[chunk_start:chunk_end]
        chunk_means2 = all_means2_batch[chunk_start:chunk_end]
        chunk_vars2 = all_vars2_batch[chunk_start:chunk_end]
        chunk_valid = valid_mask[chunk_start:chunk_end]

        # Process chunk, falling back to progressive reduction on GPU OOM
        start_time = time.time()
        try:
            chunk_result = compute_overlap_batch_gpu_mega(
                chunk_means1, chunk_vars1, chunk_means2, chunk_vars2,
                chunk_valid, tol, biomechanical_filter
            )
            chunk_time = time.time() - start_time
        except Exception as e:
            if "OutOfMemoryError" in str(type(e)) or "out of memory" in str(e).lower():
                print(f"   ⚠️ Chunk {chunk_num} still too large, attempting progressive reduction...")
                chunk_result = _process_chunk_with_progressive_reduction(
                    chunk_means1, chunk_vars1, chunk_means2, chunk_vars2,
                    chunk_valid, tol, biomechanical_filter, chunk_num
                )
                chunk_time = time.time() - start_time
            else:
                raise

        results.append(chunk_result)

        # Progress reporting (reserve the final 10% for aggregation below)
        progress = chunk_end / n_task_pairs
        if progress_callback:
            progress_callback(progress * 0.9)

        # Performance metrics: one computation per valid subject per phase pair
        valid_computations = np.sum(chunk_valid) * 150 * 150
        throughput = valid_computations / chunk_time if chunk_time > 0 else 0

        print(f"   ✅ Chunk {chunk_num} complete: {chunk_time:.2f}s, {throughput:,.0f} computations/sec")

        # Memory cleanup so the CuPy pool does not grow across chunks
        if GPU_AVAILABLE:
            cp.get_default_memory_pool().free_all_blocks()

    print("🔧 Combining chunk results...")
    final_result = np.concatenate(results, axis=0)

    if progress_callback:
        progress_callback(1.0)

    print(f"✅ Chunked mega-batch processing complete!")
    print(f"📊 Final result shape: {final_result.shape}")

    return final_result
765
+
766
+
767
def _process_chunk_with_progressive_reduction(chunk_means1, chunk_vars1, chunk_means2, chunk_vars2,
                                              chunk_valid, tol, biomechanical_filter, chunk_num):
    """
    Process a chunk with progressive size reduction if out-of-memory errors occur.

    Tries progressively smaller GPU sub-chunks (50%, 25%, 12.5%, 6.25% of the
    incoming size).  If every GPU attempt fails with OOM, falls back to CPU
    (Numba) processing; as a last resort returns an all-zero result.

    Parameters:
        chunk_means1/chunk_vars1/chunk_means2/chunk_vars2:
            np.ndarray (chunk_size, n_subjects_max, 150, n_features)
        chunk_valid: np.ndarray (chunk_size, n_subjects_max) bool subject mask
        tol: float, variance-validity tolerance (forwarded to the GPU kernel)
        biomechanical_filter: bool, forwarded to the GPU kernel
        chunk_num: int, chunk index used only in log messages

    Returns:
        np.ndarray (chunk_size, n_subjects_max, 150, 150) of overlap values
    """
    chunk_size = chunk_means1.shape[0]

    # Try progressively smaller sub-chunks: 50%, 25%, 12.5%, 6.25%
    reduction_factors = [0.5, 0.25, 0.125, 0.0625]

    for factor in reduction_factors:
        sub_chunk_size = max(1, int(chunk_size * factor))
        print(f"   🔄 Trying sub-chunk size: {sub_chunk_size} pairs ({factor*100:.1f}% of original)")

        try:
            # Process the chunk in sub-chunks
            sub_results = []
            for start_idx in range(0, chunk_size, sub_chunk_size):
                end_idx = min(start_idx + sub_chunk_size, chunk_size)

                sub_result = compute_overlap_batch_gpu_mega(
                    chunk_means1[start_idx:end_idx],
                    chunk_vars1[start_idx:end_idx],
                    chunk_means2[start_idx:end_idx],
                    chunk_vars2[start_idx:end_idx],
                    chunk_valid[start_idx:end_idx],
                    tol, biomechanical_filter
                )
                sub_results.append(sub_result)

                # Clear GPU memory between sub-chunks
                if GPU_AVAILABLE:
                    cp.get_default_memory_pool().free_all_blocks()

            final_result = np.concatenate(sub_results, axis=0)
            print(f"   ✅ Progressive reduction successful with {sub_chunk_size}-pair sub-chunks")
            return final_result

        except Exception as e:
            if "OutOfMemoryError" in str(type(e)) or "out of memory" in str(e).lower():
                print(f"   ❌ Sub-chunk size {sub_chunk_size} still too large")
                continue
            else:
                raise  # non-memory errors are real bugs; preserve traceback

    # All GPU reduction attempts failed; processing one pair at a time on the
    # GPU is slower than CPU, so switch to the Numba implementation instead.
    print(f"   ❌ All reduction attempts failed - chunk too large for GPU mega-batch")
    print(f"   💡 Recommendation: Use smaller time windows or switch to sequential processing")
    print(f"   🔄 Falling back to CPU-based processing for this chunk...")

    try:
        # BUG FIX: this module is used both as a flat script directory and as a
        # package.  The original relative-only import raised ImportError in the
        # flat layout and silently produced all-zero results.  Try the absolute
        # import first, then the package-relative form.
        try:
            from numba_overlap import compute_overlap_batch_numba_ultra_fast
        except ImportError:
            from .numba_overlap import compute_overlap_batch_numba_ultra_fast

        # Process on CPU using Numba (much faster than single GPU pairs)
        cpu_results = []
        for i in range(chunk_size):
            means1_i = chunk_means1[i]  # (n_subjects, 150, n_features)
            vars1_i = chunk_vars1[i]
            means2_i = chunk_means2[i]
            vars2_i = chunk_vars2[i]
            valid_i = chunk_valid[i]    # (n_subjects,)

            # Start from zeros so invalid subjects contribute no overlap.
            full_result = np.zeros((1, chunk_valid.shape[1], 150, 150), dtype=np.float32)
            valid_indices = np.where(valid_i)[0]
            if len(valid_indices) > 0:
                cpu_result = compute_overlap_batch_numba_ultra_fast(
                    means1_i[valid_indices], vars1_i[valid_indices],
                    means2_i[valid_indices], vars2_i[valid_indices]
                )
                full_result[0, valid_indices] = cpu_result
            cpu_results.append(full_result)

        final_result = np.concatenate(cpu_results, axis=0)
        print(f"   ✅ CPU fallback processing completed")
        return final_result

    except ImportError:
        print(f"   ❌ CPU fallback not available - creating zero results")
        # Last resort: return zeros so the caller can still concatenate.
        final_result = np.zeros((chunk_size, chunk_valid.shape[1], 150, 150), dtype=np.float32)
        return final_result
860
+
861
+
862
if __name__ == "__main__":
    # Smoke test for the GPU overlap path; requires CuPy + CUDA.
    print("🧪 Testing GPU overlap calculation...")

    if GPU_AVAILABLE:
        benchmark_gpu_vs_cpu()

        # Test mega-batch functionality
        print("\n🚀 Testing mega-batch functionality...")

        # Create small synthetic test data for multiple task pairs
        n_task_pairs = 5
        n_subjects_max = 3
        n_features = 4

        # Variances are shifted by +0.1 to keep them strictly positive.
        all_means1 = np.random.randn(n_task_pairs, n_subjects_max, 150, n_features).astype(np.float32)
        all_vars1 = np.abs(np.random.randn(n_task_pairs, n_subjects_max, 150, n_features)).astype(np.float32) + 0.1
        all_means2 = np.random.randn(n_task_pairs, n_subjects_max, 150, n_features).astype(np.float32)
        all_vars2 = np.abs(np.random.randn(n_task_pairs, n_subjects_max, 150, n_features)).astype(np.float32) + 0.1
        all_means1 = all_means1  # shapes: (pairs, subjects, 150 phases, features)
        valid_mask = np.ones((n_task_pairs, n_subjects_max), dtype=bool)

        import time
        start = time.time()
        result = compute_overlap_batch_gpu_mega(all_means1, all_vars1, all_means2, all_vars2, valid_mask)
        end = time.time()

        print(f"✅ Mega-batch result shape: {result.shape}")
        print(f"⏱️ Mega-batch time: {end - start:.4f}s")
        print(f"📊 Throughput: {n_task_pairs * n_subjects_max * 150 * 150 / (end - start):,.0f} computations/sec")

    else:
        print("❌ GPU testing requires CuPy and CUDA")
multivariate_gaussian_overlap.py CHANGED
The diff for this file is too large to render. See raw diff
 
numba_overlap.py ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Ultra-fast Numba JIT-compiled implementation of multivariate Gaussian overlap calculation.
This eliminates all Python overhead and runs at near-C speed.
"""

import numpy as np
try:
    import numba
    from numba import jit, prange
    NUMBA_AVAILABLE = True
except ImportError:
    # Numba is optional: record its absence so compute_overlap_batch() can
    # route to the pure-NumPy fallback implementation defined below.
    NUMBA_AVAILABLE = False
    print("Warning: Numba not installed. Install with: pip install numba")
16
+ if NUMBA_AVAILABLE:
17
@jit(nopython=True, parallel=True, cache=True, fastmath=True)
def compute_overlap_batch_numba(means1_batch, vars1_batch, means2_batch, vars2_batch, tol=1e-12):
    """
    Validated Numba kernel: overlap = exp(-0.5 * sum_f (μ1-μ2)² / (σ1²+σ2²)).

    NaN-containing phases are skipped (their overlap stays 0), summed variances
    at or below `tol` invalidate a pair, and quadratic forms with
    0.5*quad > 20 are left at 0 (exp(-20) ≈ 2e-9, effectively underflow).

    Parameters:
        means1_batch: (n_subjects, 150, n_features) array of means for task 1
        vars1_batch:  (n_subjects, 150, n_features) array of variances for task 1
        means2_batch: (n_subjects, 150, n_features) array of means for task 2
        vars2_batch:  (n_subjects, 150, n_features) array of variances for task 2
        tol: Tolerance for variance validity

    Returns:
        overlap_batch: (n_subjects, 150, 150) float64 array of overlap values

    NOTE(review): the phase dimension is hard-coded to 150 below even though
    n_phases is unpacked from the input shape — inputs are assumed to always
    have exactly 150 phases; confirm against callers.
    """
    n_subjects, n_phases, n_features = means1_batch.shape

    # Pre-allocate output
    overlap_batch = np.zeros((n_subjects, 150, 150), dtype=np.float64)

    # CRITICAL OPTIMIZATION: Pre-compute ALL validation outside the main loops.
    # This eliminates millions of redundant NaN checks (each phase is otherwise
    # re-checked 150 times in the pair loop).
    valid_phases1 = np.zeros((n_subjects, 150), dtype=numba.boolean)
    valid_phases2 = np.zeros((n_subjects, 150), dtype=numba.boolean)

    # Pre-compute phase validity for all subjects at once
    for s in prange(n_subjects):
        for i in range(150):
            # Check phase validity once per phase
            valid1 = True
            valid2 = True

            for f in range(n_features):
                if np.isnan(means1_batch[s, i, f]) or np.isnan(vars1_batch[s, i, f]):
                    valid1 = False
                if np.isnan(means2_batch[s, i, f]) or np.isnan(vars2_batch[s, i, f]):
                    valid2 = False

            valid_phases1[s, i] = valid1
            valid_phases2[s, i] = valid2

    # MAIN COMPUTATION: Process only valid phase pairs
    for s in prange(n_subjects):
        for i in range(150):
            if not valid_phases1[s, i]:
                continue

            # Extract data for phase i once (avoid repeated indexing)
            means1_i = means1_batch[s, i]
            vars1_i = vars1_batch[s, i]

            for j in range(150):
                if not valid_phases2[s, j]:
                    continue

                # Extract data for phase j once
                means2_j = means2_batch[s, j]
                vars2_j = vars2_batch[s, j]

                # VECTORIZED CORE COMPUTATION:
                # overlap = exp(-0.5 * sum((μ1 - μ2)² / (σ1² + σ2²)))

                # Step 1: Vector operations (no explicit feature loop)
                diff = means1_i - means2_j  # Vector subtraction
                var_sum = vars1_i + vars2_j  # Vector addition

                # Step 2: Check variance validity; break on first bad feature
                valid_variances = True
                for f in range(n_features):
                    if var_sum[f] <= tol:
                        valid_variances = False
                        break

                if valid_variances:
                    # Step 3: Quadratic form, accumulated with an explicit loop
                    # (Numba compiles this to a tight scalar reduction)
                    quad_terms = diff * diff / var_sum
                    quad_sum = 0.0
                    for f in range(n_features):
                        quad_sum += quad_terms[f]

                    # Step 4: Exponential with underflow protection
                    half_quad = 0.5 * quad_sum
                    if half_quad <= 20.0:
                        overlap_batch[s, i, j] = np.exp(-half_quad)

    return overlap_batch
106
+
107
+
108
@jit(nopython=True, parallel=True, cache=True, fastmath=True)
def compute_overlap_batch_numba_ultra_fast(means1_batch, vars1_batch, means2_batch, vars2_batch):
    """
    ULTRA-FAST MODE: Skip ALL validation for clean data.

    Computes overlap = exp(-0.5 * sum_f (μ1 - μ2)² / (σ1² + σ2²)) for every
    phase pair with no NaN checks, no variance-tolerance checks, and no
    underflow guard.  Use ONLY when the data is known to contain no NaNs and
    strictly positive variances — NaNs or zero variances here propagate into
    the output (division by zero / exp of NaN).

    Parameters:
        means1_batch / vars1_batch / means2_batch / vars2_batch:
            (n_subjects, 150, n_features) float arrays

    Returns:
        (n_subjects, 150, 150) float64 array of overlap values
    """
    n_subjects, n_phases, n_features = means1_batch.shape
    overlap_batch = np.zeros((n_subjects, 150, 150), dtype=np.float64)

    # Subjects are independent, so the outer loop parallelizes with prange.
    for s in prange(n_subjects):
        for i in range(150):
            means1_i = means1_batch[s, i]
            vars1_i = vars1_batch[s, i]

            for j in range(150):
                means2_j = means2_batch[s, j]
                vars2_j = vars2_batch[s, j]

                # Pure mathematical computation - no checks, no validation
                diff = means1_i - means2_j
                var_sum = vars1_i + vars2_j
                quad_terms = diff * diff / var_sum

                # Explicit accumulation compiles to a tight scalar loop
                quad_sum = 0.0
                for f in range(n_features):
                    quad_sum += quad_terms[f]

                overlap_batch[s, i, j] = np.exp(-0.5 * quad_sum)

    return overlap_batch
142
+
143
+
144
@jit(nopython=True, parallel=True, cache=True, fastmath=True)
def compute_overlap_batch_numba_vectorized(means1_batch, vars1_batch, means2_batch, vars2_batch):
    """
    VECTORIZED MODE: Numba kernel using array expressions over the feature axis.

    Numerically identical to the ultra-fast kernel (no validation, no underflow
    guard); the difference is stylistic — per-pair feature math is written as
    small array expressions that Numba lowers to vectorized code.  Intended for
    medium feature counts (the 'auto' dispatcher picks it for 4–9 features).

    Parameters:
        means1_batch / vars1_batch / means2_batch / vars2_batch:
            (n_subjects, 150, n_features) float arrays

    Returns:
        (n_subjects, 150, 150) float64 array of overlap values
    """
    n_subjects, n_phases, n_features = means1_batch.shape
    overlap_batch = np.zeros((n_subjects, 150, 150), dtype=np.float64)

    for s in prange(n_subjects):
        # Process each phase row in turn for better cache behavior
        for i in range(150):
            means1_i = means1_batch[s, i]  # Shape: (n_features,)
            vars1_i = vars1_batch[s, i]

            for j in range(150):
                means2_j = means2_batch[s, j]
                vars2_j = vars2_batch[s, j]

                # Vectorized operations over features
                diff = means1_i - means2_j
                var_sum = vars1_i + vars2_j
                quad_terms = diff * diff / var_sum

                # Fast scalar accumulation over features
                quad_sum = 0.0
                for f in range(n_features):
                    quad_sum += quad_terms[f]

                overlap_batch[s, i, j] = np.exp(-0.5 * quad_sum)

    return overlap_batch
180
+
181
+
182
@jit(nopython=True, parallel=True, cache=True, fastmath=True)
def compute_overlap_batch_numba_row_vectorized(means1_batch, vars1_batch, means2_batch, vars2_batch):
    """
    ROW-VECTORIZED MODE: fully scalar inner loop over features.

    Numerically identical to the other unvalidated kernels, but avoids the
    per-pair temporary arrays entirely: differences and variance sums are
    computed feature-by-feature as scalars, which keeps the inner loop free of
    allocations.  The 'auto' dispatcher selects this for larger workloads
    (>= 10 features or >= 15 subjects).

    Parameters:
        means1_batch / vars1_batch / means2_batch / vars2_batch:
            (n_subjects, 150, n_features) float arrays

    Returns:
        (n_subjects, 150, 150) float64 array of overlap values
    """
    n_subjects, n_phases, n_features = means1_batch.shape
    overlap_batch = np.zeros((n_subjects, 150, 150), dtype=np.float64)

    for s in prange(n_subjects):
        for i in range(150):
            means1_i = means1_batch[s, i]  # Current phase means (n_features,)
            vars1_i = vars1_batch[s, i]    # Current phase variances

            # Views over all j phases for this subject (no copies)
            means2_all = means2_batch[s]   # All phase means (150, n_features)
            vars2_all = vars2_batch[s]     # All phase variances

            for j in range(150):
                # Scalar accumulation over features: no temporaries allocated
                quad_sum = 0.0
                for f in range(n_features):
                    diff_f = means1_i[f] - means2_all[j, f]
                    var_sum_f = vars1_i[f] + vars2_all[j, f]
                    quad_sum += diff_f * diff_f / var_sum_f

                overlap_batch[s, i, j] = np.exp(-0.5 * quad_sum)

    return overlap_batch
214
+
215
+
216
@jit(nopython=True, cache=True)
def apply_biomechanical_filter_numba(overlap_batch, means1_batch, vars1_batch,
                                     means2_batch, vars2_batch, tol=1e-12):
    """
    Apply biomechanical filtering in-place using Numba.

    Modifies `overlap_batch` directly based on the FIRST feature only
    (assumed to be torque — TODO confirm against callers).  Three cases per
    phase pair, classified from the 95% CI (±1.96σ) of each mean:
      1. both "negligible" (CI within ±0.1): overlap forced to 1.0
      2. one negligible, the other "ampable" (|mean| > 0.2): overlap unchanged
      3. otherwise: overlap pulled toward 1 by the probability that the two
         torques share the same sign, combined with a mean-difference ramp.

    Parameters:
        overlap_batch: (n_subjects, 150, 150) array, modified in place
        means1_batch / vars1_batch / means2_batch / vars2_batch:
            (n_subjects, 150, n_features) arrays; only feature index 0 is read
        tol: lower bound applied to std deviations to avoid division by zero

    Returns:
        The same `overlap_batch` array (returned for call-chaining).
    """
    n_subjects = overlap_batch.shape[0]
    negligible_threshold = 0.1   # CI must lie within ±this to count as "no torque"
    ampable_threshold = 0.2      # |mean| above this counts as meaningful torque
    ci_factor = 1.96             # 95% confidence interval half-width in stds

    for s in range(n_subjects):
        # Only the first feature (torque) drives the biomechanical filtering
        for i in range(150):
            mean1 = means1_batch[s, i, 0]
            var1 = vars1_batch[s, i, 0]

            if np.isnan(mean1) or np.isnan(var1):
                continue

            std1 = np.sqrt(var1)
            ci_lo1 = mean1 - ci_factor * std1
            ci_hi1 = mean1 + ci_factor * std1

            negligible1 = (ci_lo1 >= -negligible_threshold) and (ci_hi1 <= negligible_threshold)
            ampable1 = np.abs(mean1) > ampable_threshold

            for j in range(150):
                mean2 = means2_batch[s, j, 0]
                var2 = vars2_batch[s, j, 0]

                if np.isnan(mean2) or np.isnan(var2):
                    continue

                std2 = np.sqrt(var2)
                ci_lo2 = mean2 - ci_factor * std2
                ci_hi2 = mean2 + ci_factor * std2

                negligible2 = (ci_lo2 >= -negligible_threshold) and (ci_hi2 <= negligible_threshold)
                ampable2 = np.abs(mean2) > ampable_threshold

                # Three-level filtering
                if negligible1 and negligible2:
                    # Both negligible - treat as fully similar
                    overlap_batch[s, i, j] = 1.0
                elif (negligible1 and ampable2) or (negligible2 and ampable1):
                    # Amplitude conflict - keep original overlap
                    pass
                else:
                    # Sign-reversal case - apply probability-based filtering
                    std1_safe = max(std1, tol)
                    std2_safe = max(std2, tol)

                    # z-scores of the means (distance of the mean from zero)
                    z1 = mean1 / std1_safe
                    z2 = mean2 / std2_safe

                    # Abramowitz & Stegun 26.2.17 polynomial approximation of
                    # the normal CDF (scipy.stats.norm is unavailable in
                    # nopython mode).  Symmetric via |x| with 1-p for x > 0.
                    # NOTE(review): this closure is defined inside the j-loop;
                    # hoisting it to module level (its own @jit function) would
                    # avoid re-creating it 22 500 times per subject — confirm
                    # Numba does not already optimize this away.
                    def norm_cdf_approx(x):
                        t = 1.0 / (1.0 + 0.2316419 * np.abs(x))
                        d = 0.3989423 * np.exp(-x * x / 2.0)
                        prob = d * t * (0.3193815 + t * (-0.3565638 + t * (1.781478 + t * (-1.821256 + t * 1.330274))))
                        if x > 0:
                            return 1.0 - prob
                        else:
                            return prob

                    Ppos1 = norm_cdf_approx(z1)
                    Ppos2 = norm_cdf_approx(z2)

                    # Probability the two torques have opposite signs
                    Pdiff_sign = Ppos1 * (1.0 - Ppos2) + (1.0 - Ppos1) * Ppos2

                    # Mean-difference penalty: 0 below 0.2, 1 above 0.5,
                    # linear ramp in between (simplified from a sigmoid)
                    mean_diff = np.abs(mean1 - mean2)
                    s_thresh = 0.2
                    e_thresh = 0.5

                    if mean_diff <= s_thresh:
                        penalty = 0.0
                    elif mean_diff >= e_thresh:
                        penalty = 1.0
                    else:
                        penalty = (mean_diff - s_thresh) / (e_thresh - s_thresh)

                    # Apply the stronger of the two penalties to the overlap
                    Pdiff = max(Pdiff_sign, penalty)
                    output_diff = 1.0 - overlap_batch[s, i, j]
                    overlap_batch[s, i, j] = 1.0 - output_diff * Pdiff

    return overlap_batch
313
+
314
+
315
def compute_overlap_batch_fallback(means1_batch, vars1_batch, means2_batch, vars2_batch, tol=1e-12):
    """
    Fallback implementation when Numba is not available.

    For every subject and phase pair (i, j) computes
        overlap = exp(-0.5 * sum_f (μ1[i,f] - μ2[j,f])² / (σ1²[i,f] + σ2²[j,f]))
    Phase pairs involving NaNs, summed variances at or below `tol`, or
    quadratic forms large enough to underflow (0.5*quad > 20, exp ≈ 2e-9)
    are left at 0.

    Parameters:
        means1_batch / vars1_batch: np.ndarray (n_subjects, n_phases, n_features)
        means2_batch / vars2_batch: np.ndarray (n_subjects, n_phases, n_features)
        tol: float, minimum summed variance considered valid

    Returns:
        np.ndarray (n_subjects, n_phases, n_phases) float64 of overlaps in [0, 1]
    """
    n_subjects, n_phases, n_features = means1_batch.shape
    # Generalized: use the actual phase count rather than a hard-coded 150
    # (identical behavior for the standard 150-phase inputs; previously any
    # other phase count raised IndexError or silently produced zeros).
    overlap_batch = np.zeros((n_subjects, n_phases, n_phases), dtype=np.float64)

    for s in range(n_subjects):
        # Pre-compute NaN masks for this subject (one pass per phase axis)
        has_nan1 = np.any(np.isnan(means1_batch[s]) | np.isnan(vars1_batch[s]), axis=1)
        has_nan2 = np.any(np.isnan(means2_batch[s]) | np.isnan(vars2_batch[s]), axis=1)

        for i in range(n_phases):
            if has_nan1[i]:
                continue

            for j in range(n_phases):
                if has_nan2[j]:
                    continue

                # Direct computation of the quadratic form
                diff = means1_batch[s, i] - means2_batch[s, j]
                sum_var = vars1_batch[s, i] + vars2_batch[s, j]

                # Check validity of every summed variance
                if np.all(sum_var > tol):
                    half_quad = 0.5 * np.sum(diff**2 / sum_var)

                    # Underflow guard: exp(-20) is negligibly zero
                    if half_quad <= 20.0:
                        overlap_batch[s, i, j] = np.exp(-half_quad)

    return overlap_batch
349
+
350
+
351
# Main interface function
def compute_overlap_batch(means1_batch, vars1_batch, means2_batch, vars2_batch,
                          tol=1e-12, biomechanical_filter=False, ultra_fast=True,
                          vectorized_mode='auto'):
    """
    Main interface for computing batch overlap with multiple vectorization modes.

    Dispatches to the fastest available Numba kernel (falling back to the
    validated kernel on failure, and to pure NumPy when Numba is missing),
    optionally applies the biomechanical filter, and clips results to [0, 1].

    Parameters:
        means1_batch / vars1_batch / means2_batch / vars2_batch:
            (n_subjects, n_phases, n_features) arrays (n_phases assumed 150
            by the Numba kernels)
        tol: float, variance-validity tolerance for the validated paths
        biomechanical_filter: bool, apply torque-based filtering (Numba path only)
        ultra_fast: bool, retained for interface compatibility (the kernel is
            actually selected via `vectorized_mode`)
        vectorized_mode: str, one of 'auto', 'ultra_fast', 'vectorized',
            'row_vectorized'

    Returns:
        (n_subjects, n_phases, n_phases) array of overlap values in [0, 1]
    """
    if NUMBA_AVAILABLE:
        # Select best vectorization strategy
        if vectorized_mode == 'auto':
            # Auto-select based on data size
            n_subjects, _, n_features = means1_batch.shape
            if n_features >= 10 or n_subjects >= 15:
                mode = 'row_vectorized'  # Best for larger feature sets
            elif n_features >= 4:
                mode = 'vectorized'      # Good for medium feature sets
            else:
                mode = 'ultra_fast'      # Simple and fast for small feature sets
        else:
            mode = vectorized_mode

        try:
            if mode == 'row_vectorized':
                overlap_batch = compute_overlap_batch_numba_row_vectorized(
                    means1_batch, vars1_batch, means2_batch, vars2_batch)
            elif mode == 'vectorized':
                overlap_batch = compute_overlap_batch_numba_vectorized(
                    means1_batch, vars1_batch, means2_batch, vars2_batch)
            else:  # ultra_fast
                overlap_batch = compute_overlap_batch_numba_ultra_fast(
                    means1_batch, vars1_batch, means2_batch, vars2_batch)
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.  Fall back to the validated
            # (NaN/tolerance-checked) kernel on any runtime failure.
            overlap_batch = compute_overlap_batch_numba(means1_batch, vars1_batch,
                                                        means2_batch, vars2_batch, tol)

        if biomechanical_filter:
            overlap_batch = apply_biomechanical_filter_numba(overlap_batch, means1_batch, vars1_batch,
                                                             means2_batch, vars2_batch, tol)
    else:
        overlap_batch = compute_overlap_batch_fallback(means1_batch, vars1_batch,
                                                       means2_batch, vars2_batch, tol)

    # Final clipping to the valid probability-like range
    np.clip(overlap_batch, 0.0, 1.0, out=overlap_batch)
    return overlap_batch
401
+
402
+
403
if __name__ == "__main__":
    # Smoke test: verifies the dispatcher runs and reports timing.  The first
    # Numba call includes JIT compilation; the second shows steady-state speed.
    print("Testing Numba overlap calculation...")

    # Create test data
    n_subjects = 10
    n_features = 20  # e.g., 10 time windows × 2 sensors

    # Variances are shifted by +0.1 to keep them strictly positive.
    means1 = np.random.randn(n_subjects, 150, n_features)
    vars1 = np.abs(np.random.randn(n_subjects, 150, n_features)) + 0.1
    means2 = np.random.randn(n_subjects, 150, n_features)
    vars2 = np.abs(np.random.randn(n_subjects, 150, n_features)) + 0.1

    # Time the calculation
    import time

    print(f"Numba available: {NUMBA_AVAILABLE}")
    print(f"Computing overlap for {n_subjects} subjects, {n_features} features...")

    start = time.time()
    result = compute_overlap_batch(means1, vars1, means2, vars2)
    end = time.time()

    print(f"Result shape: {result.shape}")
    print(f"Execution time: {end - start:.3f} seconds")
    print(f"Non-zero elements: {np.count_nonzero(result)}")
    print(f"Max value: {np.max(result):.4f}")
    print(f"Min value: {np.min(result):.4f}")

    if NUMBA_AVAILABLE:
        print("\n✅ Numba JIT compilation successful!")
        print("The first run compiles the function, subsequent runs will be much faster.")

        # Run again to show compiled performance
        start = time.time()
        result = compute_overlap_batch(means1, vars1, means2, vars2)
        end = time.time()
        print(f"Compiled execution time: {end - start:.3f} seconds")
    else:
        print("\n⚠️ Numba not available, using fallback implementation.")
        print("Install Numba for 10-100x speedup: pip install numba")
pages/02_Tool.py CHANGED
@@ -7,7 +7,7 @@ import numpy as np
7
  import matplotlib.pyplot as plt
8
  import seaborn as sns
9
  from mpl_toolkits.axes_grid1 import make_axes_locatable
10
- from multivariate_gaussian_overlap import calculate_similarity_portrait_abstraction
11
  from plot_similarity import plot_similarity_measure
12
  from sensor_illustration import LegIllustration
13
  from shared_styling import set_plot_style, apply_cream_theme_to_figure
@@ -376,19 +376,32 @@ def calculate_overlap_measures(task1, task2, sensors, abstraction_level,
376
 
377
 
378
  # Get the phase windows from session state or use default
379
- if time_windows is None:
380
  time_windows = [1]
381
-
382
- similarity = calculate_similarity_portrait_abstraction(
383
- sensors=list(sensors),
384
- time_window=time_windows,
385
- abstraction_level=abstraction_level.lower(),
386
- task1_name=task1_spec,
387
- task2_name=task2_spec,
388
- output_difference=use_output_data,
389
- biomechanical_difference=biomechanical_filtering,
390
- progress_callback=lambda x: progress_bar.progress(x)
391
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
392
 
393
  # Clear the progress indicators
394
  progress_placeholder.empty()
@@ -495,7 +508,7 @@ if st.session_state.authenticated:
495
  task2_name_backend = (task2, task2_incline, task2_speed)
496
 
497
  # This will be passed to calculate_overlap_measures, which then passes its .lower() version
498
- # to calculate_similarity_portrait_abstraction.
499
  abstraction_level_backend = analysis_detail
500
 
501
  progress_placeholder = st.empty()
@@ -682,4 +695,4 @@ if st.session_state.authenticated:
682
  st.markdown("--- Marginal Distributions ---")
683
  # Placeholder: Add logic to plot marginal distributions if required.
684
  # This was previously outside the main conditional blocks.
685
- st.write("(Marginal distribution plotting not implemented in this layout yet)")
 
7
  import matplotlib.pyplot as plt
8
  import seaborn as sns
9
  from mpl_toolkits.axes_grid1 import make_axes_locatable
10
+ from multivariate_gaussian_overlap import calculate_portrait
11
  from plot_similarity import plot_similarity_measure
12
  from sensor_illustration import LegIllustration
13
  from shared_styling import set_plot_style, apply_cream_theme_to_figure
 
376
 
377
 
378
  # Get the phase windows from session state or use default
379
+ if not time_windows:
380
  time_windows = [1]
381
+
382
+ # Configure inputs for new calculate_portrait API
383
+ portrait_kwargs = {
384
+ 'task1': task1_spec if any(task1_spec) else None,
385
+ 'task2': task2_spec if any(task2_spec) else None,
386
+ 'time_window': time_windows,
387
+ 'progress_callback': lambda x: progress_bar.progress(x),
388
+ 'method': 'auto'
389
+ }
390
+
391
+ if use_output_data:
392
+ portrait_kwargs.update({
393
+ 'sensors': None,
394
+ 'output_sensors': list(sensors),
395
+ 'biomechanical_filter': biomechanical_filtering
396
+ })
397
+ else:
398
+ portrait_kwargs.update({
399
+ 'sensors': list(sensors),
400
+ 'output_sensors': None,
401
+ 'biomechanical_filter': False
402
+ })
403
+
404
+ similarity = calculate_portrait(**portrait_kwargs)
405
 
406
  # Clear the progress indicators
407
  progress_placeholder.empty()
 
508
  task2_name_backend = (task2, task2_incline, task2_speed)
509
 
510
  # This will be passed to calculate_overlap_measures, which then passes its .lower() version
511
+ # to calculate_portrait.
512
  abstraction_level_backend = analysis_detail
513
 
514
  progress_placeholder = st.empty()
 
695
  st.markdown("--- Marginal Distributions ---")
696
  # Placeholder: Add logic to plot marginal distributions if required.
697
  # This was previously outside the main conditional blocks.
698
+ st.write("(Marginal distribution plotting not implemented in this layout yet)")
plot_similarity.py CHANGED
@@ -17,7 +17,6 @@ def plot_similarity_measure(measure_data: np.ndarray, ax: plt.Axes = None,
17
  fontsize: int = 16,
18
  y_label_pad: int = 20,
19
  cbar_labels: bool = True,
20
- cutoff_treshold:float=None,
21
  high_level_plot: bool = False):
22
  """Plot similarity measure with consistent styling.
23
 
@@ -110,36 +109,13 @@ def plot_similarity_measure(measure_data: np.ndarray, ax: plt.Axes = None,
110
  else:
111
  cbar_obj.set_ticks([])
112
 
113
- # Implement cutoff threshold annotation
114
- if cutoff_treshold is not None:
115
-
116
- # The actual cutoff will depend on the plot type
117
- if plot_type == 'input':
118
- true_cutoff = cutoff_treshold
119
- elif plot_type == 'output':
120
- true_cutoff = 1 - cutoff_treshold
121
- elif plot_type == 'output_biomechanical':
122
- true_cutoff = 1 - cutoff_treshold
123
- else: # conflict
124
- true_cutoff = cutoff_treshold * (1 - cutoff_treshold)
125
-
126
-
127
- # Count percent of values above threshold
128
- if high_level_plot:
129
- # Subtract diagonal from thresholded values
130
- mask = np.ones_like(measure_data, dtype=bool)
131
- np.fill_diagonal(mask, False)
132
- total = np.sum(mask)
133
- above = np.sum((measure_data > true_cutoff) & mask)
134
- else:
135
- total = measure_data.size
136
- above = np.sum(measure_data > true_cutoff)
137
- percent = 100.0 * above / total if total > 0 else 0.0
138
  # Format as e.g. "12.3% > 0.8"
139
- annotation = f"{percent:.1f}% > {true_cutoff:g}"
140
  # Place annotation to the right of the colorbar
141
  cbar_obj.ax.text(1.2, 0.5, annotation, va='center', ha='left',
142
- fontsize=fontsize, rotation=90, transform=cbar_obj.ax.transAxes)
143
 
144
  # Set aspect ratio to equal
145
  ax.set_aspect('equal')
 
17
  fontsize: int = 16,
18
  y_label_pad: int = 20,
19
  cbar_labels: bool = True,
 
20
  high_level_plot: bool = False):
21
  """Plot similarity measure with consistent styling.
22
 
 
109
  else:
110
  cbar_obj.set_ticks([])
111
 
112
+ if plot_type != 'output':
113
+ percent = np.mean(measure_data) * 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  # Format as e.g. "12.3% > 0.8"
115
+ annotation = r"$\tilde C_{total} = $" + f"{percent:.1f}%"
116
  # Place annotation to the right of the colorbar
117
  cbar_obj.ax.text(1.2, 0.5, annotation, va='center', ha='left',
118
+ fontsize=fontsize, rotation=90, transform=cbar_obj.ax.transAxes)
119
 
120
  # Set aspect ratio to equal
121
  ax.set_aspect('equal')
plot_styling.py CHANGED
@@ -1,13 +1,59 @@
1
  """
2
  Unified styling module for both Streamlit UI and matplotlib plots.
3
  Contains all styling definitions to ensure consistency across the application.
 
 
 
 
 
4
  """
5
 
6
- import streamlit as st
 
7
  import matplotlib.pyplot as plt
8
  import seaborn as sns
9
  import matplotlib.font_manager as fm
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # ==========================
12
  # Shared Color Themes
13
  # ==========================
@@ -46,6 +92,40 @@ LIGHT_COLORS = {
46
  'spine_color': '#E0E0E0',
47
  }
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # Dark theme colors - consistent across UI and plots
50
  DARK_COLORS = {
51
  'background': '#1E1E1E',
@@ -80,8 +160,12 @@ DARK_COLORS = {
80
  'spine_color': '#505050',
81
  }
82
 
 
83
  def get_current_colors():
84
  """Get the current color scheme based on session state."""
 
 
 
85
  if 'dark_theme' not in st.session_state:
86
  st.session_state.dark_theme = False
87
 
@@ -89,12 +173,18 @@ def get_current_colors():
89
 
90
  def toggle_theme():
91
  """Toggle between light and dark themes."""
 
 
 
92
  if 'dark_theme' not in st.session_state:
93
  st.session_state.dark_theme = False
94
  st.session_state.dark_theme = not st.session_state.dark_theme
95
 
96
  def add_theme_toggle():
97
  """Add a theme toggle button to the sidebar."""
 
 
 
98
  with st.sidebar:
99
  st.markdown("---")
100
  current_theme = "🌙 Dark" if st.session_state.get('dark_theme', False) else "☀️ Light"
@@ -141,9 +231,18 @@ purple_helix = sns.cubehelix_palette(start=.2, rot=-.4, dark=0, light=0.85,
141
  my_purple_helix = sns.cubehelix_palette(start=.2, rot=-.1, dark=0, light=0.85,
142
  reverse=True, as_cmap=True)
143
 
144
- def get_plot_style():
145
- """Get plot style with current theme colors."""
146
- theme_colors = get_current_colors()
 
 
 
 
 
 
 
 
 
147
 
148
  return {
149
  'font_family': PLOT_STYLE_FONT_FAMILY,
@@ -168,9 +267,13 @@ def get_plot_style():
168
  'spine_color': theme_colors['spine_color'],
169
  }
170
 
171
- def set_plot_style():
172
- """Set consistent plot styling across all figures"""
173
- plot_style = get_plot_style()
 
 
 
 
174
 
175
  plt.rcParams['font.family'] = plot_style['font_family']
176
  plt.rcParams['font.size'] = plot_style['font_size']
@@ -233,6 +336,10 @@ def apply_theme_to_figure(fig, ax=None):
233
 
234
  return fig, ax
235
 
 
 
 
 
236
  # Legacy function name for backward compatibility
237
  def apply_cream_theme_to_figure(fig, ax=None):
238
  """Apply current theme to an existing figure and axes (legacy function name)"""
@@ -461,19 +568,27 @@ def get_tool_page_css():
461
 
462
  def apply_base_styling():
463
  """Apply the base styling to the current Streamlit page."""
 
 
464
  st.markdown(get_base_css(), unsafe_allow_html=True)
465
 
466
  def apply_home_page_styling():
467
  """Apply styling specific to the home page."""
 
 
468
  st.markdown(get_base_css(), unsafe_allow_html=True)
469
  st.markdown(get_home_page_css(), unsafe_allow_html=True)
470
 
471
  def apply_documentation_page_styling():
472
  """Apply styling specific to the documentation page."""
 
 
473
  st.markdown(get_base_css(), unsafe_allow_html=True)
474
  st.markdown(get_documentation_page_css(), unsafe_allow_html=True)
475
 
476
  def apply_tool_page_styling():
477
  """Apply styling specific to the analysis tool page."""
 
 
478
  st.markdown(get_base_css(), unsafe_allow_html=True)
479
  st.markdown(get_tool_page_css(), unsafe_allow_html=True)
 
1
  """
2
  Unified styling module for both Streamlit UI and matplotlib plots.
3
  Contains all styling definitions to ensure consistency across the application.
4
+
5
+ Note: When used outside of Streamlit environment (e.g., in Jupyter notebooks),
6
+ you may see warnings about missing ScriptRunContext or Session state. These
7
+ warnings are harmless and can be safely ignored - the core plotting functions
8
+ (get_plot_style, set_plot_style, PLOT_COLORS) work correctly regardless.
9
  """
10
 
11
+ import warnings
12
+ import logging
13
  import matplotlib.pyplot as plt
14
  import seaborn as sns
15
  import matplotlib.font_manager as fm
16
 
17
+ # Suppress Streamlit warnings when running outside streamlit environment
18
+ warnings.filterwarnings('ignore', category=UserWarning, module='streamlit')
19
+ warnings.filterwarnings('ignore', message='.*ScriptRunContext.*')
20
+ warnings.filterwarnings('ignore', message='.*Session state.*')
21
+ warnings.filterwarnings('ignore', message='.*missing ScriptRunContext.*')
22
+ warnings.filterwarnings('ignore', message='.*does not function when running.*')
23
+ warnings.filterwarnings('ignore', module='streamlit.runtime.*')
24
+ warnings.filterwarnings('ignore', module='streamlit.runtime.scriptrunner_utils.*')
25
+ warnings.filterwarnings('ignore', module='streamlit.runtime.state.*')
26
+
27
+ # Suppress Streamlit loggers that generate warnings outside streamlit environment
28
+ logging.getLogger('streamlit.runtime.scriptrunner_utils.script_run_context').setLevel(logging.ERROR)
29
+ logging.getLogger('streamlit.runtime.state.session_state_proxy').setLevel(logging.ERROR)
30
+ logging.getLogger('streamlit').setLevel(logging.ERROR)
31
+
32
+ try:
33
+ # Set logging level before importing to suppress initial warnings
34
+ for logger_name in ['streamlit', 'streamlit.runtime', 'streamlit.runtime.scriptrunner_utils',
35
+ 'streamlit.runtime.state', 'streamlit.runtime.scriptrunner_utils.script_run_context',
36
+ 'streamlit.runtime.state.session_state_proxy']:
37
+ logging.getLogger(logger_name).setLevel(logging.ERROR)
38
+
39
+ import streamlit as st
40
+ _STREAMLIT_AVAILABLE = True
41
+ except ImportError:
42
+ _STREAMLIT_AVAILABLE = False
43
+ # Create a mock streamlit module for non-streamlit environments
44
+ class MockStreamlit:
45
+ class session_state:
46
+ dark_theme = False
47
+ st = MockStreamlit()
48
+
49
+ def _suppress_streamlit_warnings(func):
50
+ """Decorator to suppress streamlit warnings in functions."""
51
+ def wrapper(*args, **kwargs):
52
+ with warnings.catch_warnings():
53
+ warnings.simplefilter('ignore')
54
+ return func(*args, **kwargs)
55
+ return wrapper
56
+
57
  # ==========================
58
  # Shared Color Themes
59
  # ==========================
 
92
  'spine_color': '#E0E0E0',
93
  }
94
 
95
+ # Paper theme colors - pure white backgrounds for publication
96
+ PAPER_COLORS = {
97
+ 'background': '#FFFFFF',
98
+ 'figure_background': '#FFFFFF',
99
+ 'sidebar_bg_start': '#FAFAFA',
100
+ 'sidebar_bg_end': '#F5F5F5',
101
+ 'border_light': '#F0F0F0',
102
+ 'border_medium': '#E0E0E0',
103
+ 'text_primary': '#2C3E50',
104
+ 'text_secondary': '#5D6D7E',
105
+ 'text_tertiary': '#85929E',
106
+ 'text_light': '#A6ACAF',
107
+ 'button_bg_start': '#5D6D7E',
108
+ 'button_bg_end': '#85929E',
109
+ 'button_hover_start': '#85929E',
110
+ 'button_hover_end': '#A6ACAF',
111
+ 'alert_error_bg': '#FFE6E6',
112
+ 'alert_error_border': '#FFAAAA',
113
+ 'alert_error_text': '#CC0000',
114
+ 'alert_info_bg': '#E6F3FF',
115
+ 'alert_info_border': '#99D6FF',
116
+ 'alert_info_text': '#0066CC',
117
+ 'warning_bg': '#FFF8E1',
118
+ 'warning_border': '#FF9800',
119
+ 'success_bg': '#E8F5E8',
120
+ 'success_border': '#4CAF50',
121
+ 'generate_button_bg': '#228B22',
122
+ 'generate_button_hover': '#32CD32',
123
+ # Plot-specific colors - pure white for papers
124
+ 'axes_background': '#FFFFFF',
125
+ 'grid_color': '#F0F0F0',
126
+ 'spine_color': '#E0E0E0',
127
+ }
128
+
129
  # Dark theme colors - consistent across UI and plots
130
  DARK_COLORS = {
131
  'background': '#1E1E1E',
 
160
  'spine_color': '#505050',
161
  }
162
 
163
+ @_suppress_streamlit_warnings
164
  def get_current_colors():
165
  """Get the current color scheme based on session state."""
166
+ if not _STREAMLIT_AVAILABLE:
167
+ return LIGHT_COLORS
168
+
169
  if 'dark_theme' not in st.session_state:
170
  st.session_state.dark_theme = False
171
 
 
173
 
174
  def toggle_theme():
175
  """Toggle between light and dark themes."""
176
+ if not _STREAMLIT_AVAILABLE:
177
+ return
178
+
179
  if 'dark_theme' not in st.session_state:
180
  st.session_state.dark_theme = False
181
  st.session_state.dark_theme = not st.session_state.dark_theme
182
 
183
  def add_theme_toggle():
184
  """Add a theme toggle button to the sidebar."""
185
+ if not _STREAMLIT_AVAILABLE:
186
+ return
187
+
188
  with st.sidebar:
189
  st.markdown("---")
190
  current_theme = "🌙 Dark" if st.session_state.get('dark_theme', False) else "☀️ Light"
 
231
  my_purple_helix = sns.cubehelix_palette(start=.2, rot=-.1, dark=0, light=0.85,
232
  reverse=True, as_cmap=True)
233
 
234
+ def get_plot_style(style='default'):
235
+ """Get plot style with specified color theme.
236
+
237
+ Args:
238
+ style: 'default' for cream theme, 'paper' for pure white backgrounds, 'dark' for dark theme
239
+ """
240
+ if style == 'paper':
241
+ theme_colors = PAPER_COLORS
242
+ elif style == 'dark':
243
+ theme_colors = DARK_COLORS
244
+ else: # default
245
+ theme_colors = get_current_colors()
246
 
247
  return {
248
  'font_family': PLOT_STYLE_FONT_FAMILY,
 
267
  'spine_color': theme_colors['spine_color'],
268
  }
269
 
270
+ def set_plot_style(style='default'):
271
+ """Set consistent plot styling across all figures.
272
+
273
+ Args:
274
+ style: 'default' for cream theme, 'paper' for pure white backgrounds, 'dark' for dark theme
275
+ """
276
+ plot_style = get_plot_style(style=style)
277
 
278
  plt.rcParams['font.family'] = plot_style['font_family']
279
  plt.rcParams['font.size'] = plot_style['font_size']
 
336
 
337
  return fig, ax
338
 
339
+ def set_paper_plot_style():
340
+ """Convenience function to set pure white backgrounds for paper publication."""
341
+ set_plot_style(style='paper')
342
+
343
  # Legacy function name for backward compatibility
344
  def apply_cream_theme_to_figure(fig, ax=None):
345
  """Apply current theme to an existing figure and axes (legacy function name)"""
 
568
 
569
  def apply_base_styling():
570
  """Apply the base styling to the current Streamlit page."""
571
+ if not _STREAMLIT_AVAILABLE:
572
+ return
573
  st.markdown(get_base_css(), unsafe_allow_html=True)
574
 
575
  def apply_home_page_styling():
576
  """Apply styling specific to the home page."""
577
+ if not _STREAMLIT_AVAILABLE:
578
+ return
579
  st.markdown(get_base_css(), unsafe_allow_html=True)
580
  st.markdown(get_home_page_css(), unsafe_allow_html=True)
581
 
582
  def apply_documentation_page_styling():
583
  """Apply styling specific to the documentation page."""
584
+ if not _STREAMLIT_AVAILABLE:
585
+ return
586
  st.markdown(get_base_css(), unsafe_allow_html=True)
587
  st.markdown(get_documentation_page_css(), unsafe_allow_html=True)
588
 
589
  def apply_tool_page_styling():
590
  """Apply styling specific to the analysis tool page."""
591
+ if not _STREAMLIT_AVAILABLE:
592
+ return
593
  st.markdown(get_base_css(), unsafe_allow_html=True)
594
  st.markdown(get_tool_page_css(), unsafe_allow_html=True)
requirements.txt CHANGED
@@ -3,4 +3,5 @@ numpy>=1.21.0
3
  matplotlib>=3.4.0
4
  seaborn>=0.11.0
5
  pandas>=1.3.0
6
- scipy>=1.7.0
 
 
3
  matplotlib>=3.4.0
4
  seaborn>=0.11.0
5
  pandas>=1.3.0
6
+ scipy>=1.7.0
7
+ tqdm>=4.64.0
shared_styling.py CHANGED
@@ -1,13 +1,59 @@
1
  """
2
  Unified styling module for both Streamlit UI and matplotlib plots.
3
  Contains all styling definitions to ensure consistency across the application.
 
 
 
 
 
4
  """
5
 
6
- import streamlit as st
 
7
  import matplotlib.pyplot as plt
8
  import seaborn as sns
9
  import matplotlib.font_manager as fm
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # ==========================
12
  # Shared Color Themes
13
  # ==========================
@@ -46,6 +92,40 @@ LIGHT_COLORS = {
46
  'spine_color': '#E0E0E0',
47
  }
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # Dark theme colors - consistent across UI and plots
50
  DARK_COLORS = {
51
  'background': '#1E1E1E',
@@ -80,8 +160,12 @@ DARK_COLORS = {
80
  'spine_color': '#505050',
81
  }
82
 
 
83
  def get_current_colors():
84
  """Get the current color scheme based on session state."""
 
 
 
85
  if 'dark_theme' not in st.session_state:
86
  st.session_state.dark_theme = False
87
 
@@ -89,12 +173,18 @@ def get_current_colors():
89
 
90
  def toggle_theme():
91
  """Toggle between light and dark themes."""
 
 
 
92
  if 'dark_theme' not in st.session_state:
93
  st.session_state.dark_theme = False
94
  st.session_state.dark_theme = not st.session_state.dark_theme
95
 
96
  def add_theme_toggle():
97
  """Add a theme toggle button to the sidebar."""
 
 
 
98
  with st.sidebar:
99
  st.markdown("---")
100
  current_theme = "🌙 Dark" if st.session_state.get('dark_theme', False) else "☀️ Light"
@@ -141,9 +231,18 @@ purple_helix = sns.cubehelix_palette(start=.2, rot=-.4, dark=0, light=0.85,
141
  my_purple_helix = sns.cubehelix_palette(start=.2, rot=-.1, dark=0, light=0.85,
142
  reverse=True, as_cmap=True)
143
 
144
- def get_plot_style():
145
- """Get plot style with current theme colors."""
146
- theme_colors = get_current_colors()
 
 
 
 
 
 
 
 
 
147
 
148
  return {
149
  'font_family': PLOT_STYLE_FONT_FAMILY,
@@ -168,9 +267,13 @@ def get_plot_style():
168
  'spine_color': theme_colors['spine_color'],
169
  }
170
 
171
- def set_plot_style():
172
- """Set consistent plot styling across all figures"""
173
- plot_style = get_plot_style()
 
 
 
 
174
 
175
  plt.rcParams['font.family'] = plot_style['font_family']
176
  plt.rcParams['font.size'] = plot_style['font_size']
@@ -233,6 +336,10 @@ def apply_theme_to_figure(fig, ax=None):
233
 
234
  return fig, ax
235
 
 
 
 
 
236
  # Legacy function name for backward compatibility
237
  def apply_cream_theme_to_figure(fig, ax=None):
238
  """Apply current theme to an existing figure and axes (legacy function name)"""
@@ -461,19 +568,27 @@ def get_tool_page_css():
461
 
462
  def apply_base_styling():
463
  """Apply the base styling to the current Streamlit page."""
 
 
464
  st.markdown(get_base_css(), unsafe_allow_html=True)
465
 
466
  def apply_home_page_styling():
467
  """Apply styling specific to the home page."""
 
 
468
  st.markdown(get_base_css(), unsafe_allow_html=True)
469
  st.markdown(get_home_page_css(), unsafe_allow_html=True)
470
 
471
  def apply_documentation_page_styling():
472
  """Apply styling specific to the documentation page."""
 
 
473
  st.markdown(get_base_css(), unsafe_allow_html=True)
474
  st.markdown(get_documentation_page_css(), unsafe_allow_html=True)
475
 
476
  def apply_tool_page_styling():
477
  """Apply styling specific to the analysis tool page."""
 
 
478
  st.markdown(get_base_css(), unsafe_allow_html=True)
479
  st.markdown(get_tool_page_css(), unsafe_allow_html=True)
 
1
  """
2
  Unified styling module for both Streamlit UI and matplotlib plots.
3
  Contains all styling definitions to ensure consistency across the application.
4
+
5
+ Note: When used outside of Streamlit environment (e.g., in Jupyter notebooks),
6
+ you may see warnings about missing ScriptRunContext or Session state. These
7
+ warnings are harmless and can be safely ignored - the core plotting functions
8
+ (get_plot_style, set_plot_style, PLOT_COLORS) work correctly regardless.
9
  """
10
 
11
+ import warnings
12
+ import logging
13
  import matplotlib.pyplot as plt
14
  import seaborn as sns
15
  import matplotlib.font_manager as fm
16
 
17
+ # Suppress Streamlit warnings when running outside streamlit environment
18
+ warnings.filterwarnings('ignore', category=UserWarning, module='streamlit')
19
+ warnings.filterwarnings('ignore', message='.*ScriptRunContext.*')
20
+ warnings.filterwarnings('ignore', message='.*Session state.*')
21
+ warnings.filterwarnings('ignore', message='.*missing ScriptRunContext.*')
22
+ warnings.filterwarnings('ignore', message='.*does not function when running.*')
23
+ warnings.filterwarnings('ignore', module='streamlit.runtime.*')
24
+ warnings.filterwarnings('ignore', module='streamlit.runtime.scriptrunner_utils.*')
25
+ warnings.filterwarnings('ignore', module='streamlit.runtime.state.*')
26
+
27
+ # Suppress Streamlit loggers that generate warnings outside streamlit environment
28
+ logging.getLogger('streamlit.runtime.scriptrunner_utils.script_run_context').setLevel(logging.ERROR)
29
+ logging.getLogger('streamlit.runtime.state.session_state_proxy').setLevel(logging.ERROR)
30
+ logging.getLogger('streamlit').setLevel(logging.ERROR)
31
+
32
+ try:
33
+ # Set logging level before importing to suppress initial warnings
34
+ for logger_name in ['streamlit', 'streamlit.runtime', 'streamlit.runtime.scriptrunner_utils',
35
+ 'streamlit.runtime.state', 'streamlit.runtime.scriptrunner_utils.script_run_context',
36
+ 'streamlit.runtime.state.session_state_proxy']:
37
+ logging.getLogger(logger_name).setLevel(logging.ERROR)
38
+
39
+ import streamlit as st
40
+ _STREAMLIT_AVAILABLE = True
41
+ except ImportError:
42
+ _STREAMLIT_AVAILABLE = False
43
+ # Create a mock streamlit module for non-streamlit environments
44
+ class MockStreamlit:
45
+ class session_state:
46
+ dark_theme = False
47
+ st = MockStreamlit()
48
+
49
+ def _suppress_streamlit_warnings(func):
50
+ """Decorator to suppress streamlit warnings in functions."""
51
+ def wrapper(*args, **kwargs):
52
+ with warnings.catch_warnings():
53
+ warnings.simplefilter('ignore')
54
+ return func(*args, **kwargs)
55
+ return wrapper
56
+
57
  # ==========================
58
  # Shared Color Themes
59
  # ==========================
 
92
  'spine_color': '#E0E0E0',
93
  }
94
 
95
+ # Paper theme colors - pure white backgrounds for publication
96
+ PAPER_COLORS = {
97
+ 'background': '#FFFFFF',
98
+ 'figure_background': '#FFFFFF',
99
+ 'sidebar_bg_start': '#FAFAFA',
100
+ 'sidebar_bg_end': '#F5F5F5',
101
+ 'border_light': '#F0F0F0',
102
+ 'border_medium': '#E0E0E0',
103
+ 'text_primary': '#2C3E50',
104
+ 'text_secondary': '#5D6D7E',
105
+ 'text_tertiary': '#85929E',
106
+ 'text_light': '#A6ACAF',
107
+ 'button_bg_start': '#5D6D7E',
108
+ 'button_bg_end': '#85929E',
109
+ 'button_hover_start': '#85929E',
110
+ 'button_hover_end': '#A6ACAF',
111
+ 'alert_error_bg': '#FFE6E6',
112
+ 'alert_error_border': '#FFAAAA',
113
+ 'alert_error_text': '#CC0000',
114
+ 'alert_info_bg': '#E6F3FF',
115
+ 'alert_info_border': '#99D6FF',
116
+ 'alert_info_text': '#0066CC',
117
+ 'warning_bg': '#FFF8E1',
118
+ 'warning_border': '#FF9800',
119
+ 'success_bg': '#E8F5E8',
120
+ 'success_border': '#4CAF50',
121
+ 'generate_button_bg': '#228B22',
122
+ 'generate_button_hover': '#32CD32',
123
+ # Plot-specific colors - pure white for papers
124
+ 'axes_background': '#FFFFFF',
125
+ 'grid_color': '#F0F0F0',
126
+ 'spine_color': '#E0E0E0',
127
+ }
128
+
129
  # Dark theme colors - consistent across UI and plots
130
  DARK_COLORS = {
131
  'background': '#1E1E1E',
 
160
  'spine_color': '#505050',
161
  }
162
 
163
+ @_suppress_streamlit_warnings
164
  def get_current_colors():
165
  """Get the current color scheme based on session state."""
166
+ if not _STREAMLIT_AVAILABLE:
167
+ return LIGHT_COLORS
168
+
169
  if 'dark_theme' not in st.session_state:
170
  st.session_state.dark_theme = False
171
 
 
173
 
174
  def toggle_theme():
175
  """Toggle between light and dark themes."""
176
+ if not _STREAMLIT_AVAILABLE:
177
+ return
178
+
179
  if 'dark_theme' not in st.session_state:
180
  st.session_state.dark_theme = False
181
  st.session_state.dark_theme = not st.session_state.dark_theme
182
 
183
  def add_theme_toggle():
184
  """Add a theme toggle button to the sidebar."""
185
+ if not _STREAMLIT_AVAILABLE:
186
+ return
187
+
188
  with st.sidebar:
189
  st.markdown("---")
190
  current_theme = "🌙 Dark" if st.session_state.get('dark_theme', False) else "☀️ Light"
 
231
  my_purple_helix = sns.cubehelix_palette(start=.2, rot=-.1, dark=0, light=0.85,
232
  reverse=True, as_cmap=True)
233
 
234
+ def get_plot_style(style='default'):
235
+ """Get plot style with specified color theme.
236
+
237
+ Args:
238
+ style: 'default' for cream theme, 'paper' for pure white backgrounds, 'dark' for dark theme
239
+ """
240
+ if style == 'paper':
241
+ theme_colors = PAPER_COLORS
242
+ elif style == 'dark':
243
+ theme_colors = DARK_COLORS
244
+ else: # default
245
+ theme_colors = get_current_colors()
246
 
247
  return {
248
  'font_family': PLOT_STYLE_FONT_FAMILY,
 
267
  'spine_color': theme_colors['spine_color'],
268
  }
269
 
270
+ def set_plot_style(style='default'):
271
+ """Set consistent plot styling across all figures.
272
+
273
+ Args:
274
+ style: 'default' for cream theme, 'paper' for pure white backgrounds, 'dark' for dark theme
275
+ """
276
+ plot_style = get_plot_style(style=style)
277
 
278
  plt.rcParams['font.family'] = plot_style['font_family']
279
  plt.rcParams['font.size'] = plot_style['font_size']
 
336
 
337
  return fig, ax
338
 
339
+ def set_paper_plot_style():
340
+ """Convenience function to set pure white backgrounds for paper publication."""
341
+ set_plot_style(style='paper')
342
+
343
  # Legacy function name for backward compatibility
344
  def apply_cream_theme_to_figure(fig, ax=None):
345
  """Apply current theme to an existing figure and axes (legacy function name)"""
 
568
 
569
  def apply_base_styling():
570
  """Apply the base styling to the current Streamlit page."""
571
+ if not _STREAMLIT_AVAILABLE:
572
+ return
573
  st.markdown(get_base_css(), unsafe_allow_html=True)
574
 
575
  def apply_home_page_styling():
576
  """Apply styling specific to the home page."""
577
+ if not _STREAMLIT_AVAILABLE:
578
+ return
579
  st.markdown(get_base_css(), unsafe_allow_html=True)
580
  st.markdown(get_home_page_css(), unsafe_allow_html=True)
581
 
582
  def apply_documentation_page_styling():
583
  """Apply styling specific to the documentation page."""
584
+ if not _STREAMLIT_AVAILABLE:
585
+ return
586
  st.markdown(get_base_css(), unsafe_allow_html=True)
587
  st.markdown(get_documentation_page_css(), unsafe_allow_html=True)
588
 
589
  def apply_tool_page_styling():
590
  """Apply styling specific to the analysis tool page."""
591
+ if not _STREAMLIT_AVAILABLE:
592
+ return
593
  st.markdown(get_base_css(), unsafe_allow_html=True)
594
  st.markdown(get_tool_page_css(), unsafe_allow_html=True)
st_logs/dashboard_access.log ADDED
File without changes