Spaces:

CIMT
/

LCMV_Xtra

Sleeping

App Files Files Community

LCMV_Xtra / lcmv_class.py

JayLacoma

Update lcmv_class.py

56bcf87 verified 3 months ago

raw

history blame contribute delete

23.2 kB

	# lcmv_class.py

	# Imports
	import mne
	import numpy as np
	import pandas as pd
	import nibabel as nib
	from tqdm import tqdm
	import os, time, pickle
	from pathlib import Path
	import matplotlib.pyplot as plt
	from nilearn import datasets, image

	# Set MNE to only show warnings and errors
	mne.set_log_level('warning')

	class LCMVSourceEstimator:
	def __init__(self, config):
	"""
	Initialize the LCMV Source Estimator with configuration.

	Parameters:
	config (dict): Configuration dictionary containing all necessary parameters
	"""
	self.config = config
	self.project_base = Path(config['project_base'])
	self.subject_id = config['subject_id']
	self.task = config['task']

	# GLOBAL directory for shared resources (fsaverage)
	self.global_subjects_dir = self.project_base / 'derivatives/lcmv'

	# SUBJECT-SPECIFIC directory for output
	self.subject_output = self.project_base / f'derivatives/lcmv/{self.subject_id}_{self.task}'
	self.subject_output.mkdir(parents=True, exist_ok=True)

	def parse_gpsc(self, filepath):
	"""Parse .gpsc file and normalize coordinates to center the origin."""
	channels = []
	with open(filepath, 'r') as file:
	lines = file.readlines()
	for line in lines:
	parts = line.strip().split()
	if len(parts) < 4:
	continue
	name = parts[0]
	try:
	x, y, z = map(float, parts[1:4])
	channels.append((name, x, y, z))
	except ValueError:
	continue
	return channels

	def run_enhanced_computation(self):
	"""Run the complete enhanced LCMV pipeline with improved coregistration"""
	print("="*60)
	print(f"🎯 ENHANCED LCMV SOURCE ESTIMATION - Subject: {self.subject_id}")
	print("="*60)

	print("\n=== Loading Data ===")
	ica_file = self.project_base / self.config['ica_file_path']
	gpsc_file = self.project_base / self.config['gpsc_file_path']

	if not ica_file.exists():
	raise FileNotFoundError(f"ICA file not found: {ica_file}")
	if not gpsc_file.exists():
	raise FileNotFoundError(f"GPSC file not found: {gpsc_file}")

	# Load data
	raw = mne.io.read_raw_fif(ica_file, preload=True)
	sfreq = raw.info['sfreq']
	duration_min = raw.n_times / sfreq / 60
	print(f"Data: {duration_min:.1f}min, {sfreq}Hz, {raw.n_times} samples")

	# === ENHANCED PREPROCESSING PIPELINE ===
	print("\n=== Enhanced Preprocessing Pipeline ===")

	# Rename channels to match .gpsc file
	channel_map = {str(i): f'E{i}' for i in range(1, 281)}
	channel_map['REF CZ'] = 'Cz'

	# Only rename existing channels
	existing_channels = set(raw.info['ch_names'])
	valid_channel_map = {}
	for old_name, new_name in channel_map.items():
	if old_name in existing_channels:
	valid_channel_map[old_name] = new_name

	if valid_channel_map:
	raw.rename_channels(valid_channel_map)
	print(f"Renamed {len(valid_channel_map)} channels")



	# === ENHANCED MONTAGE CREATION ===
	print("\n=== Creating Enhanced Montage with Coordinate Normalization ===")

	# Parse .gpsc file
	channels = self.parse_gpsc(gpsc_file)

	if not channels:
	raise ValueError("No valid channels found in .gpsc file")

	# Normalize coordinates to center the origin (enhanced method)
	gpsc_array = np.array([ch[1:4] for ch in channels])
	mean_pos = np.mean(gpsc_array, axis=0)
	print(f"Original mean position (mm): {mean_pos}")

	# Normalize and convert to meters
	channels_normalized = [(ch[0], ch[1] - mean_pos[0], ch[2] - mean_pos[1], ch[3] - mean_pos[2])
	for ch in channels]
	ch_pos = {ch[0]: np.array(ch[1:4]) / 1000.0 for ch in channels_normalized}

	# Check fiducials
	required_fids = ['FidNz', 'FidT9', 'FidT10']
	missing = [fid for fid in required_fids if fid not in ch_pos]
	if missing:
	raise ValueError(f"Missing fiducials: {missing}")

	# Create montage with normalized coordinates
	montage = mne.channels.make_dig_montage(
	ch_pos=ch_pos,
	nasion=ch_pos['FidNz'],
	lpa=ch_pos['FidT9'],
	rpa=ch_pos['FidT10'],
	coord_frame='head'
	)




	# Apply montage and preprocessing
	raw.set_montage(montage, on_missing='warn')
	raw = raw.pick(['eeg', 'stim'], exclude=raw.info['bads'])

	print("\n🔍 Checking EEG reference status...")
	print(f"custom_ref_applied: {raw.info['custom_ref_applied']}")
	print(f"n_projs: {len(raw.info['projs'])}")
	print(f"proj_applied: {raw.proj}")

	# --- Ensure average reference projection is present ---
	if not any(p['desc'] == 'average' for p in raw.info['projs']):
	print("📎 No average reference projection found. Applying it...")
	raw.set_eeg_reference('average', projection=True)
	else:
	print("✅ Average reference projection already in place.")

	# --- Apply projections if not already applied ---
	if not raw.proj:
	print("🎯 Applying EEG average reference projection...")
	raw.apply_proj()
	else:
	print("💡 Projections already applied.")

	print("✓ Enhanced preprocessing complete (reference now valid for inverse modeling)")

	print(f"Enhanced montage applied:")
	print(f"FidNz (nasion): {ch_pos['FidNz']}")
	print(f"FidT9 (lpa): {ch_pos['FidT9']}")
	print(f"FidT10 (rpa): {ch_pos['FidT10']}")

	# === SOURCE SPACE SETUP ===
	print("\n=== Source Space Setup ===")
	subject = 'fsaverage'

	# Download fsaverage if needed
	bem_file = self.global_subjects_dir / 'fsaverage' / 'bem' / 'fsaverage-5120-5120-5120-bem-sol.fif'
	bem_head = self.global_subjects_dir / 'fsaverage' / 'bem' / 'fsaverage-head-dense.fif'
	src_file = self.global_subjects_dir / 'fsaverage-vol-5mm-src.fif'

	if not bem_file.exists() or not bem_head.exists():
	print("Downloading fsaverage to GLOBAL directory...")
	mne.datasets.fetch_fsaverage(subjects_dir=self.global_subjects_dir, verbose=False)

	# === ENHANCED COREGISTRATION ===
	print("\n=== Running Enhanced Coregistration ===")
	trans_file = self.subject_output / 'fsaverage-trans.fif'

	try:
	# Initialize coregistration with normalized coordinates
	coreg = mne.coreg.Coregistration(
	raw.info,
	subject=subject,
	subjects_dir=self.global_subjects_dir,
	fiducials={
	'nasion': ch_pos['FidNz'],
	'lpa': ch_pos['FidT9'],
	'rpa': ch_pos['FidT10']
	}
	)

	# Step 1: Fit with fiducials first
	print("1/3: Fitting with fiducials...")
	coreg.fit_fiducials(verbose=False)

	# Step 2: Use EEG channels as head shape points for ICP
	print("2/3: Using EEG channels as head shape points for ICP...")
	coreg.fit_icp(n_iterations=6, nasion_weight=2.0, verbose=False)

	# Remove outliers
	print(" Removing outlier points...")
	dists = coreg.compute_dig_mri_distances()
	n_excluded = np.sum(dists > 5.0/1000)

	if n_excluded > 0:
	print(f" Excluding {n_excluded} outlier points (distance > 5mm)")
	coreg.omit_head_shape_points(distance=5.0/1000)
	else:
	print(" No outlier points to exclude")

	# Step 3: Final refinement with higher weight on nasion
	print("3/3: Final ICP refinement...")
	coreg.fit_icp(n_iterations=20, nasion_weight=10.0, verbose=False)

	# Save transformation
	trans = coreg.trans
	mne.write_trans(trans_file, trans, overwrite=True)
	print(f"✓ Enhanced coregistration successful: {trans_file}")

	# Compute and display error metrics
	dists = coreg.compute_dig_mri_distances() * 1000 # mm
	mean_err = np.mean(dists)
	median_err = np.median(dists)
	max_err = np.max(dists)

	print(f"\nCoregistration Error (mm):")
	print(f"Mean: {mean_err:.2f}, Median: {median_err:.2f}, Max: {max_err:.2f}")

	if mean_err > 5.0:
	print(f"⚠️ WARNING: Mean error = {mean_err:.2f}mm > 5mm")
	else:
	print("✅ Enhanced coregistration error acceptable")

	except Exception as e:
	# ❌ REMOVED IDENTITY FALLBACK — fail fast instead
	print(f"❌ Coregistration failed irrecoverably: {e}")
	raise RuntimeError(f"Coregistration failed: {e}")

	# === SOURCE SPACE CREATION ===
	print("\n=== Creating Source Space ===")
	if not src_file.exists():
	print("Creating volume source space...")
	# ✅ FIXED: Added mri='T1.mgz' to ensure mri_ras_t exists
	src = mne.setup_volume_source_space(
	subject=subject,
	subjects_dir=self.global_subjects_dir,
	pos=5.0,
	mri='T1.mgz', # ← critical for DiFuMo
	add_interpolator=True
	)
	src.save(src_file, overwrite=True)
	else:
	src = mne.read_source_spaces(src_file)

	print(f"Source space: {len(src[0]['vertno'])} active sources out of {src[0]['np']} total points")

	# === FORWARD SOLUTION ===
	print("\n=== Creating Forward Solution ===")
	fwd_file = self.subject_output / 'fsaverage-vol-eeg-fwd.fif'
	bem = mne.read_bem_solution(bem_file)
	fwd = mne.make_forward_solution(
	raw.info, trans=trans, src=src, bem=bem, eeg=True, mindist=5.0, n_jobs=self.config['n_jobs']
	)
	mne.write_forward_solution(fwd_file, fwd, overwrite=True)
	print("✓ Enhanced source space setup complete")

	# === LCMV BEAMFORMER ===
	print("\n=== LCMV Beamformer ===")

	# Compute SINGLE covariance from entire recording (CORRECT FOR CONTINUOUS DATA)
	print("Computing single covariance from entire recording...")

	cov = mne.compute_raw_covariance(
	raw,
	method='oas', # ✅ STATE-OF-THE-ART for long continuous data 'shrunk' or 'oas'
	picks='eeg',
	rank='info', # ✅ CRITICAL: Accounts for average reference
	n_jobs=self.config['n_jobs'],
	verbose=False)



	# Create LCMV filters: Same covariance with proper rank handling
	print("Creating LCMV spatial filters...")
	filters = mne.beamformer.make_lcmv(
	info=raw.info,
	forward=fwd,
	data_cov=cov,
	noise_cov=cov, # Same matrix - correct for continuous data
	reg=self.config['reg'],
	pick_ori='max-power',
	weight_norm='unit-noise-gain',
	reduce_rank=True, # Must be True for average reference
	rank='info', # CORRECT: Use rank information from info object
	verbose=True
	)



	# Apply LCMV to continuous data
	print("Applying LCMV filters to continuous data...")
	stc = mne.beamformer.apply_lcmv_raw(raw=raw, filters=filters)

	# Save STC in H5 format (required for complex data)
	print("Saving STC in H5 format (required for complex data)...")
	stc_file = self.subject_output / 'source_estimate_LCMV.h5'
	stc.save(stc_file, ftype='h5', overwrite=True)
	print(f"✓ STC saved successfully in H5 format: {stc_file}")

	print(f"✓ LCMV complete: {stc.data.shape} (sources x timepoints)")
	print(f"✓ STC file saved as: {stc_file}")

	# === SAVE SOURCE SPACE INFORMATION ===
	print("\n=== Saving source space information ===")

	# For volume source spaces, stc.vertices[0] contains the indices of active sources
	vertices = stc.vertices[0]

	# Get the active source indices from the source space
	active_indices = src[0]['vertno'] # indices of active sources in the full grid


	# Map STC vertices to actual source space positions
	src_points_m = src[0]['rr'][vertices]

	src_points_mm = src_points_m * 1000 # Convert to mm

	# Save the correctly indexed source points
	np.save(self.subject_output / 'source_space_points_mm.npy', src_points_mm)

	# Verify shapes match
	print(f"STC data shape: {stc.data.shape}")
	print(f"Source points shape: {src_points_mm.shape}")

	if src_points_mm.shape[0] != stc.data.shape[0]:
	print(f"WARNING: Shape mismatch detected!")
	n_sources = min(src_points_mm.shape[0], stc.data.shape[0])
	src_points_mm = src_points_mm[:n_sources]
	print(f"Using first {n_sources} source points to match STC data")

	print(f"✓ Source space points saved: {src_points_mm.shape} points")
	print(f" Matches STC data shape: {stc.data.shape[0]} sources")

	# === SAVE DEBUG INFO AND METADATA ===
	print("\n=== Saving debug info and metadata ===")

	# Save debugging info
	debug_info = {
	'src_vertno': active_indices.tolist(),
	'stc_vertices': vertices.tolist(),
	'src_np': src[0]['np'],
	'n_active_sources': len(active_indices),
	'n_stc_vertices': len(vertices),
	'coregistration_error_mm': {
	'mean': mean_err if 'mean_err' in locals() else None,
	'median': median_err if 'median_err' in locals() else None,
	'max': max_err if 'max_err' in locals() else None
	}
	}
	with open(self.subject_output / 'debug_source_info.pkl', 'wb') as f:
	pickle.dump(debug_info, f)

	# Save metadata
	metadata = {
	'stc_shape': stc.data.shape,
	'n_source_points': len(vertices),
	'source_space_indices': vertices.tolist(),
	'sfreq': sfreq,
	'duration_min': duration_min,
	'stc_file': str(stc_file),
	'src_file': str(src_file),
	'subject_output': str(self.subject_output),
	'global_subjects_dir': str(self.global_subjects_dir),
	'enhanced_coregistration': True,
	'coordinate_normalization': 'mean_centered',
	'fiducials': {
	'FidNz': ch_pos['FidNz'].tolist(),
	'FidT9': ch_pos['FidT9'].tolist(),
	'FidT10': ch_pos['FidT10'].tolist()
	}
	}
	with open(self.subject_output / 'computation_metadata.pkl', 'wb') as f:
	pickle.dump(metadata, f)

	print(f"✓ Enhanced computation complete and metadata saved")
	print(f"\n🎉 ENHANCED LCMV SOURCE ESTIMATION COMPLETE!")
	print(f" - Enhanced coregistration with error checking")
	print(f" - Proper coordinate normalization")
	print(f" - All original outputs maintained")
	print(f" - Results saved to: {self.subject_output}")

	return metadata

	def extract_difumo_time_courses(self, stc, src, config, subject_output):
	"""Extract weighted time courses from DiFuMo atlas."""
	print("\n=== DiFuMo Processing ===")
	atlas = datasets.fetch_atlas_difumo(
	dimension=config['dimension'],
	resolution_mm=config['resolution_mm']
	)
	atlas_img = nib.load(atlas.maps)
	atlas_shape = atlas_img.shape # (x, y, z, n_components)
	n_components = atlas_shape[3]

	# Get source locations in mm
	vertices = stc.vertices[0]
	src_rr = src[0]['rr'][vertices] * 1000 # m → mm

	# Apply MRI RAS transform to get MNI coordinates
	try:
	trans = src[0]['mri_ras_t']['trans']
	except KeyError:
	raise ValueError("Source space missing 'mri_ras_t' transform. Ensure it's a proper volume source space.")

	mni_coords = image.coord_transform(src_rr[:, 0], src_rr[:, 1], src_rr[:, 2], trans)
	src_coords_mni = np.array(mni_coords).T # (n_sources, 3)

	# Convert MNI mm → voxel indices in atlas space
	homog = np.column_stack([src_coords_mni, np.ones(len(src_coords_mni))])
	vox_coords = (np.linalg.inv(atlas_img.affine) @ homog.T).T[:, :3]
	vox_coords = np.round(vox_coords).astype(int)

	# Filter valid voxels inside atlas bounds
	valid_mask = (
	(vox_coords >= 0).all(axis=1) &
	(vox_coords[:, 0] < atlas_shape[0]) &
	(vox_coords[:, 1] < atlas_shape[1]) &
	(vox_coords[:, 2] < atlas_shape[2])
	)
	valid_indices = np.where(valid_mask)[0]
	valid_voxels = vox_coords[valid_mask]

	print(f"Using {len(valid_indices)}/{len(vertices)} sources within atlas bounds")

	# Extract time courses
	time_courses = []
	component_info = []
	threshold = 1e-6

	# ✅ ADDED: Handle complex-valued STC (though max-power should be real)
	if np.iscomplexobj(stc.data):
	print("⚠️ STC is complex — taking absolute value for DiFuMo")
	stc.data = np.abs(stc.data)

	for comp_idx in range(n_components):
	if comp_idx % 100 == 0:
	print(f"Processing component {comp_idx + 1}/{n_components}")

	try:
	comp_map = atlas_img.slicer[..., comp_idx].get_fdata()
	weights, stc_indices = [], []

	for i, (x, y, z) in enumerate(valid_voxels):
	prob = comp_map[x, y, z]
	if prob > threshold:
	weights.append(prob)
	stc_indices.append(valid_indices[i])

	if weights:
	weights = np.array(weights)
	weights /= weights.sum() # Normalize
	tc = np.average(stc.data[stc_indices], axis=0, weights=weights)
	info = {
	'component': comp_idx,
	'n_sources': len(stc_indices),
	'max_weight': weights.max(),
	'mean_weight': weights.mean()
	}
	else:
	tc = np.zeros(stc.data.shape[1])
	info = {
	'component': comp_idx,
	'n_sources': 0,
	'max_weight': 0.0,
	'mean_weight': 0.0
	}

	time_courses.append(tc)
	component_info.append(info)

	except Exception as e:
	print(f"Error in component {comp_idx}: {e}")
	time_courses.append(np.zeros(stc.data.shape[1]))
	component_info.append({
	'component': comp_idx, 'n_sources': 0, 'max_weight': 0.0, 'mean_weight': 0.0
	})

	# Summary
	valid_comps = sum(1 for info in component_info if info['n_sources'] > 0)
	print(f"✅ {valid_comps}/{n_components} components have at least one source")

	# Save outputs
	subject_output = Path(subject_output)
	np.save(subject_output / 'difumo_time_courses.npy', np.array(time_courses))
	pd.DataFrame(component_info).to_csv(subject_output / 'difumo_component_info.csv', index=False)
	print(f"💾 Saved to: {subject_output}")

	return np.array(time_courses), component_info

	def run_difumo_extraction(self, difumo_config=None):
	"""Run DiFuMo time course extraction on existing data."""
	if difumo_config is None:
	difumo_config = {
	'dimension': 512,
	'resolution_mm': 2 # 2mm resolution for 512-component DiFuMo
	}

	try:
	# --- USER INPUT: UPDATE THESE IF NEEDED ---
	subject_output = self.subject_output
	stc_base_name = "source_estimate_LCMV" # without extension

	# --- AUTODETECT STC FILE (handles .stc, -vl.stc, .h5) ---
	stc_file = None
	for suffix in ['-vl.stc', '.stc', '.h5']:
	candidate = subject_output / f"{stc_base_name}{suffix}"
	if candidate.exists():
	stc_file = candidate
	break
	if not stc_file:
	raise FileNotFoundError(f"STC file not found in {subject_output}")

	# --- LOAD DATA ---
	print(f"🔁 Loading STC: {stc_file}")
	stc = mne.read_source_estimate(stc_file)
	print(f"Loaded STC: {stc.data.shape} (sources × time)")

	# ✅ FIXED: Use consistent global path (no hardcoded path)
	src_file = self.global_subjects_dir / "fsaverage-vol-5mm-src.fif"

	print(f"🔁 Loading source space: {src_file}")
	if not src_file.exists():
	raise FileNotFoundError(f"Source space not found: {src_file}")
	src = mne.read_source_spaces(src_file)
	print(f"Loaded source space with {len(src[0]['vertno'])} active sources")

	# --- RUN EXTRACTION ---
	time_courses, component_info = self.extract_difumo_time_courses(
	stc=stc,
	src=src,
	config=difumo_config,
	subject_output=subject_output
	)

	print("\n🎉 SUCCESS: DiFuMo time series extraction complete!")
	print(f"📊 Output shape: {time_courses.shape} (512 components × {time_courses.shape[1]} time points)")
	print(f"📄 Details saved in:\n - {subject_output / 'difumo_time_courses.npy'}\n - {subject_output / 'difumo_component_info.csv'}")

	return time_courses, component_info

	except Exception as e:
	print(f"❌ Error during DiFuMo extraction: {e}")
	raise

	def list_output_files(self):
	"""List all files in the output folder."""
	print(f"\n=== Files in output folder: {self.subject_output} ===")
	for file in os.listdir(self.subject_output):
	print(file)
	return list(os.listdir(self.subject_output))



	# --- CONFIGURATION ---
	PROJECT_BASE = "/home/jaizor/jaizor/xtra"
	CROP_BASE_DIR = Path(PROJECT_BASE) / "derivatives/ica"
	GPS_FILE_PATH = "data/ghw280_from_egig.gpsc"

	# Configuration template
	CONFIG_TEMPLATE = {
	'project_base': PROJECT_BASE,
	'gpsc_file_path': GPS_FILE_PATH,
	'reg': 0.01,
	'n_jobs': -1,
	'skip_difumo': False
	}