Buckets:

ktongue
/

DEM_MCM

Files

xet

ktongue/DEM_MCM / transition_matrix_gpu_article.py

ktongue

about 2 months ago

download

raw

7.1 kB

	"""
	===================================================================================
	MODÉLISATION MARKOVIENNE GPU - VERSION OPTIMISÉE
	===================================================================================

	Implémentation GPU avec vectorisation maximale de la formule de l'article:
	P_ij = (1/N_LT) * Sum_n [ T_ij(n) / phi(i, t_{n-1}) ]

	Optimisations:
	- Toutes les opérations tensorielles sur GPU
	- torch.bincount pour comptage vectorisé O(n)
	- torch.index_add pour accumulation efficace
	- Batching pour réduire les overheads CPU-GPU

	===================================================================================
	"""

	import polars as pl
	from huggingface_hub import HfFileSystem
	import torch
	import numpy as np
	from tqdm import tqdm

	# ===================================================================================
	# PARAMETERS
	# ===================================================================================
	N_LT = 30  # Number of time steps (transitions) used for learning
	nx, ny, nz = 5, 5, 5  # Number of spatial cells along x, y and z
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	# ===================================================================================

	fs = HfFileSystem()
	folder_path = "hf://buckets/ktongue/DEM_MCM/Output Paraview"  # NOTE: folder name contains a space
	# NOTE(review): sorted() orders names lexicographically; this only matches the
	# time order if the step number in the file names is zero-padded - confirm.
	files = sorted(fs.glob(f"{folder_path}/*.csv"))

	print(f"📁 {len(files)} fichiers")
	print(f"🖥️ Device: {DEVICE}")
	print(f"⚙️ N_LT = {N_LT}")

	# ===================================================================================
	# STEP 1: DOMAIN BOUNDS (CPU, computed once)
	# ===================================================================================
	print("🔍 Calcul des limites...")
	if not files:
	    raise ValueError(f"No CSV file found in {folder_path!r}")

	# Only every 50th file is inspected; coordinates that fall outside the sampled
	# bounds are clipped into the boundary cells by states_to_indices.
	sample = files[::50]

	# Keep running extrema per axis instead of copying every coordinate of every
	# sampled file into Python lists.
	coordinate_columns = ("coordinates:0", "coordinates:1", "coordinates:2")
	lows = [float("inf")] * 3
	highs = [float("-inf")] * 3
	for path in sample:
	    with fs.open(path, "rb") as handle:
	        frame = pl.read_csv(handle)
	    for axis, column in enumerate(coordinate_columns):
	        low, high = frame[column].min(), frame[column].max()
	        if low is None or high is None:
	            # Column has no values in this file: nothing to contribute.
	            continue
	        lows[axis] = min(lows[axis], low)
	        highs[axis] = max(highs[axis], high)

	if any(low > high for low, high in zip(lows, highs)):
	    raise ValueError("Sampled files contain no coordinates; cannot compute bounds")

	# A small margin keeps the extreme particles strictly inside the grid.
	xmin, xmax = lows[0] - 0.001, highs[0] + 0.001
	ymin, ymax = lows[1] - 0.001, highs[1] + 0.001
	zmin, zmax = lows[2] - 0.001, highs[2] + 0.001

	# Discretisation parameters: total number of cells and cell width per axis
	n_states = nx * ny * nz
	dx = (xmax - xmin) / nx
	dy = (ymax - ymin) / ny
	dz = (zmax - zmin) / nz

	print(f"📊 {n_states} états spatiaux")

	# ===================================================================================
	# FONCTIONS GPU VECTORISÉES
	# ===================================================================================


	def states_to_indices(x, y, z):
	    """
	    Map particle coordinates to linear spatial-state indices.

	    Each axis is cut into equal-width cells using the module-level origin,
	    cell width and cell count (xmin/dx/nx, ymin/dy/ny, zmin/dz/nz). The
	    per-axis cell index is (coordinate - origin) / width converted to an
	    integer, then clipped to the valid range so that out-of-bounds particles
	    land in the nearest boundary cell.

	    Linear index: state = ix + iy * nx + iz * nx * ny

	    The computation is vectorised with NumPy (no Python loop over particles)
	    and runs on the CPU.

	    Args:
	        x, y, z: array-like coordinates, one entry per particle.

	    Returns:
	        NumPy int64 array with one state index per particle.
	    """
	    cells = []
	    for coords, origin, width, count in (
	        (x, xmin, dx, nx),
	        (y, ymin, dy, ny),
	        (z, zmin, dz, nz),
	    ):
	        raw = ((np.array(coords) - origin) / width).astype(np.int64)
	        # Clamp so that every particle maps to an existing cell.
	        cells.append(np.clip(raw, 0, count - 1))

	    cell_x, cell_y, cell_z = cells
	    return cell_x + cell_y * nx + cell_z * nx * ny


	def compute_P_matrix_gpu(states_prev, states_curr, n_states):
	    """
	    Compute the row-normalised transition matrix P_n for ONE time step.

	    Formula: P_ij(n) = T_ij(n) / phi(i, t_{n-1})
	    where T_ij(n) is the number of particles that moved from state i to
	    state j, and phi(i, t_{n-1}) is the number of particles that were in
	    state i before the step.

	    Entry k of ``states_prev`` is paired with entry k of ``states_curr``.
	    If the two tensors differ in length, only the first
	    min(len(states_prev), len(states_curr)) pairs are used, and phi is
	    counted over exactly those pairs so every occupied row sums to 1.
	    NOTE(review): this pairing assumes both snapshots list the particles in
	    the same order - confirm against the data files.

	    Args:
	        states_prev: 1-D integer tensor, state of each particle at t-1.
	        states_curr: 1-D integer tensor, state of each particle at t.
	        n_states: number of states; indices must lie in [0, n_states).

	    Returns:
	        float64 tensor of shape (n_states, n_states) on DEVICE. Rows of
	        states that held no particle at t-1 are all zero.
	    """
	    # Make sure both tensors live on the compute device
	    s_prev = states_prev.to(DEVICE)
	    s_curr = states_curr.to(DEVICE)

	    # Only particles present in both snapshots can form a transition pair.
	    n = min(len(s_prev), len(s_curr))

	    # Linear index into the flattened (n_states x n_states) matrix:
	    # pair (i, j) -> i * n_states + j
	    flat_index = s_prev[:n] * n_states + s_curr[:n]

	    # T_ij(n): one vectorised histogram over the pair indices.
	    T = torch.bincount(flat_index, minlength=n_states * n_states)
	    T = T.to(torch.float64).reshape(n_states, n_states)

	    # phi(i, t_{n-1}) is the row sum of T: it counts exactly the particles
	    # that entered T, in float64 like T itself.
	    phi = T.sum(dim=1, keepdim=True)

	    # Rows with phi == 0 contain only zeros, so clamping the divisor to 1
	    # yields 0 for them without ever dividing by zero.
	    return T / phi.clamp(min=1.0)


	# ===================================================================================
	# STEP 2: MAIN LOOP
	# ===================================================================================
	print(f"📊 Calcul sur {N_LT} timesteps...")


	def _load_states(path):
	    """Read one snapshot CSV and return its per-particle state indices on DEVICE."""
	    with fs.open(path, "rb") as handle:
	        frame = pl.read_csv(handle)
	    state_ids = states_to_indices(
	        frame["coordinates:0"], frame["coordinates:1"], frame["coordinates:2"]
	    )
	    return torch.from_numpy(state_ids).to(DEVICE)


	# Accumulator: sum of the per-step normalised transition matrices
	P_accumulator = torch.zeros((n_states, n_states), dtype=torch.float64, device=DEVICE)

	# N_LT transitions need N_LT + 1 consecutive snapshots.
	files_to_process = files[: N_LT + 1]
	n_transitions = len(files_to_process) - 1
	if n_transitions < 1:
	    raise RuntimeError("At least two snapshot files are needed to compute a transition")
	if n_transitions < N_LT:
	    print(f"⚠️ Seulement {n_transitions} transitions disponibles (N_LT = {N_LT})")

	# Each snapshot is downloaded and parsed once: the "current" states of one
	# step are reused as the "previous" states of the next step.
	states_prev = _load_states(files_to_process[0])
	for path in tqdm(files_to_process[1:], desc="Learning"):
	    states_curr = _load_states(path)

	    # Transition matrix of this step, accumulated into the running sum
	    P_n = compute_P_matrix_gpu(states_prev, states_curr, n_states)
	    P_accumulator += P_n

	    states_prev = states_curr

	# ===================================================================================
	# STEP 3: AVERAGE
	# ===================================================================================
	# Divide by the number of transitions actually accumulated (equal to N_LT
	# whenever enough snapshot files are available).
	P = P_accumulator / n_transitions

	# ===================================================================================
	# CHECK AND SAVE
	# ===================================================================================
	print(f"\n✅ Terminé!")
	print(f" Shape: {P.shape}")

	# Row sums of states that were occupied at least once.
	row_sums = P.sum(dim=1)
	visited = row_sums > 0
	visited_sums = row_sums[visited]
	if visited_sums.numel() > 0:
	    print(
	        f" Somme lignes (visitês): min={visited_sums.min():.4f}, max={visited_sums.max():.4f}"
	    )

	# Save the averaged matrix as a NumPy array
	P_np = P.cpu().numpy()
	np.save(f"/kaggle/working/transition_matrix_NLT_{N_LT}.npy", P_np)
	print(f"💾 Sauvegardé: transition_matrix_NLT_{N_LT}.npy")

	# Quick analysis: the diagonal holds the probability of staying in the same state
	diag = np.diag(P_np)
	print(f"\n📈 Analyse:")
	print(f" P(rester) moyen: {diag.mean():.4f}")
	print(f" P(rester) std: {diag.std():.4f}")

Xet Storage Details

Size:: 7.1 kB
Xet hash:: 99b4601fd9ba589a2f0707941b5ba9df50838142dca24776ad066cd0fa425bf5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.