# Source path: quantum/qlbm/fluid3d_pyvista.py
# Commit 6e27ad5 (harishaseebat92): "Moved fluid3d_pyvista to qlbm folder"
import os
import math
import tempfile
import cudaq
import numpy as np
import cupy as cp
from pathlib import Path
import pyvista as pv
from cupy.cuda.memory import MemoryPointer, UnownedMemory
# Enable headless rendering for dev container
# When the DISPLAY environment variable is not set, start a virtual
# framebuffer through PyVista so rendering can proceed without a display.
if os.environ.get("DISPLAY") is None:
    pv.start_xvfb()
# Set Plotly engine for image export
# Plotly is treated as optional: if the import fails, or the kaleido scope
# attribute is not available, this configuration step is silently skipped.
try:
    import plotly.io as pio
    pio.kaleido.scope.mathjax = None
except (ImportError, AttributeError):
    pass
# -----------------------------------------------------------------------------
# Module-level CUDA-Q primitives (registered once to avoid repeated definition)
# -----------------------------------------------------------------------------
# Number of ancilla qubits; 2**NUM_ANC is also the padded length of the
# preparation vector used to build the "prep_op" unitary.
NUM_ANC = 3
# Number of entries in the unpadded preparation vector.
NDIR = 7
# Guard flag: set to True once "prep_op" has been registered with CUDA-Q.
_PREP_OP_REGISTERED = False
def _ensure_prep_operation_registered():
    """Register the fixed "prep_op" custom operation, at most once.

    The unitary is the reflection ``2 * v v^T - I`` where ``v`` is the
    normalized vector obtained from the element-wise square root of the
    zero-padded weight vector with 1 added to its first entry.
    """
    global _PREP_OP_REGISTERED
    if not _PREP_OP_REGISTERED:
        weights = np.array([1 / 4, 1 / 4, 0, 1 / 4, 0, 1 / 4, 0], dtype=float)
        # Zero-pad on the right up to the full 2**NUM_ANC dimension.
        padded = np.pad(weights, (0, 2**NUM_ANC - NDIR))
        reflector = np.sqrt(padded)
        reflector[0] += 1
        reflector = reflector / np.linalg.norm(reflector)
        dimension = len(reflector)
        U_prep = 2 * np.outer(reflector, reflector) - np.eye(dimension)
        # Name must remain "prep_op" because kernels reference it symbolically.
        cudaq.register_operation("prep_op", U_prep)
        _PREP_OP_REGISTERED = True
# Register "prep_op" at import time so the kernels defined in this module can
# reference it by name.
_ensure_prep_operation_registered()
@cudaq.kernel
def alloc_kernel(num_qubits_alloc: int):
    """Allocate a register of the requested size."""
    # No gates are applied: the kernel only allocates num_qubits_alloc qubits.
    cudaq.qvector(num_qubits_alloc)
@cudaq.kernel
def rshift(q: cudaq.qview, n: int):
    # Cascade of (multi-)controlled X gates over q[0:n], visiting targets from
    # the highest index (n - 1) down to index 0. Each target is flipped under
    # control of every lower-indexed qubit; q[0] is flipped unconditionally.
    # NOTE(review): this appears to act as +1 (mod 2**n) with q[0] as the
    # least-significant bit -- confirm against the intended shift direction.
    for i in range(n):
        if i == n - 1:
            # Last iteration: target is q[0], no controls.
            x(q[n - 1 - i])
        elif i == n - 2:
            # Single control: q[0] controls q[1].
            x.ctrl(q[n - 1 - (i + 1)], q[n - 1 - i])
        else:
            # General case: all qubits below the target act as controls.
            x.ctrl(q[0:n - 1 - i], q[n - 1 - i])
@cudaq.kernel
def lshift(q: cudaq.qview, n: int):
    # Same gates as rshift applied in the opposite order: targets are visited
    # from index 0 up to n - 1, so q[0] is flipped first and each later target
    # is flipped under control of every lower-indexed qubit.
    # NOTE(review): since the gate sequence is the reverse of rshift, this
    # appears to be its inverse (-1 mod 2**n) -- confirm.
    for i in range(n):
        if i == 0:
            # First iteration: unconditional flip of q[0].
            x(q[0])
        elif i == 1:
            # Single control: q[0] controls q[1].
            x.ctrl(q[0], q[1])
        else:
            # General case: q[0:i] all control the flip of q[i].
            x.ctrl(q[0:i], q[i])
@cudaq.kernel
def d2q5_tstep(q: cudaq.qview, nqx: int, nqy: int, nqz: int, nq_dir: int, dirs_i: list[int]):
    # Applies six direction-controlled shifts to the position registers.
    #
    # Register layout inside q (in order): nqx qubits for x, nqy for y, nqz
    # for z, then nq_dir direction qubits.
    #
    # dirs_i is a flat list; entries [i * nq_dir, (i + 1) * nq_dir) hold the
    # bit pattern of direction i. For every direction handled below the same
    # three steps are used:
    #   1. X every direction qubit whose pattern bit is 0,
    #   2. apply the shift controlled on all direction qubits,
    #   3. repeat step 1 to undo the X gates.
    # Directions are processed in the order 2, 1 (x register), 4, 3
    # (y register), 6, 5 (z register); even indices use lshift and odd
    # indices use rshift. Direction 0 gets no shift.
    qx = q[0:nqx]
    qy = q[nqx:nqx + nqy]
    qz = q[nqx + nqy:nqx + nqy + nqz]
    qdir = q[nqx + nqy + nqz:nqx + nqy + nqz + nq_dir]
    # Direction 2: lshift on the x register.
    i = 2
    b_list = dirs_i[i * nq_dir:(i + 1) * nq_dir]
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    cudaq.control(lshift, qdir, qx, nqx)
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    # Direction 1: rshift on the x register.
    i = 1
    b_list = dirs_i[i * nq_dir:(i + 1) * nq_dir]
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    cudaq.control(rshift, qdir, qx, nqx)
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    # Direction 4: lshift on the y register.
    i = 4
    b_list = dirs_i[i * nq_dir:(i + 1) * nq_dir]
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    cudaq.control(lshift, qdir, qy, nqy)
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    # Direction 3: rshift on the y register.
    i = 3
    b_list = dirs_i[i * nq_dir:(i + 1) * nq_dir]
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    cudaq.control(rshift, qdir, qy, nqy)
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    # Direction 6: lshift on the z register.
    i = 6
    b_list = dirs_i[i * nq_dir:(i + 1) * nq_dir]
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    cudaq.control(lshift, qdir, qz, nqz)
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    # Direction 5: rshift on the z register.
    i = 5
    b_list = dirs_i[i * nq_dir:(i + 1) * nq_dir]
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
    cudaq.control(rshift, qdir, qz, nqz)
    for j in range(nq_dir):
        b = b_list[j]
        if b == 0:
            x(qdir[j])
@cudaq.kernel
def d2q5_tstep_wrapper(state: cudaq.State,
                       nqx: int,
                       nqy: int,
                       nqz: int,
                       nq_dir: int,
                       dirs_i: list[int],
                       x_coeff_var_indices: list[int],
                       x_coeffs: list[float],
                       y_coeff_var_indices: list[int],
                       y_coeffs: list[float],
                       z_coeff_var_indices: list[int],
                       z_coeffs: list[float],
                       x_coeff_var_indices_: list[int],
                       x_coeffs_: list[float],
                       y_coeff_var_indices_: list[int],
                       y_coeffs_: list[float],
                       z_coeff_var_indices_: list[int],
                       z_coeffs_: list[float],
                       unprep1_coeff_var_indices: list[int],
                       unprep1_coeffs: list[float],
                       unprep2_coeff_var_indices: list[int],
                       unprep2_coeffs: list[float]):
    # One full time step: initialize the register from `state`, apply the
    # fixed "prep_op" on the direction qubits, apply position-dependent
    # controlled-RY rotations, run d2q5_tstep, then apply a second group of
    # position-dependent rotations followed by a fixed RY.
    #
    # Every (*_coeff_var_indices, *_coeffs) pair is consumed the same way: the
    # index list is a flat sequence of groups [count, idx_1, ..., idx_count].
    # For each group, each idx selects position qubit
    # q[nqx + nqy + nqz - 1 - idx] as the control of a CNOT onto the rotation
    # target, after which one controlled RY is applied using the next entry of
    # the matching coefficient list.
    q = cudaq.qvector(state)
    qdir = q[nqx + nqy + nqz:nqx + nqy + nqz + nq_dir]
    # Fixed preparation on the three direction qubits (note reversed order).
    prep_op(qdir[2], qdir[1], qdir[0])
    x.ctrl(qdir[0], qdir[1])
    ind = 0
    coeff_ind = 0
    # --- x rotations (before streaming); qdir[2] is inverted around them.
    x(qdir[2])
    while ind < len(x_coeff_var_indices):
        tuple_length = x_coeff_var_indices[ind]
        for sub_ind in range(ind + 1, ind + 1 + tuple_length):
            x.ctrl(q[nqx + nqy + nqz - 1 - x_coeff_var_indices[sub_ind]], qdir[0])
        ry.ctrl(-x_coeffs[coeff_ind], [qdir[2], qdir[1]], qdir[0])
        coeff_ind += 1
        # Skip the count entry plus the indices just consumed.
        ind += (1 + tuple_length)
    x(qdir[2])
    ind = 0
    coeff_ind = 0
    # --- z rotations (before streaming); same target and controls as x.
    while ind < len(z_coeff_var_indices):
        tuple_length = z_coeff_var_indices[ind]
        for sub_ind in range(ind + 1, ind + 1 + tuple_length):
            x.ctrl(q[nqx + nqy + nqz - 1 - z_coeff_var_indices[sub_ind]], qdir[0])
        ry.ctrl(-z_coeffs[coeff_ind], [qdir[2], qdir[1]], qdir[0])
        coeff_ind += 1
        ind += (1 + tuple_length)
    x.ctrl(qdir[0], qdir[1])
    ind = 0
    coeff_ind = 0
    # --- y rotations (before streaming); target is qdir[2].
    while ind < len(y_coeff_var_indices):
        tuple_length = y_coeff_var_indices[ind]
        for sub_ind in range(ind + 1, ind + 1 + tuple_length):
            x.ctrl(q[nqx + nqy + nqz - 1 - y_coeff_var_indices[sub_ind]], qdir[2])
        ry.ctrl(y_coeffs[coeff_ind], [qdir[0], qdir[1]], qdir[2])
        coeff_ind += 1
        ind += (1 + tuple_length)
    # Direction-controlled shifts of the position registers.
    d2q5_tstep(q, nqx, nqy, nqz, nq_dir, dirs_i)
    ind = 0
    coeff_ind = 0
    # --- y rotations (after streaming), using the underscore-suffixed inputs.
    while ind < len(y_coeff_var_indices_):
        tuple_length = y_coeff_var_indices_[ind]
        for sub_ind in range(ind + 1, ind + 1 + tuple_length):
            x.ctrl(q[nqx + nqy + nqz - 1 - y_coeff_var_indices_[sub_ind]], qdir[2])
        ry.ctrl(-y_coeffs_[coeff_ind], [qdir[0], qdir[1]], qdir[2])
        coeff_ind += 1
        ind += (1 + tuple_length)
    x.ctrl(qdir[0], qdir[1])
    ind = 0
    coeff_ind = 0
    # --- x rotations (after streaming); qdir[2] is inverted around them.
    x(qdir[2])
    while ind < len(x_coeff_var_indices_):
        tuple_length = x_coeff_var_indices_[ind]
        for sub_ind in range(ind + 1, ind + 1 + tuple_length):
            x.ctrl(q[nqx + nqy + nqz - 1 - x_coeff_var_indices_[sub_ind]], qdir[0])
        ry.ctrl(x_coeffs_[coeff_ind], [qdir[1], qdir[2]], qdir[0])
        coeff_ind += 1
        ind += (1 + tuple_length)
    x(qdir[2])
    ind = 0
    coeff_ind = 0
    # --- z rotations (after streaming).
    while ind < len(z_coeff_var_indices_):
        tuple_length = z_coeff_var_indices_[ind]
        for sub_ind in range(ind + 1, ind + 1 + tuple_length):
            x.ctrl(q[nqx + nqy + nqz - 1 - z_coeff_var_indices_[sub_ind]], qdir[0])
        ry.ctrl(z_coeffs_[coeff_ind], [qdir[1], qdir[2]], qdir[0])
        coeff_ind += 1
        ind += (1 + tuple_length)
    x.ctrl(qdir[0], qdir[1])
    ind = 0
    coeff_ind = 0
    # --- "unprep2" rotations on qdir[1], controlled by qdir[2]; wrapped in a
    # pair of CNOTs from qdir[1] onto qdir[2].
    x.ctrl(qdir[1], qdir[2])
    while ind < len(unprep2_coeff_var_indices):
        tuple_length = unprep2_coeff_var_indices[ind]
        for sub_ind in range(ind + 1, ind + 1 + tuple_length):
            x.ctrl(q[nqx + nqy + nqz - 1 - unprep2_coeff_var_indices[sub_ind]], qdir[1])
        ry.ctrl(unprep2_coeffs[coeff_ind], qdir[2], qdir[1])
        coeff_ind += 1
        ind += (1 + tuple_length)
    x.ctrl(qdir[1], qdir[2])
    ind = 0
    coeff_ind = 0
    # --- "unprep1" rotations on qdir[1], controlled by qdir[0].
    while ind < len(unprep1_coeff_var_indices):
        tuple_length = unprep1_coeff_var_indices[ind]
        for sub_ind in range(ind + 1, ind + 1 + tuple_length):
            x.ctrl(q[nqx + nqy + nqz - 1 - unprep1_coeff_var_indices[sub_ind]], qdir[1])
        ry.ctrl(-unprep1_coeffs[coeff_ind], qdir[0], qdir[1])
        coeff_ind += 1
        ind += (1 + tuple_length)
    # Final fixed rotation on qdir[0].
    ry(-2 * np.pi / 3, qdir[0])
# -----------------------------------------------------------------------------
# Helper Functions
# -----------------------------------------------------------------------------
def bin_to_gray(bin_s):
    """Convert a binary digit string to its Gray-code digit string.

    The first character is copied unchanged; every following output digit is
    the XOR of the two neighbouring input digits.
    """
    pieces = [bin_s[0]]
    for left_ch, right_ch in zip(bin_s, bin_s[1:]):
        differs = bool(int(left_ch)) != bool(int(right_ch))
        pieces.append("1" if differs else "0")
    return "".join(pieces)
def gray_to_bin(gray_s):
    """Convert a Gray-code digit string back to a binary digit string.

    The first character is copied unchanged; every following output digit is
    the XOR of the previously produced output digit and the current Gray digit.
    """
    decoded = gray_s[0]
    for gray_ch in gray_s[1:]:
        flipped = bool(int(decoded[-1])) != bool(int(gray_ch))
        decoded += "1" if flipped else "0"
    return decoded
def bin_to_int(bin_s):
    """Parse a base-2 digit string and return the integer it represents."""
    value = int(bin_s, base=2)
    return value
def int_to_bin(i, pad):
    """Return the binary digits of a non-negative integer, zero-padded on the left.

    Args:
        i: Non-negative integer to convert.
        pad: Minimum width of the result; longer values are not truncated.

    Returns:
        A string of '0'/'1' characters of length at least ``pad``.

    Raises:
        ValueError: If ``i`` is negative. Slicing ``bin(i)[2:]`` on a negative
            number would otherwise yield a malformed string such as ``'b11'``.
    """
    if i < 0:
        raise ValueError(f"int_to_bin expects a non-negative integer, got {i}")
    return bin(i)[2:].zfill(pad)
def fwht_approx(f,N,num_points_per_dim,threshold=1e-10):
    """Build a sparse set of rotation angles for f on an N x N x N grid.

    The grid is split into cubic blocks of side N // num_points_per_dim. For
    each block an angle 2*arccos(f(...)) is sampled at the block centre, plus
    per-axis slope terms, and the values are then combined in place with
    pairwise (x + y) / 2, (x - y) / 2 updates at doubling strides.
    NOTE(review): the name suggests an approximate fast Walsh-Hadamard
    transform -- confirm against the reference derivation.

    Args:
        f: Callable f(x, y, z) whose value is passed to np.arccos.
        N: Number of grid points per axis.
        num_points_per_dim: Number of sample blocks per axis.
        threshold: Entries with absolute value <= threshold are dropped.

    Returns:
        Two parallel lists (values, keys): the retained angles and their flat
        indices, in dictionary insertion order.
    """
    linear_block_size=int(N//num_points_per_dim)
    # Number of slope entries stored per axis for each block.
    num_angles_per_block=int(np.log2(linear_block_size))
    # Sparse storage: flat index -> angle value.
    thetas={}
    for k in range(num_points_per_dim):
        for j in range(num_points_per_dim):
            for i in range(num_points_per_dim):
                # Angle sampled at the centre of block (i, j, k).
                avg_f=2*np.arccos(f(i*linear_block_size+(linear_block_size-1)/2,j*linear_block_size+(linear_block_size-1)/2,k*linear_block_size+(linear_block_size-1)/2))
                thetas[k*(N**2)*linear_block_size+j*N*linear_block_size+i*linear_block_size]=avg_f
                # Finite-difference slopes of the angle across the block along
                # each axis (value at this block's edge minus the next one's).
                # NOTE(review): the wrap-around uses % N on the block index;
                # when linear_block_size > 1 this never wraps -- confirm that
                # % num_points_per_dim was not intended.
                slope_x=(2*np.arccos(f(i*linear_block_size,j*linear_block_size+(linear_block_size-1)/2,k*linear_block_size+(linear_block_size-1)/2))-2*np.arccos(f(((i+1)%N)*linear_block_size,j*linear_block_size+(linear_block_size-1)/2,k*linear_block_size+(linear_block_size-1)/2)))/linear_block_size
                slope_y=(2*np.arccos(f(i*linear_block_size+(linear_block_size-1)/2,j*linear_block_size,k*linear_block_size+(linear_block_size-1)/2))-2*np.arccos(f(i*linear_block_size+(linear_block_size-1)/2,((j+1)%N)*linear_block_size,k*linear_block_size+(linear_block_size-1)/2)))/linear_block_size
                slope_z=(2*np.arccos(f(i*linear_block_size+(linear_block_size-1)/2,j*linear_block_size+(linear_block_size-1)/2,k*linear_block_size))-2*np.arccos(f(i*linear_block_size+(linear_block_size-1)/2,j*linear_block_size+(linear_block_size-1)/2,((k+1)%N)*linear_block_size)))/linear_block_size
                for m in range(num_angles_per_block):
                    # Slope entries live at power-of-two offsets from the
                    # block's base index along x (1), y (N) and z (N**2).
                    thetas[k*(N**2)*linear_block_size+j*N*linear_block_size+i*linear_block_size + 2**m]=slope_x*(2**(m-1))
                    thetas[k*(N**2)*linear_block_size+j*N*linear_block_size+i*linear_block_size + N*(2**m)]=slope_y*(2**(m-1))
                    thetas[k*(N**2)*linear_block_size+j*N*linear_block_size+i*linear_block_size + (N**2)*(2**m)]=slope_z*(2**(m-1))
    # Pairwise combination stage; h is the distance between paired entries.
    h = linear_block_size
    while h < N**3:
        for i in range(0, N**3, h * 2):
            # Only start from indices whose y and z coordinates sit on a block
            # boundary.
            if (i//N)%linear_block_size!=0:
                continue
            if (i//(N**2))%linear_block_size!=0:
                continue
            j=i
            while j<i+h:
                # Combine the block-base entries at j and j + h ...
                index=j
                x = thetas[index]
                y = thetas[index + h]
                thetas[index] = (x + y)/2
                thetas[index + h] = (x - y)/2
                # ... and the slope entries attached to the same two blocks.
                for ax in range(3):
                    for m in range(num_angles_per_block):
                        index = j + (N**ax) * (2**m)
                        x = thetas[index]
                        y = thetas[index + h]
                        thetas[index] = (x + y)/2
                        thetas[index + h] = (x - y)/2
                # Advance to the next block base, skipping rows/planes that do
                # not start a block.
                j+=linear_block_size
                if (j//N)%linear_block_size==1:
                    j+=(linear_block_size-1)*N
                if (j//(N**2))%linear_block_size==1:
                    j+=(linear_block_size-1)*(N**2)
        h *= 2
        # When the stride reaches a full row or plane, jump over the strides
        # that fall inside a block along the next axis.
        if h==N:
            h=N*linear_block_size
        if h==N**2:
            h=(N**2)*linear_block_size
    return [theta for theta in thetas.values() if abs(theta)>threshold],[key for key in thetas.keys() if abs(thetas[key])>threshold]
def get_circuit_inputs(f, num_reg_qubits, num_points_per_dim):
    """Turn the angles from fwht_approx into kernel-ready coefficient lists.

    The angle indices are mapped through a Gray-to-binary conversion and the
    angles sorted by the resulting position. Walking through that order, the
    bit positions that change between consecutive Gray codes (starting from,
    and finally returning to, the all-zero string) are recorded.

    Args:
        f: Callable f(x, y, z) forwarded to fwht_approx.
        num_reg_qubits: Qubits per axis; the grid has 2**num_reg_qubits points
            per axis and codes are 3 * num_reg_qubits bits wide.
        num_points_per_dim: Sample blocks per axis, forwarded to fwht_approx.

    Returns:
        (coeffs, flat_ctrls): the sorted angles followed by a trailing 0.0,
        and a flat list made of groups [count, idx_1, ..., idx_count], one
        group per coefficient.
    """
    total_bits = 3 * num_reg_qubits
    theta_vec, indices = fwht_approx(f, 2**num_reg_qubits, num_points_per_dim)

    positions = [
        bin_to_int(gray_to_bin(int_to_bin(flat_index, total_bits)))
        for flat_index in indices
    ]
    ordered = sorted(zip(theta_vec, positions), key=lambda pair: pair[1])

    def changed_bits(old_bs, new_bs):
        # Positions at which the two equal-length bit strings differ.
        return [pos for pos, (old_ch, new_ch) in enumerate(zip(old_bs, new_bs)) if old_ch != new_ch]

    all_zero = "0" * total_bits
    previous_bs = all_zero
    flat_ctrls = []
    for _, position in ordered:
        gray_bs = bin_to_gray(int_to_bin(position % (2**total_bits), total_bits))
        flips = changed_bits(previous_bs, gray_bs)
        flat_ctrls.append(len(flips))
        flat_ctrls.extend(flips)
        previous_bs = gray_bs

    # Closing group: flips needed to get back to the all-zero string; it is
    # paired with the trailing 0.0 coefficient.
    closing_flips = changed_bits(previous_bs, all_zero)
    flat_ctrls.append(len(closing_flips))
    flat_ctrls.extend(closing_flips)

    coeffs = [theta for theta, _ in ordered]
    coeffs.append(0.0)
    return coeffs, flat_ctrls
def to_cupy_array(state):
    """Wrap the device memory behind a CUDA-Q state as a 1-D CuPy array.

    No data is copied: the returned array points at the tensor's own buffer,
    and `state` is recorded as the owner of that memory. The array is always
    declared as complex128.
    """
    tensor = state.getTensor()
    device_ptr = tensor.data()
    element_count = tensor.get_num_elements()
    byte_count = element_count * tensor.get_element_size()
    unowned = UnownedMemory(device_ptr, byte_count, owner=state)
    return cp.ndarray(
        element_count,
        dtype=cp.complex128,
        memptr=MemoryPointer(unowned, 0),
    )
# -----------------------------------------------------------------------------
# Main Simulation Class
# -----------------------------------------------------------------------------
class QLBMAdvecDiffD3Q7_new:
    # Driver for the 3D, 7-direction time-step kernel: precomputes the
    # rotation coefficients from the velocity field, runs time steps through
    # d2q5_tstep_wrapper, and writes selected states to .npy files.

    def __init__(self, vx, vy, vz, current_N, num_reg_qubits, num_ranks, num_anc, rank, N_sub_per_rank, N_tot_state_vector, intermediate_folder_path, downsampling_factor) -> None:
        """Store the configuration and precompute all circuit coefficients.

        Args:
            vx, vy, vz: Callables (x, y, z) -> velocity component.
            current_N: Grid points per axis.
            num_reg_qubits: Qubits per position register.
            num_ranks: Number of ranks the state vector is split across.
            num_anc: Number of ancilla qubits.
            rank: Index of this rank.
            N_sub_per_rank: Number of amplitudes held by this rank.
            N_tot_state_vector: Total number of amplitudes.
            intermediate_folder_path: Directory (joined with `/`) where
                write_state stores its .npy files.
            downsampling_factor: Stride used when saving states.
        """
        self.vx_func = vx
        self.vy_func = vy
        self.vz_func = vz
        self.current_N = current_N
        self.num_reg_qubits = num_reg_qubits
        self.num_ranks = num_ranks
        self.num_anc = num_anc
        self.rank = rank
        self.N_sub_per_rank = N_sub_per_rank
        self.N_tot_state_vector = N_tot_state_vector
        self.intermediate_folder_path = intermediate_folder_path
        self.downsampling_factor = downsampling_factor
        self.dim = 3
        self.ndir = 7
        # Number of qubits needed to index ndir directions.
        self.nq_dir = math.ceil(np.log2(self.ndir))
        # Bit-string label for each direction: the plain binary index, except
        # that direction 4 is labelled "111" instead of "100".
        self.dirs=[]
        for dir_int in range(self.ndir):
            if dir_int==4:
                dir_bin="111"
            else:
                dir_bin = f"{dir_int:b}".zfill(self.nq_dir)
            self.dirs.append(dir_bin)
        self.cs = 1/np.sqrt(3)
        # Velocity components divided by cs**2.
        self.ux = lambda x,y,z: self.vx_func(x,y,z)/self.cs**2
        self.uy = lambda x,y,z: self.vy_func(x,y,z)/self.cs**2
        self.uz = lambda x,y,z: self.vz_func(x,y,z)/self.cs**2
        self.create_circuit()

    def create_circuit(self):
        """Compute and store every coefficient list the time-step kernel needs.

        Each get_circuit_inputs call receives a function of integer grid
        coordinates; coordinates are divided by current_N before being passed
        to the scaled velocity functions. At most 32 sample blocks per axis
        are used.
        """
        print("Creating circuit")
        current_N = self.current_N
        num_reg_qubits = self.num_reg_qubits
        # Pre-streaming coefficients: sqrt((1 + u) / 2) for each axis.
        x_coeffs,x_coeff_var_indices=get_circuit_inputs(lambda x,y,z: ((1+self.ux(x/current_N,y/current_N,z/current_N))/2)**0.5,num_reg_qubits,min(current_N,32))
        y_coeffs,y_coeff_var_indices=get_circuit_inputs(lambda x,y,z: ((1+self.uy(x/current_N,y/current_N,z/current_N))/2)**0.5,num_reg_qubits,min(current_N,32))
        z_coeffs,z_coeff_var_indices=get_circuit_inputs(lambda x,y,z: ((1+self.uz(x/current_N,y/current_N,z/current_N))/2)**0.5,num_reg_qubits,min(current_N,32))
        # Post-streaming coefficients: built from the velocity at the two
        # neighbouring grid points (coordinate -1 and +1) along the axis; the
        # function returns 0 when 1 + u at the -1 neighbour is exactly 0.
        x_coeffs_,x_coeff_var_indices_=get_circuit_inputs(lambda x,y,z: 0 if (1+self.ux((x-1)/current_N,y/current_N,z/current_N))==0 else \
            ((1+self.ux((x-1)/current_N,y/current_N,z/current_N))/(2+self.ux((x-1)/current_N,y/current_N,z/current_N)-self.ux((x+1)/current_N,y/current_N,z/current_N)))**0.5,num_reg_qubits,min(current_N,32))
        y_coeffs_,y_coeff_var_indices_=get_circuit_inputs(lambda x,y,z: 0 if (1+self.uy(x/current_N,(y-1)/current_N,z/current_N))==0 else \
            ((1+self.uy(x/current_N,(y-1)/current_N,z/current_N))/(2+self.uy(x/current_N,(y-1)/current_N,z/current_N)-self.uy(x/current_N,(y+1)/current_N,z/current_N)))**0.5,num_reg_qubits,min(current_N,32))
        z_coeffs_,z_coeff_var_indices_=get_circuit_inputs(lambda x,y,z: 0 if (1+self.uz(x/current_N,y/current_N,(z-1)/current_N))==0 else \
            ((1+self.uz(x/current_N,y/current_N,(z-1)/current_N))/(2+self.uz(x/current_N,y/current_N,(z-1)/current_N)-self.uz(x/current_N,y/current_N,(z+1)/current_N)))**0.5,num_reg_qubits,min(current_N,32))
        # Coefficients for the two "unprep" rotation stages at the end of the
        # kernel; unprep1 uses the x-velocity difference only, unprep2 uses
        # both the y- and x-velocity differences.
        unprep1_coeffs,unprep1_coeff_var_indices=get_circuit_inputs(lambda x,y,z:\
            (1/3**0.5)*(1+(self.ux((x-1)/current_N,y/current_N,z/current_N)-self.ux((x+1)/current_N,y/current_N,z/current_N))/2)**0.5,num_reg_qubits,min(current_N,32))
        unprep2_coeffs, unprep2_coeff_var_indices = get_circuit_inputs(lambda x, y, z: ((1 + (self.uy(x/current_N, (y-1)/current_N, z/current_N) - self.uy(x/current_N, (y+1)/current_N, z/current_N))/2) /(2 - (self.ux((x-1)/current_N, y/current_N, z/current_N) - self.ux((x+1)/current_N, y/current_N, z/current_N))/2))**0.5, num_reg_qubits, min(current_N, 32))
        print("Generated angles")
        def collisionOp(dirs):
            # Flatten the direction labels into one list of ints, with the
            # bits of each label reversed.
            dirs_i_list=[]
            for dir_ in dirs:
                dirs_i=[(int(c)) for c in dir_]
                dirs_i_list+=dirs_i[::-1]
            return dirs_i_list
        self.dirs_i_list=collisionOp(self.dirs)
        print("Generated dirs_i_list")
        # Store coefficients for run_timestep
        self.x_coeffs = x_coeffs
        self.x_coeff_var_indices = x_coeff_var_indices
        self.y_coeffs = y_coeffs
        self.y_coeff_var_indices = y_coeff_var_indices
        self.z_coeffs = z_coeffs
        self.z_coeff_var_indices = z_coeff_var_indices
        self.x_coeffs_ = x_coeffs_
        self.x_coeff_var_indices_ = x_coeff_var_indices_
        self.y_coeffs_ = y_coeffs_
        self.y_coeff_var_indices_ = y_coeff_var_indices_
        self.z_coeffs_ = z_coeffs_
        self.z_coeff_var_indices_ = z_coeff_var_indices_
        self.unprep1_coeffs = unprep1_coeffs
        self.unprep1_coeff_var_indices = unprep1_coeff_var_indices
        self.unprep2_coeffs = unprep2_coeffs
        self.unprep2_coeff_var_indices = unprep2_coeff_var_indices
        print("Circuit created")

    def run_timestep(self, vec_arg):
        """Apply one time step to `vec_arg` and return the resulting state.

        After the kernel runs, the amplitudes outside the first
        N_tot_state_vector / 2**num_anc entries are overwritten with zeros
        directly in device memory (the result is not renormalized here).
        """
        result=cudaq.get_state(d2q5_tstep_wrapper,vec_arg,self.num_reg_qubits,self.num_reg_qubits,self.num_reg_qubits,self.nq_dir,self.dirs_i_list,\
            self.x_coeff_var_indices,self.x_coeffs,\
            self.y_coeff_var_indices,self.y_coeffs,\
            self.z_coeff_var_indices,self.z_coeffs,\
            self.x_coeff_var_indices_,self.x_coeffs_,\
            self.y_coeff_var_indices_,self.y_coeffs_,\
            self.z_coeff_var_indices_,self.z_coeffs_,\
            self.unprep1_coeff_var_indices,self.unprep1_coeffs,\
            self.unprep2_coeff_var_indices,self.unprep2_coeffs)
        # Number of ranks whose slice contains retained amplitudes; this is a
        # float and is below 1 when there are fewer ranks than 2**num_anc.
        num_nonzero_ranks = self.num_ranks / (2**self.num_anc)
        # Zero-copy view of the result's device buffer; writes below modify
        # `result` itself.
        rank_slice_cupy = to_cupy_array(result)
        if self.rank >= num_nonzero_ranks and num_nonzero_ranks > 0:
            # This rank's whole slice is zeroed via a host-to-device copy.
            sub_sv_zeros = np.zeros(self.N_sub_per_rank, dtype=np.complex128)
            cp.cuda.runtime.memcpy(rank_slice_cupy.data.ptr, sub_sv_zeros.ctypes.data, sub_sv_zeros.nbytes, cp.cuda.runtime.memcpyHostToDevice)
        if self.rank == 0 and num_nonzero_ranks < 1 and self.N_sub_per_rank > 0:
            # Rank 0 holds both retained and discarded amplitudes: keep the
            # first limit_idx entries and zero the rest.
            limit_idx = int(self.N_tot_state_vector / (2**self.num_anc))
            if limit_idx < rank_slice_cupy.size:
                rank_slice_cupy[limit_idx:] = 0
        return result

    def write_state(self, state_to_write, t_step_str_val):
        """Save the real part of the retained amplitudes to '<t>_<rank>.npy'.

        Args:
            state_to_write: State whose device buffer is read.
            t_step_str_val: Time-step label used in the file name.
        """
        rank_slice_cupy = to_cupy_array(state_to_write)
        num_nonzero_ranks = self.num_ranks / (2**self.num_anc)
        if self.rank < num_nonzero_ranks or (self.rank == 0 and num_nonzero_ranks <= 0):
            save_path = self.intermediate_folder_path / f"{t_step_str_val}_{self.rank}.npy"
            # NOTE(review): the file is opened before it is known whether
            # anything will be written, so an empty file can be left behind.
            with open(save_path, 'wb') as f:
                arr_to_save = None
                # Number of leading amplitudes on this rank that are kept.
                data_limit = self.N_sub_per_rank
                if num_nonzero_ranks < 1 and self.rank == 0:
                    data_limit = int(self.N_tot_state_vector / (2**self.num_anc))
                if data_limit > 0:
                    relevant_part_cupy = cp.real(rank_slice_cupy[:data_limit])
                else:
                    relevant_part_cupy = cp.array([], dtype=cp.float64)
                if relevant_part_cupy.size >= self.current_N * self.current_N * self.current_N:
                    # Full grid available: take exactly current_N**3 values
                    # and, if requested, downsample along each of the 3 axes.
                    arr_flat = relevant_part_cupy[:self.current_N * self.current_N * self.current_N]
                    if self.downsampling_factor > 1 and self.current_N > 0:
                        arr_reshaped = arr_flat.reshape((self.current_N, self.current_N, self.current_N))
                        arr_downsampled = arr_reshaped[::self.downsampling_factor, ::self.downsampling_factor, ::self.downsampling_factor]
                        arr_to_save = arr_downsampled.flatten()
                    else:
                        arr_to_save = arr_flat
                elif relevant_part_cupy.size > 0:
                    # Partial grid: downsample the flat data with a 1-D stride.
                    if self.downsampling_factor > 1:
                        arr_to_save = relevant_part_cupy[::self.downsampling_factor]
                    else:
                        arr_to_save = relevant_part_cupy
                if arr_to_save is not None and arr_to_save.size > 0:
                    # CuPy arrays are copied to host memory before saving.
                    np.save(f, arr_to_save.get() if isinstance(arr_to_save, cp.ndarray) else arr_to_save)

    def run_evolution(self, initial_state_arg, total_timesteps, time_steps_to_save, observable=False, progress_callback=None):
        """Run `total_timesteps` steps, saving the states listed in `time_steps_to_save`.

        Args:
            initial_state_arg: Starting state (saved as step '0' if 0 is
                among the steps to save).
            total_timesteps: Number of time steps to run.
            time_steps_to_save: Iterable of step numbers to write to disk.
            observable: Not used in this method.
            progress_callback: Optional callable invoked after every step with
                an integer percentage.

        The last state is stored on `self.final_state`.
        """
        current_state_val = initial_state_arg
        save_times = set(time_steps_to_save)
        if 0 in save_times:
            print("Writing first state")
            self.write_state(current_state_val, '0')
        for t_iter in range(total_timesteps):
            # print("Running timestep")
            next_state_val = self.run_timestep(current_state_val)
            if (t_iter + 1) in save_times:
                print("Writing next state")
                self.write_state(next_state_val, str(t_iter + 1))
            # Return cached CuPy allocations to the device after each step.
            cp.get_default_memory_pool().free_all_blocks()
            current_state_val = next_state_val
            if progress_callback:
                percent = int(((t_iter + 1) / total_timesteps) * 100)
                progress_callback(percent)
        if self.rank == 0:
            print(f"Timestep: {total_timesteps}/{total_timesteps} (Evolution complete)")
        cp.get_default_memory_pool().free_all_blocks()
        self.final_state = current_state_val
# -----------------------------------------------------------------------------
# Main Function
# -----------------------------------------------------------------------------
def simulate_qlbm_3D_and_animate(num_reg_qubits: int, T: int, distribution_type: str, vx_input, vy_input, vz_input, boundary_condition: str, plotter=None, add_slider=True, progress_callback=None):
    """Run the 3D QLBM simulation and build an interactive PyVista scene.

    Args:
        num_reg_qubits: Qubits per spatial axis; the grid has
            2**num_reg_qubits points per axis.
        T: Number of time steps to evolve.
        distribution_type: "Sinusoidal" or "Gaussian". Any other value prints
            a warning and falls back to the sinusoidal profile.
        vx_input, vy_input, vz_input: Callables (x, y, z) -> velocity
            component, forwarded to QLBMAdvecDiffD3Q7_new.
        boundary_condition: Accepted for interface compatibility; not used by
            this function.
        plotter: Existing PyVista plotter to reuse (it is cleared first), or
            None to create a new ``pv.Plotter``.
        add_slider: When True, attach a slider widget that switches frames.
        progress_callback: Optional callable receiving an integer percentage
            after every simulated time step.

    Returns:
        ``(plotter, data_frames, actual_timesteps, grid)`` on success, or
        ``(None, None, None, None)`` if the simulation could not produce any
        frame.
    """
    num_anc = NUM_ANC
    num_qubits_total = 3 * num_reg_qubits + num_anc
    current_N = 2**num_reg_qubits
    N_tot_state_vector = 2**num_qubits_total
    # Single-process run: one rank owns the whole state vector.
    num_ranks = 1
    rank = 0
    N_sub_per_rank = int(N_tot_state_vector // num_ranks)

    # Simplified time steps for 3D since slider steps are removed
    NUM_ANIMATION_FRAMES_3D = 10  # Default number of frames if no specific slider steps
    if T == 0:
        time_steps = [0]
    else:
        num_points = min(T + 1, NUM_ANIMATION_FRAMES_3D)
        time_steps = np.linspace(start=0, stop=T, num=num_points, dtype=int)
    # Integer rounding can produce duplicates; keep each step once, in order.
    time_steps = sorted(set(time_steps))

    def sinusoidal_profile(x, y, z, N_val_func):
        # Product of one sine period per axis, shifted up by 1.
        return np.sin(x * 2 * np.pi / N_val_func) * \
            np.sin(y * 2 * np.pi / N_val_func) * \
            np.sin(z * 2 * np.pi / N_val_func) + 1

    def gaussian_profile(x, y, z, N_val_func):
        # Gaussian centred in the box with sigma = N / 5, scaled to [0.2, 2.0].
        return np.exp(-((x - N_val_func / 2)**2 / (2 * (N_val_func / 5)**2) + (y - N_val_func / 2)**2 / (2 * (N_val_func / 5)**2) +
                        (z - N_val_func / 2)**2 / (2 * (N_val_func / 5)**2))) * 1.8 + 0.2

    if distribution_type == "Sinusoidal":
        selected_initial_state_function_raw = sinusoidal_profile
    elif distribution_type == "Gaussian":
        selected_initial_state_function_raw = gaussian_profile
    else:
        print(f"Warning: Unknown distribution type '{distribution_type}'. Defaulting to Sinusoidal.")
        selected_initial_state_function_raw = sinusoidal_profile

    def initial_state_func_eval(i):
        # Map flat indices to (x, y, z) and zero every index beyond the grid
        # (those amplitudes belong to non-zero ancilla values).
        return selected_initial_state_function_raw(i % current_N, (i // current_N) % current_N, i // (current_N**2), current_N) * (i < (current_N**3)).astype(int)

    with tempfile.TemporaryDirectory() as tmp_npy_dir:
        intermediate_folder_path = Path(tmp_npy_dir)
        cudaq.set_target('nvidia', option='fp64')
        downsampling_factor = 1
        if current_N == 0:
            print("Error: current_N is zero. num_reg_qubits likely too small.")
            return None, None, None, None  # Modified return
        if current_N < downsampling_factor:
            downsampling_factor = current_N if current_N > 0 else 1

        qlbm_obj = QLBMAdvecDiffD3Q7_new(
            vx=vx_input, vy=vy_input, vz=vz_input,
            current_N=current_N, num_reg_qubits=num_reg_qubits,
            num_ranks=num_ranks, num_anc=num_anc, rank=rank,
            N_sub_per_rank=N_sub_per_rank, N_tot_state_vector=N_tot_state_vector,
            intermediate_folder_path=intermediate_folder_path,
            downsampling_factor=downsampling_factor
        )

        initial_state_val = cudaq.get_state(alloc_kernel, num_qubits_total)
        sub_sv_init_flat = initial_state_func_eval(np.arange(N_sub_per_rank)).astype(np.complex128)
        norm = np.linalg.norm(sub_sv_init_flat)
        if norm > 0:
            sub_sv_init_flat /= norm
        else:
            print("Error: Initial state norm is zero.")
            return None, None, None, None  # Modified return

        # Copy only the grid amplitudes into the host buffer. The evaluated
        # array spans the whole rank slice (grid plus ancilla space), so its
        # length never equals current_N**3; clamping the copy length covers
        # every case without a size comparison that would always warn.
        full_initial_sv_host = np.zeros(N_sub_per_rank, dtype=np.complex128)
        num_computational_states = current_N ** 3
        fill_len = min(len(sub_sv_init_flat), num_computational_states, N_sub_per_rank)
        full_initial_sv_host[:fill_len] = sub_sv_init_flat[:fill_len]

        # Overwrite the freshly allocated device state with the initial data.
        rank_slice_init = to_cupy_array(initial_state_val)
        print(f'Rank {rank}: Initializing state with {distribution_type} (vx={vx_input}, vy={vy_input}, vz={vz_input})...')
        cp.cuda.runtime.memcpy(rank_slice_init.data.ptr, full_initial_sv_host.ctypes.data, full_initial_sv_host.nbytes, cp.cuda.runtime.memcpyHostToDevice)
        print(f'Rank {rank}: Initial state copied. Size: {len(sub_sv_init_flat)}. N_sub_per_rank: {N_sub_per_rank}')

        print("Starting QLBM evolution...")
        qlbm_obj.run_evolution(initial_state_val, T, time_steps, progress_callback=progress_callback)
        print("QLBM evolution complete.")

        downsampled_N = current_N // downsampling_factor
        if downsampled_N == 0 and current_N > 0:
            downsampled_N = 1
        elif current_N == 0:
            print("Error: current_N is zero before Plotly stage.")
            return None, None, None, None  # Modified return

        # Load the saved frames back while the temporary directory exists.
        data_frames = []
        actual_timesteps = []
        total_frames = len(time_steps)
        expected_size = downsampled_N * downsampled_N * downsampled_N
        for idx, t in enumerate(time_steps):
            file_path = intermediate_folder_path / f"{t}_{rank}.npy"
            if not file_path.exists():
                print(f"Warning: File {file_path} not found. Skipping.")
                continue
            sol_loaded = np.load(file_path)
            if sol_loaded.size != expected_size:
                print(f"Warning: File {file_path} size {sol_loaded.size} != expected {expected_size}. Skipping.")
                continue
            data_frames.append(np.reshape(sol_loaded, (downsampled_N, downsampled_N, downsampled_N)))
            actual_timesteps.append(t)
            progress = int(((idx + 1) / total_frames) * 100)
            print(f"Loading frames: {progress}% complete (t = {t})")

    if not data_frames:
        print("Error: No data frames loaded for plotting.")
        return None, None, None, None  # Modified return

    print("Generating interactive plot with PyVista...")
    # Create PyVista plotter
    if plotter is None:
        plotter = pv.Plotter()
    else:
        plotter.clear()

    # Create a structured grid for the volume
    x_coords_plot = np.linspace(0, 1, downsampled_N)
    y_coords_plot = np.linspace(0, 1, downsampled_N)
    z_coords_plot = np.linspace(0, 1, downsampled_N)
    X_grid_mesh, Y_grid_mesh, Z_grid_mesh = np.meshgrid(x_coords_plot, y_coords_plot, z_coords_plot, indexing='ij')
    grid = pv.StructuredGrid()
    grid.points = np.c_[X_grid_mesh.ravel(), Y_grid_mesh.ravel(), Z_grid_mesh.ravel()]
    grid.dimensions = [downsampled_N, downsampled_N, downsampled_N]

    # Add the first frame data
    grid["scalars"] = data_frames[0].flatten()
    # Create isosurfaces
    isosurfaces = grid.contour(isosurfaces=10, scalars="scalars")
    # Add mesh to plotter
    actor = plotter.add_mesh(isosurfaces, cmap="viridis", opacity=0.5, show_scalar_bar=True)
    plotter.add_axes()
    plotter.show_grid()

    # Add slider widget for time steps
    class TimeSliderCallback:
        """Slider handler that swaps the displayed isosurfaces to another frame."""

        def __init__(self, plotter, grid, data_frames, actual_timesteps, actor):
            self.plotter = plotter
            self.grid = grid
            self.data_frames = data_frames
            self.actual_timesteps = actual_timesteps
            self.actor = actor

        def __call__(self, value):
            # The slider reports a float; round it to the nearest frame index
            # and ignore values outside the available frames.
            idx = int(round(value))
            if 0 <= idx < len(self.data_frames):
                self.grid["scalars"] = self.data_frames[idx].flatten()
                new_iso = self.grid.contour(isosurfaces=10, scalars="scalars")
                self.plotter.remove_actor(self.actor)
                self.actor = self.plotter.add_mesh(new_iso, cmap="viridis", opacity=0.5, show_scalar_bar=False)
                # Reusing the name replaces the previous label text.
                self.plotter.add_text(f"Time: {self.actual_timesteps[idx]}", name="time_label", position="upper_right")

    callback = TimeSliderCallback(plotter, grid, data_frames, actual_timesteps, actor)
    if add_slider:
        plotter.add_slider_widget(
            callback,
            [0, len(data_frames) - 1],
            value=0,
            title="Time Step",
            pointa=(0.4, 0.9),
            pointb=(0.9, 0.9),
            style='modern',
        )
    plotter.add_text(f"Time: {actual_timesteps[0]}", name="time_label", position="upper_right")

    # Return plotter, data_frames, actual_timesteps, and grid for external time slider usage
    return plotter, data_frames, actual_timesteps, grid