"""
FireEcho Quantum Gold - State Vector Simulator

High-performance quantum circuit simulator optimized for SM120 (Blackwell).
Uses Triton kernels with Thread Block Clusters for cooperative execution.

Performance:
    - 20 qubits: ~2M state vector elements, ~10ms per gate
    - 25 qubits: ~32M elements, ~150ms per gate
    - 30 qubits: ~1B elements, requires ~8GB VRAM

Theory:
    State vector simulation maintains the full quantum state |ψ⟩ as a vector
    of 2^n complex amplitudes. Each gate transforms the state via
    matrix-vector multiplication.
"""

import torch
import math
from typing import Optional, List, Dict, Any, Sequence, Tuple, Union
from dataclasses import dataclass

from .circuit import QuantumCircuit, Gate
from . import gates as gate_ops


# torch.multinomial refuses distributions with more categories than this.
_MULTINOMIAL_MAX_CATEGORIES = 1 << 24


@dataclass
class StateVector:
    """
    Quantum state vector representation.

    Stores the full quantum state as 2^n complex amplitudes where
    state[i] is the amplitude of basis state |i⟩. The probability of
    measuring basis state |i⟩ is |state[i]|².

    Bit ordering: qubit k corresponds to bit k of the basis index, and
    textual labels list qubit 0 first (so labels are the reversed binary
    representation of the index).
    """

    amplitudes: torch.Tensor
    num_qubits: int

    @classmethod
    def zeros(cls, num_qubits: int, device: str = 'cuda:0') -> 'StateVector':
        """Create |00...0⟩ state."""
        size = 2 ** num_qubits
        amplitudes = torch.zeros(size, dtype=torch.complex64, device=device)
        amplitudes[0] = 1.0 + 0j
        return cls(amplitudes, num_qubits)

    @classmethod
    def from_label(cls, label: str, device: str = 'cuda:0') -> 'StateVector':
        """
        Create state from basis state label.

        Example:
            StateVector.from_label("101") creates |101⟩

        Raises:
            ValueError: If ``label`` is empty or contains characters other
                than '0' and '1'.
        """
        # int(..., 2) tolerates signs, underscores and whitespace, which
        # would silently select the wrong basis state, so check explicitly.
        if not label or any(ch not in '01' for ch in label):
            raise ValueError(
                f"Label must be a non-empty string of '0'/'1', got {label!r}"
            )
        num_qubits = len(label)
        size = 2 ** num_qubits
        amplitudes = torch.zeros(size, dtype=torch.complex64, device=device)
        # Convert binary string to index (reversed for qubit ordering)
        idx = int(label[::-1], 2)
        amplitudes[idx] = 1.0 + 0j
        return cls(amplitudes, num_qubits)

    @classmethod
    def uniform_superposition(cls, num_qubits: int, device: str = 'cuda:0') -> 'StateVector':
        """Create uniform superposition (H⊗n |0⟩⊗n)."""
        size = 2 ** num_qubits
        amplitudes = torch.full(
            (size,),
            1.0 / math.sqrt(size),
            dtype=torch.complex64,
            device=device,
        )
        return cls(amplitudes, num_qubits)

    def probabilities(self) -> torch.Tensor:
        """Get measurement probabilities for all basis states."""
        # abs() of a complex tensor is already real-valued.
        return self.amplitudes.abs() ** 2

    def normalize(self) -> 'StateVector':
        """
        Normalize the state vector in place and return ``self``.

        A vector whose norm is zero yields non-finite amplitudes because
        the division is performed unconditionally.
        """
        norm = torch.sqrt((self.amplitudes.abs() ** 2).sum())
        self.amplitudes = self.amplitudes / norm
        return self

    def fidelity(self, other: 'StateVector') -> float:
        """
        Compute fidelity |⟨ψ|φ⟩|² between two states.

        Fidelity of 1.0 means identical states.
        """
        overlap = torch.sum(self.amplitudes.conj() * other.amplitudes)
        return (overlap.abs() ** 2).item()

    def inner_product(self, other: 'StateVector') -> complex:
        """Compute inner product ⟨ψ|φ⟩."""
        return torch.sum(self.amplitudes.conj() * other.amplitudes).item()

    def copy(self) -> 'StateVector':
        """Return a copy of this state."""
        return StateVector(self.amplitudes.clone(), self.num_qubits)

    def to_dict(self) -> Dict[str, complex]:
        """
        Convert to dictionary of {basis_label: amplitude}.

        Only amplitudes with magnitude above 1e-10 are included.
        """
        amps = self.amplitudes
        # Filter on the tensor's device and transfer the survivors once,
        # instead of one .item() round trip per basis state.
        keep = torch.nonzero(amps.abs() > 1e-10).flatten()
        width = self.num_qubits
        return {
            format(i, f'0{width}b')[::-1]: amp
            for i, amp in zip(keep.tolist(), amps[keep].tolist())
        }

    def __repr__(self):
        return f"StateVector(num_qubits={self.num_qubits}, device={self.amplitudes.device})"

    def __str__(self):
        """Pretty print the state (at most the first 16 basis states)."""
        size = 2 ** self.num_qubits
        shown = min(16, size)
        lines = [f"StateVector ({self.num_qubits} qubits):"]
        # Only the displayed prefix is transferred to the host.
        head = self.amplitudes[:shown]
        amps = head.tolist()
        probs = (head.abs() ** 2).tolist()
        for i, (amp, prob) in enumerate(zip(amps, probs)):
            if prob > 1e-10:
                label = format(i, f'0{self.num_qubits}b')[::-1]
                lines.append(f" |{label}⟩: {amp.real:+.4f}{amp.imag:+.4f}i (p={prob:.4f})")
        if size > 16:
            lines.append(f" ... ({2**self.num_qubits - 16} more states)")
        return "\n".join(lines)


def _subspace_view(state: StateVector,
                   fixed: Sequence[Tuple[int, int]]) -> torch.Tensor:
    """
    Return a writable view of the amplitudes whose listed qubits have fixed bits.

    Args:
        state: State whose amplitude tensor is viewed (not copied).
        fixed: ``(qubit, bit)`` pairs; the view contains exactly the
            amplitudes whose basis index has ``bit`` at position ``qubit``
            for every pair.

    Returns:
        A tensor sharing storage with ``state.amplitudes``; in-place
        operations on it modify the state.

    Raises:
        ValueError: If a qubit is repeated or lies outside the state.
    """
    num_qubits = state.num_qubits
    qubits = [qubit for qubit, _ in fixed]
    if len(set(qubits)) != len(qubits):
        raise ValueError(f"Gate qubits must be distinct, got {qubits}")
    if any(not 0 <= qubit < num_qubits for qubit in qubits):
        raise ValueError(
            f"Qubit index out of range for {num_qubits}-qubit state: {qubits}"
        )

    # Split the flat index into blocks, most significant bit first, giving
    # each fixed qubit its own length-2 axis: (..., 2, ..., 2, ...).
    shape: List[int] = []
    index: List[Any] = []
    upper = num_qubits
    for qubit, bit in sorted(fixed, reverse=True):
        shape += [1 << (upper - 1 - qubit), 2]
        index += [slice(None), bit]
        upper = qubit
    shape.append(1 << upper)
    index.append(slice(None))
    return state.amplitudes.view(shape)[tuple(index)]


class QuantumSimulator:
    """
    FireEcho Quantum Gold Simulator.

    High-performance state vector simulator for SM120 (Blackwell) GPUs.
    Uses Triton kernels with Thread Block Clusters for cooperative execution.

    Args:
        device: CUDA device to use (default: 'cuda:0')
        precision: Floating point precision ('single' or 'double')

    Example:
        sim = QuantumSimulator()

        # Build circuit
        qc = QuantumCircuit(3)
        qc.h(0).cx(0, 1).cx(0, 2)

        # Run simulation
        state = sim.run(qc)
        print(state)  # GHZ state

        # Sample measurements
        counts = sim.sample(qc, shots=1000)
    """

    def __init__(self, device: str = 'cuda:0', precision: str = 'single'):
        """
        Select the CUDA device and record its properties.

        Raises:
            RuntimeError: If CUDA is not available.
        """
        self.device = device
        self.precision = precision
        # NOTE(review): self.dtype is recorded but states are still created
        # as complex64 by StateVector.zeros; confirm the gate kernels accept
        # complex128 before wiring 'double' precision through. Any value
        # other than 'single' currently maps to complex128.
        self.dtype = torch.complex64 if precision == 'single' else torch.complex128

        # Verify CUDA available
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA not available. FireEcho Quantum requires GPU.")

        # Handle device string
        if device == 'cuda':
            device = 'cuda:0'
        cuda_device = torch.device(device)
        torch.cuda.set_device(cuda_device)

        # Get device info for the device actually selected (not always GPU 0)
        props = torch.cuda.get_device_properties(cuda_device)
        self.gpu_name = props.name
        self.sm_version = f"{props.major}.{props.minor}"

    def run(self, circuit: QuantumCircuit,
            initial_state: Optional[StateVector] = None) -> StateVector:
        """
        Execute a quantum circuit.

        Args:
            circuit: The quantum circuit to execute
            initial_state: Optional initial state (default: |00...0⟩).
                It is copied, so the caller's state is left untouched.

        Returns:
            Final state vector after all gates applied

        Raises:
            ValueError: If ``initial_state`` has a different qubit count
                than ``circuit``, or the circuit contains an unknown gate.
        """
        # Initialize state
        if initial_state is None:
            state = StateVector.zeros(circuit.num_qubits, self.device)
        else:
            # Validate before cloning so a mismatch costs no allocation.
            if initial_state.num_qubits != circuit.num_qubits:
                raise ValueError(
                    f"Initial state has {initial_state.num_qubits} qubits, "
                    f"but circuit has {circuit.num_qubits}"
                )
            state = initial_state.copy()

        # Apply gates
        for gate in circuit.gates:
            self._apply_gate(state, gate)

        return state

    def _apply_gate(self, state: StateVector, gate: Gate):
        """
        Apply a single gate to the state in place.

        Raises:
            ValueError: If the gate name is not recognised.
        """
        name = gate.name
        targets = gate.targets
        params = gate.params

        # Single-qubit gates
        if name == "H":
            gate_ops.hadamard(state.amplitudes, targets[0])
        elif name == "X":
            gate_ops.pauli_x(state.amplitudes, targets[0])
        elif name == "Y":
            gate_ops.pauli_y(state.amplitudes, targets[0])
        elif name == "Z":
            gate_ops.pauli_z(state.amplitudes, targets[0])
        elif name == "S":
            gate_ops.phase_gate(state.amplitudes, targets[0], math.pi / 2)
        elif name == "SDG":
            gate_ops.phase_gate(state.amplitudes, targets[0], -math.pi / 2)
        elif name == "T":
            gate_ops.t_gate(state.amplitudes, targets[0])
        elif name == "TDG":
            gate_ops.phase_gate(state.amplitudes, targets[0], -math.pi / 4)
        elif name == "RX":
            gate_ops.rotation_x(state.amplitudes, targets[0], params[0])
        elif name == "RY":
            gate_ops.rotation_y(state.amplitudes, targets[0], params[0])
        elif name == "RZ":
            gate_ops.rotation_z(state.amplitudes, targets[0], params[0])
        elif name == "P":
            gate_ops.phase_gate(state.amplitudes, targets[0], params[0])
        elif name == "U":
            # U(θ,φ,λ) = Rz(φ) Ry(θ) Rz(λ), up to a global phase.
            # Rightmost factor acts first.
            gate_ops.rotation_z(state.amplitudes, targets[0], params[2])
            gate_ops.rotation_y(state.amplitudes, targets[0], params[0])
            gate_ops.rotation_z(state.amplitudes, targets[0], params[1])
        elif name == "I":
            pass  # Identity - do nothing

        # Two-qubit gates
        elif name == "CX":
            gate_ops.cnot(state.amplitudes, targets[0], targets[1])
        elif name == "CY":
            # Y = S X S†, so CY = (I ⊗ S) CX (I ⊗ S†): S† acts first, S last.
            # (The opposite order yields S† X S = -Y, a controlled -Y.)
            # Assumes phase_gate applies diag(1, e^{iφ}), as its use for
            # S / SDG / P above implies.
            gate_ops.phase_gate(state.amplitudes, targets[1], -math.pi / 2)
            gate_ops.cnot(state.amplitudes, targets[0], targets[1])
            gate_ops.phase_gate(state.amplitudes, targets[1], math.pi / 2)
        elif name == "CZ":
            gate_ops.cz(state.amplitudes, targets[0], targets[1])
        elif name == "SWAP":
            gate_ops.swap(state.amplitudes, targets[0], targets[1])
        elif name == "CP":
            # Controlled phase: apply phase to |11⟩
            self._apply_controlled_phase(state, targets[0], targets[1], params[0])
        elif name == "CRX":
            self._apply_controlled_rotation(state, targets[0], targets[1], 'x', params[0])
        elif name == "CRY":
            self._apply_controlled_rotation(state, targets[0], targets[1], 'y', params[0])
        elif name == "CRZ":
            self._apply_controlled_rotation(state, targets[0], targets[1], 'z', params[0])

        # Three-qubit gates
        elif name == "CCX":
            self._apply_toffoli(state, targets[0], targets[1], targets[2])
        elif name == "CSWAP":
            self._apply_fredkin(state, targets[0], targets[1], targets[2])

        # Special gates
        elif name == "BARRIER":
            pass  # Barrier has no effect on state
        elif name == "MEASURE":
            pass  # Measurement handled separately
        else:
            raise ValueError(f"Unknown gate: {name}")

    def _apply_controlled_phase(self, state: StateVector,
                                control: int, target: int, phi: float):
        """
        Apply controlled phase gate in place.

        Multiplies by e^{iφ} every amplitude whose control and target bits
        are both 1; all other amplitudes are untouched.

        Raises:
            ValueError: If the qubits coincide or are out of range.
        """
        phase = complex(math.cos(phi), math.sin(phi))
        _subspace_view(state, [(control, 1), (target, 1)]).mul_(phase)

    def _apply_controlled_rotation(self, state: StateVector,
                                   control: int, target: int,
                                   axis: str, theta: float):
        """
        Apply controlled rotation gate (CRx, CRy, CRz) in place.

        The 2x2 rotation R_axis(θ) is applied to the target qubit on the
        part of the state where the control bit is 1.

        Args:
            axis: 'x', 'y' or 'z'.
            theta: Rotation angle in radians.

        Raises:
            ValueError: If ``axis`` is not 'x', 'y' or 'z', or the qubits
                coincide or are out of range.
        """
        cos_half = math.cos(theta / 2)
        sin_half = math.sin(theta / 2)

        # Amplitude pairs (target=0, target=1) restricted to control=1.
        a0 = _subspace_view(state, [(control, 1), (target, 0)])
        a1 = _subspace_view(state, [(control, 1), (target, 1)])

        # Both results are computed into fresh tensors before either view
        # is overwritten, so each uses the pre-gate amplitudes.
        if axis == 'x':
            new0 = cos_half * a0 - 1j * sin_half * a1
            new1 = -1j * sin_half * a0 + cos_half * a1
        elif axis == 'y':
            new0 = cos_half * a0 - sin_half * a1
            new1 = sin_half * a0 + cos_half * a1
        elif axis == 'z':
            new0 = (cos_half - 1j * sin_half) * a0
            new1 = (cos_half + 1j * sin_half) * a1
        else:
            raise ValueError(f"Unknown rotation axis: {axis}")

        a0.copy_(new0)
        a1.copy_(new1)

    def _apply_toffoli(self, state: StateVector,
                       c1: int, c2: int, target: int):
        """
        Apply Toffoli (CCX) gate in place.

        Flips the target bit where both controls are 1, i.e. swaps the
        target=0 and target=1 amplitudes of that subspace.

        Raises:
            ValueError: If the qubits coincide or are out of range.
        """
        low = _subspace_view(state, [(c1, 1), (c2, 1), (target, 0)])
        high = _subspace_view(state, [(c1, 1), (c2, 1), (target, 1)])
        saved = low.clone()
        low.copy_(high)
        high.copy_(saved)

    def _apply_fredkin(self, state: StateVector,
                       control: int, t1: int, t2: int):
        """
        Apply Fredkin (CSWAP) gate in place.

        Where the control bit is 1, exchanges the amplitudes whose target
        bits are (t1=1, t2=0) with those whose target bits are (t1=0, t2=1).
        States with equal target bits are unaffected by a swap.

        Raises:
            ValueError: If the qubits coincide or are out of range.
        """
        one_zero = _subspace_view(state, [(control, 1), (t1, 1), (t2, 0)])
        zero_one = _subspace_view(state, [(control, 1), (t1, 0), (t2, 1)])
        saved = one_zero.clone()
        one_zero.copy_(zero_one)
        zero_one.copy_(saved)

    def sample(self, circuit: QuantumCircuit, shots: int = 1024,
               seed: Optional[int] = None) -> Dict[str, int]:
        """
        Run circuit and sample measurement outcomes.

        Args:
            circuit: Circuit to execute
            shots: Number of measurement samples
            seed: Random seed for reproducibility. A private generator is
                seeded, so the global torch RNG state is not modified.

        Returns:
            Dictionary of {bitstring: count}
        """
        # Run circuit
        state = self.run(circuit)

        # Get probabilities
        probs = state.probabilities()

        generator = None
        if seed is not None:
            generator = torch.Generator(device=probs.device)
            generator.manual_seed(seed)

        # Sample
        if probs.numel() <= _MULTINOMIAL_MAX_CATEGORIES:
            indices = torch.multinomial(
                probs, shots, replacement=True, generator=generator
            )
        else:
            # torch.multinomial rejects this many categories; fall back to
            # inverse-CDF sampling. float64 keeps the running sum accurate
            # over tens of millions of terms.
            cdf = torch.cumsum(probs, dim=0, dtype=torch.float64)
            draws = torch.rand(
                shots, dtype=torch.float64, device=probs.device,
                generator=generator,
            ) * cdf[-1]
            # right=True picks the first index whose cumulative mass exceeds
            # the draw, so zero-probability states are never selected.
            indices = torch.searchsorted(cdf, draws, right=True)
            indices.clamp_(max=probs.numel() - 1)

        # Count occurrences
        outcomes, frequencies = torch.unique(indices, return_counts=True)
        width = circuit.num_qubits
        return {
            format(idx, f'0{width}b')[::-1]: count
            for idx, count in zip(outcomes.tolist(), frequencies.tolist())
        }

    def expectation(self, circuit: QuantumCircuit,
                    observable: torch.Tensor) -> float:
        """
        Compute expectation value ⟨ψ|O|ψ⟩.

        Args:
            circuit: Circuit to prepare state |ψ⟩
            observable: Observable matrix O. It is moved to the state's
                device and cast to the state's dtype, so real-valued or
                double-precision matrices are accepted.

        Returns:
            Expectation value (real part of ⟨ψ|O|ψ⟩)
        """
        state = self.run(circuit)
        amps = state.amplitudes

        # O|ψ⟩ (torch.mv needs matching dtypes, hence the cast)
        o_psi = torch.mv(observable.to(device=amps.device, dtype=amps.dtype), amps)

        # ⟨ψ|O|ψ⟩ — vdot conjugates its first argument
        expectation = torch.vdot(amps, o_psi)

        return expectation.real.item()

    def __repr__(self):
        return f"QuantumSimulator(device={self.device}, gpu={self.gpu_name}, sm={self.sm_version})"