# Copyright (c) 2026 Salvatore Pennacchio <jtatopenn@libero.it>
# Distributed under the Business Source License 1.1 (BSL 1.1)
# See LICENSE.md in the project root for full license terms.
import re
from typing import List, Dict, Optional, Tuple
from dataclasses import dataclass, field
import numpy as np
# ─────────────────────────────────────────────────────────────────────────────
# Data model
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class QASMCircuit:
    """
    Parsed representation of an OpenQASM 2.0 / 3.0 circuit.

    Attributes
    ----------
    n_qubits : total qubit count declared in qreg / qubit statements
    n_cbits  : total classical bit count declared in creg / bit statements
    ops      : list of gate dicts — each dict has keys:
                 'type'   : 'gate'
                 'name'   : lowercase gate name (aliases resolved)
                 'qubits' : list[int]   — absolute qubit indices
                 'params' : list[float] — evaluated rotation angles
    """

    n_qubits: int = 0
    n_cbits: int = 0
    ops: List[Dict] = field(default_factory=list)

    def to_tuples(self) -> List[Tuple]:
        """
        Flatten every op into a tuple ``(name, *qubits, *params)``.

        Qubit indices always come first and parameters trail them, e.g.
        ``('rx', 0, 1.5708)`` or ``('cx', 0, 1)``.  The original docstring
        states this is the layout consumed by
        ``DenseSVSimulator.run_circuit`` (not visible from this file —
        confirm against the simulator before changing the ordering).

        Ops lacking a 'qubits' or 'params' key contribute nothing for that
        part, mirroring the ``op.get('qubits', [])`` tolerance used by
        ``QASMParser.validate``.  'name' is still required.

        Returns
        -------
        A new list with one tuple per op, in circuit order.
        """
        return [
            (op['name'], *op.get('qubits', ()), *op.get('params', ()))
            for op in self.ops
        ]
# ─────────────────────────────────────────────────────────────────────────────
# Parser
# ─────────────────────────────────────────────────────────────────────────────
class QASMParser:
    """
    Best-effort OpenQASM 2.0 / 3.0 parser.

    Supported features
    ------------------
    - qreg / creg   (QASM 2.0)
    - qubit / bit   (QASM 3.0)
    - Parametric gates: rx, ry, rz, p, u1, u2, u3, cp, crz, ...
    - Compound parameter expressions: pi/2, sqrt(2), cos(0.3), ...
    - Block comments /* ... */ and line comments // ...
    - Gate aliases: cu1→cp, u1→p, toffoli→ccx, cnot→cx, ...
    - Range syntax q[0:3] expanded to individual qubits (upper bound
      exclusive)
    - Bare register name (no index) resolved to qubit 0 of that register
    - Silent fallback (0.0) for unparseable parameter expressions

    Not interpreted
    ---------------
    - Statements whose first token is in ``_SKIP`` (measure, barrier,
      gate/def definitions, if/for/while, ...) are dropped.
    - Anything inside a brace-delimited block ``{ ... }`` is dropped, so
      bodies of gate definitions and control-flow statements never leak
      into the op list.
    - Statements whose head is not a plain identifier (for example the
      QASM 3 assignment ``c[0] = measure q[0]``) are dropped.
    """

    # ── compiled regexes ────────────────────────────────────────────
    _RE_BLOCK_CMT = re.compile(r'/\*.*?\*/', re.DOTALL)
    _RE_LINE_CMT = re.compile(r'//[^\n]*')
    _RE_INDEX = re.compile(r'\[(\d+)\]')
    _RE_RANGE = re.compile(r'^([a-zA-Z_]\w*)\[(\d+):(\d+)\]$')  # q[0:3]
    _RE_QREG2 = re.compile(r'^qreg\s+([a-zA-Z_]\w*)\s*\[(\d+)\]')
    _RE_CREG2 = re.compile(r'^creg\s+([a-zA-Z_]\w*)\s*\[(\d+)\]')
    _RE_QREG3 = re.compile(r'^qubit(?:\s*\[(\d+)\])?\s+([a-zA-Z_]\w*)')
    _RE_CREG3 = re.compile(r'^bit(?:\s*\[(\d+)\])?\s+([a-zA-Z_]\w*)')
    # Used by _parse_gate to check that a gate head is a plain identifier.
    _RE_GATE_HEAD = re.compile(r'^([a-zA-Z_]\w*)(?:\((.*)\))?$')

    # ── gate name aliases ───────────────────────────────────────────
    _ALIAS: Dict[str, str] = {
        'cu1': 'cp',
        'u1': 'p',
        'toffoli': 'ccx',
        'fredkin': 'cswap',
        'cnot': 'cx',
        'not': 'x',
        'id': 'i',
        'cx': 'cx',  # explicit identity mappings for safety
        'cz': 'cz',
        'ccx': 'ccx',
    }

    # ── statements to skip entirely ─────────────────────────────────
    # Compared against the lowercased first token of each statement
    # (the text before the first whitespace or '(').
    _SKIP = frozenset((
        'openqasm', 'include', 'barrier', 'measure',
        'reset', 'gate', 'def', 'if', 'for', 'while',
    ))

    # ── math environment for eval() ─────────────────────────────────
    # Builtins are emptied, but this is NOT a security sandbox: 'np' and
    # the numpy functions are reachable, and attribute access on them is
    # not restricted.  See the warning in _eval_param.
    _MATH_ENV: Dict = {
        '__builtins__': {},
        'pi': np.pi,
        'tau': 2.0 * np.pi,
        'euler': np.e,
        'np': np,
        'sin': np.sin, 'cos': np.cos, 'tan': np.tan,
        'sqrt': np.sqrt, 'exp': np.exp, 'log': np.log,
        'asin': np.arcsin, 'acos': np.arccos, 'atan': np.arctan,
        'arcsin': np.arcsin, 'arccos': np.arccos, 'arctan': np.arctan,
        'abs': abs, 'round': round,
    }

    # ────────────────────────────────────────────────────────────────
    # Public interface
    # ────────────────────────────────────────────────────────────────
    def parse(self, qasm_str: str) -> "QASMCircuit":
        """
        Parse an OpenQASM 2.0 or 3.0 string into a QASMCircuit.

        Processing steps
        ----------------
        1. Remove block and line comments.
        2. Replace every brace-delimited block with a ';' so that bodies
           of gate definitions / control flow are not parsed as top-level
           gates and the statement following '}' starts cleanly.
        3. Split on ';' and, per statement: skip keywords in ``_SKIP``,
           record register declarations, otherwise try to parse a gate
           application.

        Register handling
        -----------------
        Registers are laid out consecutively in declaration order.  Each
        declaration stores 'name[i]' for every index and the bare 'name'
        (mapped to the register's first qubit/bit).

        ``n_qubits`` is also raised to cover the highest qubit index seen
        in any op, which handles circuits that use indices without a
        matching declaration.

        Parameters
        ----------
        qasm_str : full circuit source text.

        Returns
        -------
        QASMCircuit with the qubit count, classical bit count and op list.
        """
        qubit_map: Dict[str, int] = {}
        cbit_map: Dict[str, int] = {}
        n_qubits = 0
        n_cbits = 0
        ops: List[Dict] = []

        # ── strip comments, then brace blocks ───────────────────────
        cleaned = self._RE_BLOCK_CMT.sub(' ', qasm_str)
        cleaned = self._RE_LINE_CMT.sub(' ', cleaned)
        cleaned = self._strip_braced_blocks(cleaned)

        for raw in cleaned.split(';'):
            # collapse internal whitespace runs to a single space
            instr = re.sub(r'\s+', ' ', raw).strip()
            if not instr:
                continue

            # first token (before any space or '(') for keyword detection
            first_token = re.split(r'[\s(]', instr)[0].lower()
            if first_token in self._SKIP:
                continue

            # ── qreg (QASM 2.0): groups are (name, size) ────────────
            m = self._RE_QREG2.match(instr)
            if m:
                n_qubits = self._declare_register(
                    qubit_map, m.group(1), int(m.group(2)), n_qubits)
                continue

            # ── creg (QASM 2.0): groups are (name, size) ────────────
            m = self._RE_CREG2.match(instr)
            if m:
                n_cbits = self._declare_register(
                    cbit_map, m.group(1), int(m.group(2)), n_cbits)
                continue

            # ── qubit (QASM 3.0): groups are (optional size, name) ──
            m = self._RE_QREG3.match(instr)
            if m:
                size = int(m.group(1)) if m.group(1) else 1
                n_qubits = self._declare_register(
                    qubit_map, m.group(2), size, n_qubits)
                continue

            # ── bit (QASM 3.0): groups are (optional size, name) ────
            m = self._RE_CREG3.match(instr)
            if m:
                size = int(m.group(1)) if m.group(1) else 1
                n_cbits = self._declare_register(
                    cbit_map, m.group(2), size, n_cbits)
                continue

            # ── gate application ────────────────────────────────────
            op = self._parse_gate(instr, qubit_map)
            if op is None:
                continue
            ops.append(op)
            # _parse_gate never returns an op with an empty qubit list,
            # so max() is safe here.
            n_qubits = max(n_qubits, max(op['qubits']) + 1)

        return QASMCircuit(n_qubits, n_cbits, ops)

    def validate(self, circ: "QASMCircuit") -> Tuple[bool, str]:
        """
        Light structural validation — does not verify gate semantics.

        Checks, in order: the circuit has at least one qubit, at least one
        op, and every qubit index referenced by an op lies in
        ``[0, n_qubits)``.

        Returns
        -------
        ``(True, 'OK')`` on success, otherwise ``(False, reason)`` for the
        first problem found.
        """
        if circ.n_qubits <= 0:
            return False, 'n_qubits must be > 0.'
        if not circ.ops:
            return False, 'No gate operations found in circuit.'

        # check for out-of-range qubit references
        for i, op in enumerate(circ.ops):
            for q in op.get('qubits', []):
                if not (0 <= q < circ.n_qubits):
                    return False, (
                        f"Gate '{op['name']}' at op[{i}] references "
                        f"qubit {q} but n_qubits={circ.n_qubits}.")
        return True, 'OK'

    # ────────────────────────────────────────────────────────────────
    # Private helpers
    # ────────────────────────────────────────────────────────────────
    @staticmethod
    def _strip_braced_blocks(text: str) -> str:
        """
        Replace every top-level ``{ ... }`` block (nesting respected) with
        a single ';'.

        The ';' terminates the statement that introduced the block, so a
        statement written directly after the closing brace is not glued
        onto it.  A '}' with no matching '{' is dropped; an unclosed '{'
        swallows the remainder of the text.
        """
        kept: List[str] = []
        depth = 0
        for ch in text:
            if ch == '{':
                if depth == 0:
                    kept.append(';')
                depth += 1
            elif ch == '}':
                if depth > 0:
                    depth -= 1
            elif depth == 0:
                kept.append(ch)
        return ''.join(kept)

    @staticmethod
    def _declare_register(table: Dict[str, int],
                          name: str,
                          size: int,
                          offset: int) -> int:
        """
        Record register ``name`` of ``size`` elements starting at absolute
        index ``offset`` and return the next free offset.

        Stores 'name[i]' → offset + i for each index, plus the bare
        'name' → offset (first element of the register).
        """
        for i in range(size):
            table[f'{name}[{i}]'] = offset + i
        table[name] = offset
        return offset + size

    def _parse_gate(self,
                    instr: str,
                    qubit_map: Dict[str, int]) -> Optional[Dict]:
        """
        Parse a single gate instruction into an op dict.

        The instruction is split in one pass:
          1. if it contains '(', the matching ')' is located by depth
             counting; the text between them is the parameter list,
             the text before '(' is the gate head and the text after
             ')' is the qubit list;
          2. otherwise the first whitespace separates gate head from
             qubit list.

        Returns
        -------
        ``{'type': 'gate', 'name', 'qubits', 'params'}`` or ``None`` when
        the parentheses are unbalanced, the gate head is not a plain
        identifier, or no qubit could be resolved.
        """
        instr = instr.strip()
        param_str = ''

        # ── locate parameter block '(...)' ──────────────────────────
        paren_open = instr.find('(')
        if paren_open != -1:
            paren_close = -1
            depth = 0
            for idx in range(paren_open, len(instr)):
                ch = instr[idx]
                if ch == '(':
                    depth += 1
                elif ch == ')':
                    depth -= 1
                    if depth == 0:
                        paren_close = idx
                        break
            if paren_close == -1:
                # Unbalanced parentheses — skip this instruction
                return None
            param_str = instr[paren_open + 1:paren_close].strip()
            gate_head = instr[:paren_open].strip()
            qubit_part = instr[paren_close + 1:].strip()
        else:
            # No parameters: split on first whitespace
            parts = instr.split(None, 1)
            gate_head = parts[0] if parts else ''
            qubit_part = parts[1] if len(parts) > 1 else ''

        gate_name_raw = gate_head.strip().lower()
        # Reject empty heads and anything that is not an identifier
        # (e.g. 'c[0]' from 'c[0] = measure q[0]', or a stray '}').
        # gate_head never contains '(', so only the identifier part of
        # _RE_GATE_HEAD can match.
        if not gate_name_raw or not self._RE_GATE_HEAD.match(gate_name_raw):
            return None
        gate_name = self._ALIAS.get(gate_name_raw, gate_name_raw)

        # ── parse parameters ────────────────────────────────────────
        params: List[float] = [
            self._eval_param(tok)
            for tok in self._split_params(param_str)
        ] if param_str else []

        # ── resolve qubits ──────────────────────────────────────────
        qubits = self._resolve_qubits(
            qubit_part.replace(' ', ''), qubit_map)
        if not qubits:
            return None

        return {
            'type': 'gate',
            'name': gate_name,
            'qubits': qubits,
            'params': params,
        }

    def _eval_param(self, tok: str) -> float:
        """
        Evaluate a parameter token to float.

        Handles: numeric literals, pi, pi/2, sqrt(2), cos(0.3), etc.
        Returns 0.0 on any evaluation error (deliberate silent fallback).

        SECURITY WARNING: this calls ``eval`` on text taken from the
        circuit source.  Emptying ``__builtins__`` does not make eval
        safe — ``_MATH_ENV`` exposes ``np`` and attribute access is
        unrestricted.  Do not feed QASM from untrusted sources through
        this parser without replacing this with a restricted expression
        evaluator (e.g. an ``ast``-based whitelist).
        """
        try:
            return float(eval(tok, self._MATH_ENV))  # noqa: S307
        except Exception:
            # Documented best-effort behaviour: unknown names, syntax
            # errors, non-real results, etc. all collapse to 0.0.
            return 0.0

    @staticmethod
    def _split_params(s: str) -> List[str]:
        """
        Split a comma-separated parameter string respecting nested
        parentheses.  e.g. 'pi/2, atan(1,0)' → ['pi/2', 'atan(1,0)']

        Tokens are stripped and empty tokens are discarded.
        """
        tokens: List[str] = []
        cur: List[str] = []
        depth = 0
        for ch in s:
            if ch == ',' and depth == 0:
                tokens.append(''.join(cur).strip())
                cur = []
                continue
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
            cur.append(ch)
        if cur:
            tokens.append(''.join(cur).strip())
        return [t for t in tokens if t]

    def _resolve_qubits(self,
                        s: str,
                        qmap: Dict[str, int]) -> List[int]:
        """
        Resolve a comma-separated qubit argument string to absolute indices.

        Handles, in this order per token
        --------------------------------
        - Range:    q[0:3] → [qmap['q[0]'], qmap['q[1]'], qmap['q[2]']]
                    (upper bound exclusive; indices missing from qmap
                    are skipped)
        - Known:    any token present in qmap, e.g. 'q[1]' or bare 'q'
                    (bare name → first qubit of that register)
        - Indexed:  base[n] not in qmap → the literal integer n
        - Numeric:  a token with digits and no letters/underscore → its
                    last run of digits as an integer

        Tokens matching none of the above (e.g. an undeclared bare
        register name) are silently ignored.

        NOTE(review): the OpenQASM 3 specification appears to define
        ranges as inclusive of the upper bound; the exclusive bound here
        is kept as the documented behaviour — confirm which is intended.
        """
        out: List[int] = []
        for tok in s.split(','):
            tok = tok.strip()
            if not tok:
                continue

            # ── range syntax: q[start:end] ──────────────────────────
            m = self._RE_RANGE.match(tok)
            if m:
                base = m.group(1)
                start = int(m.group(2))
                end = int(m.group(3))  # exclusive upper bound
                out.extend(
                    qmap[key]
                    for key in (f'{base}[{i}]' for i in range(start, end))
                    if key in qmap
                )
                continue

            # ── direct map lookup ───────────────────────────────────
            if tok in qmap:
                out.append(qmap[tok])
                continue

            # ── indexed: base[n] ────────────────────────────────────
            bracket = self._RE_INDEX.search(tok)
            if bracket:
                base = tok[:tok.index('[')]
                key = f'{base}[{bracket.group(1)}]'
                if key in qmap:
                    out.append(qmap[key])
                else:
                    # index not in map — fall back to the literal number
                    out.append(int(bracket.group(1)))
                continue

            # ── pure-numeric token ──────────────────────────────────
            # Only when the token contains no letters, so unknown
            # register names are never mapped to arbitrary integers.
            digits = re.findall(r'\d+', tok)
            if digits and not re.search(r'[a-zA-Z_]', tok):
                out.append(int(digits[-1]))
        return out
|