# Source: QureadAI / quread / qec_bundle.py
# Uploaded by hchevva ("Upload qec_bundle.py", commit 95db9a9, verified)
from __future__ import annotations

import io
import json
import math
import pathlib
import zipfile
from typing import Any, Dict, List, Tuple
# Values of the manifest `source` field that the parser accepts
# (the manifest value is lower-cased before the membership test).
_ALLOWED_SOURCES = {"generic_qec_bundle", "nvidia_ising_decoding"}

# Values of the manifest `bundle_version` field that the parser accepts.
_SUPPORTED_BUNDLE_VERSIONS = {"1.0"}

# Archive members with one of these suffixes are decoded into the text preview.
_TEXT_PREVIEW_SUFFIXES = {".csv", ".json", ".log", ".txt"}

# Keys that must be present at the top level of the manifest.
_REQUIRED_ROOT_FIELDS = (
    "bundle_version",
    "source",
    "code_family",
    "experiment_name",
    "distance",
    "n_rounds",
    "basis",
    "rotation",
    "noise_model_label",
)

# Keys that must be present inside the manifest's `model` object.
_REQUIRED_MODEL_FIELDS = (
    "variant",
    "model_id",
    "checkpoint_name",
)

# Keys that must be present inside every `decoders.<block>` object.
_REQUIRED_DECODER_FIELDS = (
    "name",
    "ler",
    "latency_ms",
    "syndrome_density_before",
    "syndrome_density_after",
    "logical_failures",
    "num_samples",
)

# File name reported for the generated sample bundle.
_SAMPLE_BUNDLE_FILENAME = "quread_qec_bundle_sample.zip"
# Manifest written into the generated sample bundle. The numbers are
# illustrative only (see the `notes` entry).
_SAMPLE_MANIFEST = {
    # --- required root fields -------------------------------------------
    "bundle_version": "1.0",
    "source": "nvidia_ising_decoding",
    "code_family": "surface_code",
    "experiment_name": "surface_d13_public_demo",
    "distance": 13,
    "n_rounds": 104,
    "basis": "X",
    "rotation": "O1",
    "noise_model_label": "public_default",
    # --- optional descriptive fields ------------------------------------
    "generated_by": "Quread sample generator",
    "timestamp": "2026-04-15T12:00:00Z",
    "notes": (
        "Sample artifact-driven bundle for decoder comparison review. "
        "Metrics are illustrative and not intended for benchmarking claims."
    ),
    # --- model description ----------------------------------------------
    "model": {
        "variant": "fast",
        "model_id": 1,
        "checkpoint_name": "Ising-Decoder-SurfaceCode-1-Fast.pt",
    },
    # --- the two decoder blocks that are compared -----------------------
    "decoders": {
        "baseline": {
            "name": "pymatching",
            "ler": 0.0123,
            "latency_ms": 4.8,
            "syndrome_density_before": 0.031,
            "syndrome_density_after": 0.031,
            "logical_failures": 123,
            "num_samples": 10000,
        },
        "ai_predecoder_plus_baseline": {
            "name": "ising_predecoder_plus_pymatching",
            "ler": 0.0104,
            "latency_ms": 3.2,
            "syndrome_density_before": 0.031,
            "syndrome_density_after": 0.011,
            "logical_failures": 104,
            "num_samples": 10000,
        },
    },
    # --- files shipped next to the manifest -----------------------------
    "artifacts": [
        {"path": "artifacts/run.log", "kind": "log"},
        {"path": "artifacts/notes.txt", "kind": "notes"},
        {"path": "artifacts/decoder_metrics.csv", "kind": "csv"},
        {"path": "artifacts/model.onnx", "kind": "onnx"},
        {"path": "artifacts/config.json", "kind": "json"},
    ],
}
# Archive member path -> content for the generated sample bundle.
# Values are `str` except for the ONNX placeholder, which is `bytes`.
_SAMPLE_ARTIFACTS = {
    "artifacts/run.log": (
        "[sample] source=nvidia_ising_decoding\n"
        "[sample] experiment=surface_d13_public_demo\n"
        "[baseline] decoder=pymatching ler=0.0123 latency_ms=4.8 logical_failures=123\n"
        "[ai_predecoder] decoder=ising_predecoder_plus_pymatching ler=0.0104 latency_ms=3.2 logical_failures=104\n"
        "[summary] latency_speedup=1.500 syndrome_density_reduction=0.020\n"
    ),
    "artifacts/notes.txt": "".join(
        [
            "This sample bundle demonstrates the expected Quread QEC Bundle structure.\n",
            "Upload it into QEC Decoder Lab to test parsing, preview, comparison charts, and exports.\n",
        ]
    ),
    "artifacts/decoder_metrics.csv": "".join(
        row + "\n"
        for row in (
            "decoder_name,mode,ler,latency_ms,syndrome_density_before,syndrome_density_after,logical_failures,num_samples",
            "pymatching,baseline,0.0123,4.8,0.031,0.031,123,10000",
            "ising_predecoder_plus_pymatching,ai_predecoder_plus_baseline,0.0104,3.2,0.031,0.011,104,10000",
        )
    ),
    # Placeholder bytes only; this is not a loadable ONNX model.
    "artifacts/model.onnx": b"sample-onnx-placeholder",
    "artifacts/config.json": json.dumps(
        {
            "surface_code": {"distance": 13, "rounds": 104, "basis": "X", "rotation": "O1"},
            "runtime": {"device": "offline_review_only", "batch_size": 1024},
        },
        indent=2,
        sort_keys=True,
    ),
}
def _load_bundle_file(candidate: Any) -> Tuple[bytes, str]:
    """Read a bundle from disk and return ``(raw_bytes, base_name)``.

    Raises:
        ValueError: If ``candidate`` does not point at a regular file.
    """
    path = pathlib.Path(str(candidate))
    # `is_file()` rather than `exists()`: a directory (including the empty
    # string, which resolves to ".") must be reported as a bad upload instead
    # of leaking an OSError from `read_bytes()`.
    if not path.is_file():
        raise ValueError("Uploaded QEC bundle path does not exist.")
    return path.read_bytes(), path.name


def _read_bundle_bytes(bundle_input) -> Tuple[bytes, str]:
    """Resolve an uploaded bundle into its raw zip bytes and a display name.

    Accepted inputs:
      * bytes-like payloads (``bytes``, ``bytearray``, ``memoryview``);
      * a filesystem path given as ``str`` or ``pathlib`` path object;
      * a ``dict`` carrying either a bytes-like ``"data"`` entry (optionally
        with ``"name"``) or a ``"name"`` / ``"path"`` entry pointing at a file;
      * any other object exposing a truthy ``name`` attribute that is a path.

    Args:
        bundle_input: The upload in one of the shapes listed above.

    Returns:
        ``(data, filename)`` where ``data`` is the archive content and
        ``filename`` is the base name of the source file, or
        ``"qec_bundle.zip"`` when no name is available.

    Raises:
        ValueError: If the input is ``None``, has an unsupported shape, points
            at a missing file, or does not contain a zip archive.
    """
    if bundle_input is None:
        raise ValueError("Upload a Quread QEC Bundle zip first.")

    bytes_like = (bytes, bytearray, memoryview)

    if isinstance(bundle_input, bytes_like):
        data, filename = bytes(bundle_input), "qec_bundle.zip"
    elif isinstance(bundle_input, (str, pathlib.PurePath)):
        # Path objects must be handled here: their `.name` attribute is only
        # the base name, so the generic attribute branch below would look for
        # the file relative to the current working directory.
        data, filename = _load_bundle_file(bundle_input)
    elif isinstance(bundle_input, dict):
        raw = bundle_input.get("data")
        if isinstance(raw, bytes_like):
            data = bytes(raw)
            filename = pathlib.Path(str(bundle_input.get("name") or "qec_bundle.zip")).name
        else:
            candidates = [
                entry
                for entry in (bundle_input.get("name"), bundle_input.get("path"))
                if entry
            ]
            if not candidates:
                raise ValueError("Unsupported QEC bundle input format.")
            # Use the first candidate that is an actual file, so a stale
            # "name" does not shadow a valid "path". Fall back to the first
            # candidate so the missing-file error is raised as before.
            chosen = next(
                (entry for entry in candidates if pathlib.Path(str(entry)).is_file()),
                candidates[0],
            )
            data, filename = _load_bundle_file(chosen)
    else:
        maybe_name = getattr(bundle_input, "name", None)
        if not maybe_name:
            raise ValueError("Unsupported QEC bundle input format.")
        data, filename = _load_bundle_file(maybe_name)

    if not zipfile.is_zipfile(io.BytesIO(data)):
        raise ValueError("Uploaded file is not a valid QEC bundle zip archive.")
    return data, filename
def _require_fields(obj: Dict[str, Any], required: Tuple[str, ...], prefix: str) -> None:
    """Ensure every key in ``required`` is present in ``obj``.

    Args:
        obj: Mapping to inspect.
        required: Keys that must be present (presence only; values are not
            inspected).
        prefix: Dotted path prepended to each missing key in the error
            message; an empty string reports the bare key.

    Raises:
        ValueError: Listing all missing keys, in the order of ``required``.
    """
    absent = []
    for key in required:
        if key in obj:
            continue
        absent.append(f"{prefix}.{key}" if prefix else key)
    if not absent:
        return
    raise ValueError("QEC bundle manifest missing required fields: " + ", ".join(absent))
def _as_dict(value: Any, field_name: str) -> Dict[str, Any]:
    """Return a shallow copy of ``value`` as a plain ``dict``.

    Args:
        value: Candidate value taken from the manifest.
        field_name: Name used in the error message.

    Raises:
        ValueError: If ``value`` is not a ``dict``.
    """
    if isinstance(value, dict):
        # Copy so callers can work on the result without touching the input.
        duplicate = dict(value)
        return duplicate
    raise ValueError(f"QEC bundle field `{field_name}` must be an object.")
def _as_str(value: Any, field_name: str) -> str:
    """Coerce ``value`` to a stripped, non-empty string.

    Any falsy value (``None``, ``""``, ``0``, ...) is treated as empty; every
    other value is passed through ``str()`` and stripped.

    Args:
        value: Candidate value taken from the manifest.
        field_name: Name used in the error message.

    Raises:
        ValueError: If the resulting text is empty.
    """
    cleaned = (str(value) if value else "").strip()
    if cleaned:
        return cleaned
    raise ValueError(f"QEC bundle field `{field_name}` must be a non-empty string.")
def _as_int(value: Any, field_name: str) -> int:
    """Coerce ``value`` to ``int`` without silently losing information.

    Integers, integer-like strings (e.g. ``"104"``) and floats with an
    integral value (e.g. ``13.0``) are accepted. Booleans and floats with a
    fractional part, NaN or infinity are rejected rather than being coerced
    or truncated.

    Args:
        value: Candidate value taken from the manifest.
        field_name: Name used in the error message.

    Raises:
        ValueError: If ``value`` cannot be represented exactly as an integer.
    """
    message = f"QEC bundle field `{field_name}` must be an integer."
    # `bool` is a subclass of `int`; a JSON `true` is not a meaningful count.
    if isinstance(value, bool):
        raise ValueError(message)
    if isinstance(value, float):
        # `is_integer()` is False for NaN and +/-inf as well as for fractions,
        # so 13.7 is rejected instead of being truncated to 13.
        if not value.is_integer():
            raise ValueError(message)
        return int(value)
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError) as exc:
        raise ValueError(message) from exc
def _as_float(value: Any, field_name: str) -> float:
    """Coerce ``value`` to a finite ``float``.

    Numbers and numeric strings are accepted. Booleans are rejected, and so
    are NaN and +/-infinity: ``json.loads`` accepts the ``NaN`` / ``Infinity``
    literals by default, and letting them through would put values into the
    normalized manifest that do not serialise to valid JSON.

    Args:
        value: Candidate value taken from the manifest.
        field_name: Name used in the error message.

    Raises:
        ValueError: If ``value`` is not numeric or is not finite.
    """
    # `bool` is a subclass of `int`; a JSON `true` is not a measurement.
    if isinstance(value, bool):
        raise ValueError(f"QEC bundle field `{field_name}` must be numeric.")
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError) as exc:
        raise ValueError(f"QEC bundle field `{field_name}` must be numeric.") from exc
    if not math.isfinite(number):
        raise ValueError(f"QEC bundle field `{field_name}` must be a finite number.")
    return number
def _normalize_manifest(raw_manifest: Dict[str, Any]) -> Dict[str, Any]:
    """Validate a parsed ``manifest.json`` and return its normalized form.

    Checks run in this order: required root keys, ``bundle_version``,
    ``source``, ``code_family``, the ``model`` object, both decoder blocks,
    the optional ``artifacts`` list, and finally the remaining scalar fields.
    The first failing check raises.

    Args:
        raw_manifest: Object decoded from ``manifest.json``.

    Returns:
        A new dict with coerced values. ``source`` and ``code_family`` are
        lower-cased; ``generated_by``, ``timestamp`` and ``notes`` default to
        an empty string; each artifact entry is reduced to ``path`` / ``kind``.

    Raises:
        ValueError: On any missing, unsupported or mistyped field.
    """
    doc = _as_dict(raw_manifest, "manifest")
    _require_fields(doc, _REQUIRED_ROOT_FIELDS, "")

    version = _as_str(doc.get("bundle_version"), "bundle_version")
    if version not in _SUPPORTED_BUNDLE_VERSIONS:
        supported_versions = ", ".join(sorted(_SUPPORTED_BUNDLE_VERSIONS))
        raise ValueError(
            f"Unsupported QEC bundle version `{version}`. Supported versions: {supported_versions}."
        )

    origin = _as_str(doc.get("source"), "source").lower()
    if origin not in _ALLOWED_SOURCES:
        supported_sources = ", ".join(sorted(_ALLOWED_SOURCES))
        raise ValueError(
            f"Unsupported QEC bundle source `{origin}`. Supported sources: {supported_sources}."
        )

    family = _as_str(doc.get("code_family"), "code_family").lower()
    if family != "surface_code":
        raise ValueError(f"Unsupported QEC code_family `{family}`. V1 supports `surface_code` only.")

    model_raw = _as_dict(doc.get("model"), "model")
    _require_fields(model_raw, _REQUIRED_MODEL_FIELDS, "model")

    # Both decoder blocks must be present before either one is validated.
    decoders_raw = _as_dict(doc.get("decoders"), "decoders")
    block_names = ("baseline", "ai_predecoder_plus_baseline")
    if any(name not in decoders_raw for name in block_names):
        raise ValueError(
            "QEC bundle manifest must include both `decoders.baseline` and `decoders.ai_predecoder_plus_baseline`."
        )

    float_keys = ("ler", "latency_ms", "syndrome_density_before", "syndrome_density_after")
    int_keys = ("logical_failures", "num_samples")
    decoders_out: Dict[str, Dict[str, Any]] = {}
    for name in block_names:
        scope = f"decoders.{name}"
        block = _as_dict(decoders_raw.get(name), scope)
        _require_fields(block, _REQUIRED_DECODER_FIELDS, scope)
        entry: Dict[str, Any] = {"name": _as_str(block.get("name"), f"{scope}.name")}
        for key in float_keys:
            entry[key] = _as_float(block.get(key), f"{scope}.{key}")
        for key in int_keys:
            entry[key] = _as_int(block.get(key), f"{scope}.{key}")
        decoders_out[name] = entry

    # `artifacts` is optional; a missing or falsy value means "none declared".
    declared = doc.get("artifacts") or []
    if not isinstance(declared, list):
        raise ValueError("QEC bundle field `artifacts` must be a list when present.")
    artifacts_out = []
    for position, item in enumerate(declared):
        label = f"artifacts[{position}]"
        record = _as_dict(item, label)
        if "path" not in record:
            raise ValueError(f"QEC bundle artifact entry `{label}` must include `path`.")
        art_path = _as_str(record.get("path"), f"{label}.path")
        # Without an explicit kind, fall back to the file suffix, then "file".
        kind_source = record.get("kind")
        if not kind_source:
            kind_source = pathlib.Path(art_path).suffix.lstrip(".") or "file"
        artifacts_out.append({"path": art_path, "kind": str(kind_source).strip().lower()})

    normalized = {
        "bundle_version": version,
        "source": origin,
        "code_family": family,
        "experiment_name": _as_str(doc.get("experiment_name"), "experiment_name"),
        "distance": _as_int(doc.get("distance"), "distance"),
        "n_rounds": _as_int(doc.get("n_rounds"), "n_rounds"),
        "basis": _as_str(doc.get("basis"), "basis"),
        "rotation": _as_str(doc.get("rotation"), "rotation"),
        "noise_model_label": _as_str(doc.get("noise_model_label"), "noise_model_label"),
        "generated_by": str(doc.get("generated_by") or "").strip(),
        "timestamp": str(doc.get("timestamp") or "").strip(),
        "notes": str(doc.get("notes") or "").strip(),
        "model": {
            "variant": _as_str(model_raw.get("variant"), "model.variant"),
            "model_id": _as_int(model_raw.get("model_id"), "model.model_id"),
            "checkpoint_name": _as_str(model_raw.get("checkpoint_name"), "model.checkpoint_name"),
        },
        "decoders": decoders_out,
        "artifacts": artifacts_out,
    }
    return normalized
def _summary_rows(manifest: Dict[str, Any], source_name: str) -> List[List[Any]]:
    """Build the ``[label, value]`` rows summarising a bundle.

    Args:
        manifest: Manifest dict containing the root fields and a ``model``
            dict; ``generated_by``, ``timestamp`` and ``notes`` may be absent.
        source_name: Name of the uploaded bundle, shown in the first row.

    Returns:
        Sixteen two-element lists in a fixed order. Distance, rounds and model
        id are ``int``; every other value is ``str``. Absent or empty optional
        fields are shown as ``"-"``.
    """
    table: List[List[Any]] = [["Input bundle", str(source_name)]]

    table.extend(
        [label, str(manifest[key])]
        for label, key in (
            ("Bundle version", "bundle_version"),
            ("Source repo", "source"),
            ("Code family", "code_family"),
            ("Experiment name", "experiment_name"),
        )
    )
    table.extend(
        [label, int(manifest[key])]
        for label, key in (("Distance", "distance"), ("Rounds", "n_rounds"))
    )
    table.extend(
        [label, str(manifest[key])]
        for label, key in (
            ("Basis", "basis"),
            ("Rotation", "rotation"),
            ("Noise model", "noise_model_label"),
        )
    )

    model = manifest["model"]
    table.append(["Model variant", str(model["variant"])])
    table.append(["Model ID", int(model["model_id"])])
    table.append(["Checkpoint", str(model["checkpoint_name"])])

    # Optional fields: a dash stands in for anything absent or empty.
    table.extend(
        [label, str(manifest.get(key) or "-")]
        for label, key in (
            ("Generated by", "generated_by"),
            ("Timestamp", "timestamp"),
            ("Notes", "notes"),
        )
    )
    return table
def sample_qec_manifest() -> Dict[str, Any]:
    """Return an independent copy of the sample manifest.

    The copy is produced by a JSON round-trip, so callers may mutate the
    result without affecting the module-level template.
    """
    serialized = json.dumps(_SAMPLE_MANIFEST)
    return json.loads(serialized)
def build_sample_qec_bundle_bytes() -> bytes:
    """Assemble the sample bundle in memory and return the zip content.

    The archive holds ``manifest.json`` (pretty-printed with sorted keys)
    followed by every entry of the sample artifact table, deflate-compressed.
    """
    archive = io.BytesIO()
    manifest_text = json.dumps(sample_qec_manifest(), indent=2, sort_keys=True)
    with zipfile.ZipFile(archive, "w", compression=zipfile.ZIP_DEFLATED) as bundle:
        bundle.writestr("manifest.json", manifest_text)
        for member, payload in _SAMPLE_ARTIFACTS.items():
            # Bytes are stored verbatim; anything else is written as text.
            body = payload if isinstance(payload, bytes) else str(payload)
            bundle.writestr(member, body)
    return archive.getvalue()
def sample_qec_bundle_filename() -> str:
    """Return the file name used for the generated sample bundle."""
    filename: str = _SAMPLE_BUNDLE_FILENAME
    return filename
# Maximum number of characters of each text artifact shown in the preview.
_PREVIEW_CHAR_LIMIT = 6000


def _read_text_preview(zf: zipfile.ZipFile, info: zipfile.ZipInfo, warnings: List[str]) -> str:
    """Return the preview text for one archive member, or ``""`` if none.

    Only a bounded prefix of the member is decompressed, so a very large (or
    maliciously compressed) text file cannot exhaust memory just to display
    its first few thousand characters. A member that cannot be read is
    skipped and reported through ``warnings``.
    """
    # UTF-8 uses at most 4 bytes per character, so this many bytes always
    # decode to more than `_PREVIEW_CHAR_LIMIT` characters when the member is
    # longer than the limit; a character split at the read boundary lands
    # beyond the part that is kept.
    byte_budget = 4 * _PREVIEW_CHAR_LIMIT + 4
    try:
        with zf.open(info, "r") as handle:
            raw = handle.read(byte_budget)
    except Exception:
        # Preview is best effort (corrupt, encrypted or unsupported members
        # must not fail the whole parse), but the user should be told.
        warnings.append(f"Could not read artifact for preview: {info.filename}")
        return ""
    text = raw.decode("utf-8", errors="replace")
    if not text.strip():
        return ""
    trimmed = text[:_PREVIEW_CHAR_LIMIT]
    if len(text) > len(trimmed):
        trimmed += "\n... [truncated]"
    return trimmed


def parse_qec_bundle(bundle_input) -> Dict[str, Any]:
    """Parse an uploaded QEC bundle zip into a display-ready dictionary.

    Args:
        bundle_input: Upload in any shape accepted by ``_read_bundle_bytes``.

    Returns:
        A dict with these keys:
          * ``source_name``: name of the uploaded file;
          * ``original_bundle_bytes``: the raw archive bytes;
          * ``manifest``: the normalized manifest;
          * ``manifest_json``: the normalized manifest as pretty-printed JSON;
          * ``summary_rows``: ``[label, value]`` rows describing the bundle;
          * ``artifact_rows``: one dict per archive member other than
            ``manifest.json`` (status ``"present"``) and per declared artifact
            absent from the archive (status ``"missing"``), present first,
            then by path;
          * ``preview_text``: the manifest followed by the leading part of
            every non-blank previewable text artifact;
          * ``warnings``: human-readable notes about non-fatal problems.

    Raises:
        ValueError: If the upload is not a readable zip archive, lacks
            ``manifest.json``, or the manifest is invalid.
    """
    data, source_name = _read_bundle_bytes(bundle_input)
    warnings: List[str] = []

    # `is_zipfile` only checks for the end-of-central-directory record, so the
    # constructor can still reject the archive; report that as the same
    # ValueError used for every other invalid upload.
    try:
        archive = zipfile.ZipFile(io.BytesIO(data), "r")
    except zipfile.BadZipFile as exc:
        raise ValueError("Uploaded file is not a valid QEC bundle zip archive.") from exc

    with archive as zf:
        # Directory entries end with "/" and carry no content.
        names = sorted(n for n in zf.namelist() if not n.endswith("/"))
        present = set(names)
        if "manifest.json" not in present:
            raise ValueError("QEC bundle is missing required file `manifest.json`.")
        try:
            raw_manifest = json.loads(zf.read("manifest.json").decode("utf-8"))
        except Exception as exc:
            raise ValueError("QEC bundle `manifest.json` is not valid JSON.") from exc

        manifest = _normalize_manifest(raw_manifest)
        manifest_json = json.dumps(manifest, indent=2, sort_keys=True)
        artifact_decl = {
            str(item["path"]): str(item["kind"]) for item in manifest.get("artifacts", [])
        }

        artifact_rows: List[Dict[str, Any]] = []
        preview_chunks = [f"===== manifest.json =====\n{manifest_json}"]

        for path in (n for n in names if n != "manifest.json"):
            suffix = pathlib.Path(path).suffix.lower()
            # The declared kind wins; otherwise use the suffix, then "file".
            kind = artifact_decl.get(path) or suffix.lstrip(".") or "file"
            info = zf.getinfo(path)
            previewable = suffix in _TEXT_PREVIEW_SUFFIXES
            artifact_rows.append(
                {
                    "path": path,
                    "kind": kind,
                    "status": "present",
                    "size_bytes": int(info.file_size),
                    "previewable": bool(previewable),
                }
            )
            if previewable:
                excerpt = _read_text_preview(zf, info, warnings)
                if excerpt:
                    preview_chunks.append(f"===== {path} =====\n{excerpt}")

        # Declared in the manifest but not shipped in the archive.
        for path, kind in artifact_decl.items():
            if path in present:
                continue
            warnings.append(f"Listed artifact missing from bundle: {path}")
            artifact_rows.append(
                {
                    "path": path,
                    "kind": kind,
                    "status": "missing",
                    "size_bytes": 0,
                    "previewable": pathlib.Path(path).suffix.lower() in _TEXT_PREVIEW_SUFFIXES,
                }
            )

    baseline_samples = int(manifest["decoders"]["baseline"]["num_samples"])
    ai_samples = int(manifest["decoders"]["ai_predecoder_plus_baseline"]["num_samples"])
    if baseline_samples != ai_samples:
        warnings.append(
            "Baseline and AI decoder sample counts differ; comparisons are shown but should be interpreted carefully."
        )

    # Present files first, then missing ones; alphabetical within each group.
    artifact_rows.sort(key=lambda row: (0 if str(row["status"]) == "present" else 1, str(row["path"])))
    return {
        "source_name": source_name,
        "original_bundle_bytes": data,
        "manifest": manifest,
        "manifest_json": manifest_json,
        "summary_rows": _summary_rows(manifest, source_name),
        "artifact_rows": artifact_rows,
        "preview_text": "\n\n".join(preview_chunks),
        "warnings": warnings,
    }