from __future__ import annotations

import io
import json
import math
import pathlib
import zipfile
from typing import Any, Dict, List, Tuple

# Values of `source` that the manifest validator accepts.
_ALLOWED_SOURCES = {"nvidia_ising_decoding", "generic_qec_bundle"}
# Values of `bundle_version` that the manifest validator accepts.
_SUPPORTED_BUNDLE_VERSIONS = {"1.0"}
# File suffixes treated as text when building previews.
_TEXT_PREVIEW_SUFFIXES = {".log", ".txt", ".json", ".csv"}

# Keys that must be present at the manifest root.
_REQUIRED_ROOT_FIELDS = (
    "bundle_version",
    "source",
    "code_family",
    "experiment_name",
    "distance",
    "n_rounds",
    "basis",
    "rotation",
    "noise_model_label",
)
# Keys that must be present in the manifest `model` object.
_REQUIRED_MODEL_FIELDS = ("variant", "model_id", "checkpoint_name")
# Keys that must be present in each decoder block.
_REQUIRED_DECODER_FIELDS = (
    "name",
    "ler",
    "latency_ms",
    "syndrome_density_before",
    "syndrome_density_after",
    "logical_failures",
    "num_samples",
)

_SAMPLE_BUNDLE_FILENAME = "quread_qec_bundle_sample.zip"

# Manifest written into the generated sample bundle.
_SAMPLE_MANIFEST = {
    "bundle_version": "1.0",
    "source": "nvidia_ising_decoding",
    "code_family": "surface_code",
    "experiment_name": "surface_d13_public_demo",
    "distance": 13,
    "n_rounds": 104,
    "basis": "X",
    "rotation": "O1",
    "noise_model_label": "public_default",
    "generated_by": "Quread sample generator",
    "timestamp": "2026-04-15T12:00:00Z",
    "notes": (
        "Sample artifact-driven bundle for decoder comparison review. "
        "Metrics are illustrative and not intended for benchmarking claims."
    ),
    "model": {
        "variant": "fast",
        "model_id": 1,
        "checkpoint_name": "Ising-Decoder-SurfaceCode-1-Fast.pt",
    },
    "decoders": {
        "baseline": {
            "name": "pymatching",
            "ler": 0.0123,
            "latency_ms": 4.8,
            "syndrome_density_before": 0.031,
            "syndrome_density_after": 0.031,
            "logical_failures": 123,
            "num_samples": 10000,
        },
        "ai_predecoder_plus_baseline": {
            "name": "ising_predecoder_plus_pymatching",
            "ler": 0.0104,
            "latency_ms": 3.2,
            "syndrome_density_before": 0.031,
            "syndrome_density_after": 0.011,
            "logical_failures": 104,
            "num_samples": 10000,
        },
    },
    "artifacts": [
        {"path": "artifacts/run.log", "kind": "log"},
        {"path": "artifacts/notes.txt", "kind": "notes"},
        {"path": "artifacts/decoder_metrics.csv", "kind": "csv"},
        {"path": "artifacts/model.onnx", "kind": "onnx"},
        {"path": "artifacts/config.json", "kind": "json"},
    ],
}

# Archive member path -> content (str or bytes) for the generated sample bundle.
_SAMPLE_ARTIFACTS = {
    "artifacts/run.log": "\n".join(
        [
            "[sample] source=nvidia_ising_decoding",
            "[sample] experiment=surface_d13_public_demo",
            "[baseline] decoder=pymatching ler=0.0123 latency_ms=4.8 logical_failures=123",
            "[ai_predecoder] decoder=ising_predecoder_plus_pymatching ler=0.0104 latency_ms=3.2 logical_failures=104",
            "[summary] latency_speedup=1.500 syndrome_density_reduction=0.020",
        ]
    )
    + "\n",
    "artifacts/notes.txt": (
        "This sample bundle demonstrates the expected Quread QEC Bundle structure.\n"
        "Upload it into QEC Decoder Lab to test parsing, preview, comparison charts, and exports.\n"
    ),
    "artifacts/decoder_metrics.csv": (
        "decoder_name,mode,ler,latency_ms,syndrome_density_before,syndrome_density_after,logical_failures,num_samples\n"
        "pymatching,baseline,0.0123,4.8,0.031,0.031,123,10000\n"
        "ising_predecoder_plus_pymatching,ai_predecoder_plus_baseline,0.0104,3.2,0.031,0.011,104,10000\n"
    ),
    "artifacts/model.onnx": b"sample-onnx-placeholder",
    "artifacts/config.json": json.dumps(
        {
            "surface_code": {"distance": 13, "rounds": 104, "basis": "X", "rotation": "O1"},
            "runtime": {"device": "offline_review_only", "batch_size": 1024},
        },
        indent=2,
        sort_keys=True,
    ),
}

# In-memory payload types accepted directly as archive content.
_BYTES_LIKE = (bytes, bytearray, memoryview)


def _read_path_bytes(raw_path: Any) -> Tuple[bytes, str]:
    """Read the file at *raw_path* and return ``(content, file name)``.

    Raises:
        ValueError: if the path does not exist or is not a regular file.
    """
    path = pathlib.Path(str(raw_path))
    if not path.exists():
        raise ValueError("Uploaded QEC bundle path does not exist.")
    # A directory passes `exists()` but would make `read_bytes()` raise an
    # OSError subclass instead of the ValueError this module reports.
    if not path.is_file():
        raise ValueError("Uploaded QEC bundle path is not a file.")
    return path.read_bytes(), path.name


def _read_bundle_bytes(bundle_input) -> Tuple[bytes, str]:
    """Resolve an upload into ``(zip bytes, display file name)``.

    Accepted inputs:
      * bytes-like payload (``bytes``, ``bytearray``, ``memoryview``);
      * ``str`` or ``pathlib`` path to a file on disk;
      * ``dict`` with a bytes-like ``"data"`` entry (and optional ``"name"``),
        or otherwise a ``"name"`` / ``"path"`` entry pointing to a file;
      * any other object exposing a truthy ``name`` attribute, which is used
        as a file path.

    Raises:
        ValueError: if the input is missing, unsupported, points to a missing
            or non-file path, or the content is not a zip archive.
    """
    if bundle_input is None:
        raise ValueError("Upload a Quread QEC Bundle zip first.")

    if isinstance(bundle_input, _BYTES_LIKE):
        data, filename = bytes(bundle_input), "qec_bundle.zip"
    elif isinstance(bundle_input, (str, pathlib.PurePath)):
        # Path objects must be handled here: the generic `.name` fallback
        # below would keep only the basename and lose the directory.
        data, filename = _read_path_bytes(bundle_input)
    elif isinstance(bundle_input, dict):
        raw = bundle_input.get("data")
        if isinstance(raw, _BYTES_LIKE):
            data = bytes(raw)
            filename = pathlib.Path(str(bundle_input.get("name") or "qec_bundle.zip")).name
        else:
            maybe_path = bundle_input.get("name") or bundle_input.get("path")
            if not maybe_path:
                raise ValueError("Unsupported QEC bundle input format.")
            data, filename = _read_path_bytes(maybe_path)
    else:
        maybe_name = getattr(bundle_input, "name", None)
        if not maybe_name:
            raise ValueError("Unsupported QEC bundle input format.")
        data, filename = _read_path_bytes(maybe_name)

    if not zipfile.is_zipfile(io.BytesIO(data)):
        raise ValueError("Uploaded file is not a valid QEC bundle zip archive.")
    return data, filename


def _require_fields(obj: Dict[str, Any], required: Tuple[str, ...], prefix: str) -> None:
    """Raise ``ValueError`` listing every key of *required* absent from *obj*.

    Missing keys are reported as ``prefix.key``, or just ``key`` when *prefix*
    is empty.
    """
    missing = [f"{prefix}.{field}" if prefix else field for field in required if field not in obj]
    if missing:
        raise ValueError("QEC bundle manifest missing required fields: " + ", ".join(missing))


def _as_dict(value: Any, field_name: str) -> Dict[str, Any]:
    """Return a shallow copy of *value*, raising ``ValueError`` if it is not a dict."""
    if not isinstance(value, dict):
        raise ValueError(f"QEC bundle field `{field_name}` must be an object.")
    return dict(value)


def _as_str(value: Any, field_name: str) -> str:
    """Return *value* stringified and stripped; falsy or blank values raise ``ValueError``."""
    text = str(value or "").strip()
    if not text:
        raise ValueError(f"QEC bundle field `{field_name}` must be a non-empty string.")
    return text


def _as_int(value: Any, field_name: str) -> int:
    """Convert *value* to ``int`` without silently losing information.

    Integral floats (``13.0``) and integer strings (``"13"``) are accepted.
    Booleans and non-integral floats are rejected rather than being coerced
    to ``0``/``1`` or truncated.

    Raises:
        ValueError: if *value* cannot be interpreted as an integer.
    """
    message = f"QEC bundle field `{field_name}` must be an integer."
    # bool is an int subclass; a JSON `true` is never a meaningful count.
    if isinstance(value, bool):
        raise ValueError(message)
    # `is_integer()` is False for NaN and infinities as well as fractions.
    if isinstance(value, float) and not value.is_integer():
        raise ValueError(message)
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError) as exc:
        raise ValueError(message) from exc


def _as_float(value: Any, field_name: str) -> float:
    """Convert *value* to a finite ``float``.

    Booleans, NaN, and infinities are rejected. ``json.loads`` accepts the
    non-standard ``NaN``/``Infinity`` tokens, and ``json.dumps`` would write
    them back out as invalid JSON.

    Raises:
        ValueError: if *value* is not a finite number.
    """
    message = f"QEC bundle field `{field_name}` must be numeric."
    if isinstance(value, bool):
        raise ValueError(message)
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError) as exc:
        raise ValueError(message) from exc
    if not math.isfinite(number):
        raise ValueError(message)
    return number


def _normalize_manifest(raw_manifest: Dict[str, Any]) -> Dict[str, Any]:
    """Validate a parsed ``manifest.json`` and return a normalized copy.

    The result contains only the known keys, with strings stripped, `source`
    and `code_family` lower-cased, and numeric fields coerced to
    ``int``/``float``. Optional `generated_by`, `timestamp`, and `notes`
    default to an empty string; optional `artifacts` defaults to an empty list.

    Raises:
        ValueError: on any missing, unsupported, or malformed field.
    """
    manifest = _as_dict(raw_manifest, "manifest")
    _require_fields(manifest, _REQUIRED_ROOT_FIELDS, "")

    bundle_version = _as_str(manifest.get("bundle_version"), "bundle_version")
    if bundle_version not in _SUPPORTED_BUNDLE_VERSIONS:
        raise ValueError(
            f"Unsupported QEC bundle version `{bundle_version}`. Supported versions: {', '.join(sorted(_SUPPORTED_BUNDLE_VERSIONS))}."
        )

    source = _as_str(manifest.get("source"), "source").lower()
    if source not in _ALLOWED_SOURCES:
        raise ValueError(
            f"Unsupported QEC bundle source `{source}`. Supported sources: {', '.join(sorted(_ALLOWED_SOURCES))}."
        )

    code_family = _as_str(manifest.get("code_family"), "code_family").lower()
    if code_family != "surface_code":
        raise ValueError(f"Unsupported QEC code_family `{code_family}`. V1 supports `surface_code` only.")

    model = _as_dict(manifest.get("model"), "model")
    _require_fields(model, _REQUIRED_MODEL_FIELDS, "model")

    decoders = _as_dict(manifest.get("decoders"), "decoders")
    block_names = ("baseline", "ai_predecoder_plus_baseline")
    # Check that both blocks exist before validating either one, so a missing
    # block is reported ahead of field-level errors in the other.
    if any(block_name not in decoders for block_name in block_names):
        raise ValueError(
            "QEC bundle manifest must include both `decoders.baseline` and `decoders.ai_predecoder_plus_baseline`."
        )

    normalized_decoders: Dict[str, Dict[str, Any]] = {}
    for block_name in block_names:
        label = f"decoders.{block_name}"
        decoder = _as_dict(decoders.get(block_name), label)
        _require_fields(decoder, _REQUIRED_DECODER_FIELDS, label)
        normalized_decoders[block_name] = {
            "name": _as_str(decoder.get("name"), f"{label}.name"),
            "ler": _as_float(decoder.get("ler"), f"{label}.ler"),
            "latency_ms": _as_float(decoder.get("latency_ms"), f"{label}.latency_ms"),
            "syndrome_density_before": _as_float(
                decoder.get("syndrome_density_before"),
                f"{label}.syndrome_density_before",
            ),
            "syndrome_density_after": _as_float(
                decoder.get("syndrome_density_after"),
                f"{label}.syndrome_density_after",
            ),
            "logical_failures": _as_int(decoder.get("logical_failures"), f"{label}.logical_failures"),
            "num_samples": _as_int(decoder.get("num_samples"), f"{label}.num_samples"),
        }

    # Only an absent/null `artifacts` defaults to empty; a falsy non-list such
    # as `{}` or `""` must still fail the type check below.
    raw_artifacts = manifest.get("artifacts")
    if raw_artifacts is None:
        raw_artifacts = []
    if not isinstance(raw_artifacts, list):
        raise ValueError("QEC bundle field `artifacts` must be a list when present.")

    normalized_artifacts = []
    for idx, artifact in enumerate(raw_artifacts):
        art = _as_dict(artifact, f"artifacts[{idx}]")
        if "path" not in art:
            raise ValueError(f"QEC bundle artifact entry `artifacts[{idx}]` must include `path`.")
        path = _as_str(art.get("path"), f"artifacts[{idx}].path")
        # Fall back to the file suffix, then "file", when `kind` is missing or
        # blank (including whitespace-only).
        declared_kind = str(art.get("kind") or "").strip().lower()
        suffix_kind = pathlib.Path(path).suffix.lstrip(".").strip().lower()
        normalized_artifacts.append({"path": path, "kind": declared_kind or suffix_kind or "file"})

    return {
        "bundle_version": bundle_version,
        "source": source,
        "code_family": code_family,
        "experiment_name": _as_str(manifest.get("experiment_name"), "experiment_name"),
        "distance": _as_int(manifest.get("distance"), "distance"),
        "n_rounds": _as_int(manifest.get("n_rounds"), "n_rounds"),
        "basis": _as_str(manifest.get("basis"), "basis"),
        "rotation": _as_str(manifest.get("rotation"), "rotation"),
        "noise_model_label": _as_str(manifest.get("noise_model_label"), "noise_model_label"),
        "generated_by": str(manifest.get("generated_by") or "").strip(),
        "timestamp": str(manifest.get("timestamp") or "").strip(),
        "notes": str(manifest.get("notes") or "").strip(),
        "model": {
            "variant": _as_str(model.get("variant"), "model.variant"),
            "model_id": _as_int(model.get("model_id"), "model.model_id"),
            "checkpoint_name": _as_str(model.get("checkpoint_name"), "model.checkpoint_name"),
        },
        "decoders": normalized_decoders,
        "artifacts": normalized_artifacts,
    }


def _summary_rows(manifest: Dict[str, Any], source_name: str) -> List[List[Any]]:
    """Build ``[label, value]`` rows describing a normalized manifest.

    Optional `generated_by`, `timestamp`, and `notes` are shown as ``"-"``
    when empty or absent.
    """
    model = manifest["model"]
    return [
        ["Input bundle", str(source_name)],
        ["Bundle version", str(manifest["bundle_version"])],
        ["Source repo", str(manifest["source"])],
        ["Code family", str(manifest["code_family"])],
        ["Experiment name", str(manifest["experiment_name"])],
        ["Distance", int(manifest["distance"])],
        ["Rounds", int(manifest["n_rounds"])],
        ["Basis", str(manifest["basis"])],
        ["Rotation", str(manifest["rotation"])],
        ["Noise model", str(manifest["noise_model_label"])],
        ["Model variant", str(model["variant"])],
        ["Model ID", int(model["model_id"])],
        ["Checkpoint", str(model["checkpoint_name"])],
        ["Generated by", str(manifest.get("generated_by") or "-")],
        ["Timestamp", str(manifest.get("timestamp") or "-")],
        ["Notes", str(manifest.get("notes") or "-")],
    ]


def sample_qec_manifest() -> Dict[str, Any]:
    """Return an independent deep copy of the sample manifest."""
    # The JSON round-trip gives a deep copy, so callers can mutate the result
    # without touching the module-level template.
    return json.loads(json.dumps(_SAMPLE_MANIFEST))


def build_sample_qec_bundle_bytes() -> bytes:
    """Build the sample bundle as an in-memory deflate-compressed zip.

    The archive contains ``manifest.json`` plus every entry of
    ``_SAMPLE_ARTIFACTS``.
    """
    buf = io.BytesIO()
    manifest = sample_qec_manifest()
    with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("manifest.json", json.dumps(manifest, indent=2, sort_keys=True))
        for path, content in _SAMPLE_ARTIFACTS.items():
            zf.writestr(path, content if isinstance(content, bytes) else str(content))
    return buf.getvalue()


def sample_qec_bundle_filename() -> str:
    """Return the file name suggested for the sample bundle download."""
    return _SAMPLE_BUNDLE_FILENAME
# Maximum number of characters of one artifact shown in the preview text.
_PREVIEW_CHAR_LIMIT = 6000
# UTF-8 needs at most 4 bytes per character, so this many bytes always decode
# to more than _PREVIEW_CHAR_LIMIT characters. The extra 4 bytes keep a
# multi-byte character cut at the end of the read out of the kept prefix.
_PREVIEW_BYTE_LIMIT = _PREVIEW_CHAR_LIMIT * 4 + 4


def _read_text_preview(zf, path: str) -> str:
    """Decode at most ``_PREVIEW_BYTE_LIMIT`` bytes of archive member *path*."""
    # Streaming a bounded prefix avoids inflating a huge (or malicious) member
    # in full when only a short preview is needed.
    with zf.open(path) as member:
        raw = member.read(_PREVIEW_BYTE_LIMIT)
    return raw.decode("utf-8", errors="replace")


def parse_qec_bundle(bundle_input) -> Dict[str, Any]:
    """Parse an uploaded QEC bundle zip into a review-friendly dictionary.

    Args:
        bundle_input: any input accepted by ``_read_bundle_bytes``.

    Returns:
        A dict with keys ``source_name``, ``original_bundle_bytes``,
        ``manifest`` (normalized), ``manifest_json``, ``summary_rows``,
        ``artifact_rows`` (present files first, then missing ones, each sorted
        by path), ``preview_text`` and ``warnings``.

    Raises:
        ValueError: if the input is not a usable zip, ``manifest.json`` is
            missing or not valid JSON, or the manifest fails validation.

    Only a bounded prefix of each previewable member is decompressed, so a
    member whose first ``_PREVIEW_BYTE_LIMIT`` bytes are all whitespace is
    left out of the preview.
    """
    data, source_name = _read_bundle_bytes(bundle_input)
    warnings: List[str] = []

    with zipfile.ZipFile(io.BytesIO(data), "r") as zf:
        # A set removes duplicate entry names, which would otherwise produce
        # duplicate artifact rows.
        present = {n for n in zf.namelist() if not n.endswith("/")}
        if "manifest.json" not in present:
            raise ValueError("QEC bundle is missing required file `manifest.json`.")
        try:
            raw_manifest = json.loads(zf.read("manifest.json").decode("utf-8"))
        except Exception as exc:
            # Deliberately broad: read, decode, and parse failures are all
            # reported to the caller as one ValueError.
            raise ValueError("QEC bundle `manifest.json` is not valid JSON.") from exc

        manifest = _normalize_manifest(raw_manifest)
        manifest_json = json.dumps(manifest, indent=2, sort_keys=True)
        artifact_decl = {str(item["path"]): str(item["kind"]) for item in manifest.get("artifacts", [])}

        artifact_rows: List[Dict[str, Any]] = []
        preview_chunks = [f"===== manifest.json =====\n{manifest_json}"]

        for path in sorted(present - {"manifest.json"}):
            suffix = pathlib.Path(path).suffix.lower()
            kind = artifact_decl.get(path) or suffix.lstrip(".") or "file"
            info = zf.getinfo(path)
            previewable = suffix in _TEXT_PREVIEW_SUFFIXES
            artifact_rows.append(
                {
                    "path": path,
                    "kind": kind,
                    "status": "present",
                    "size_bytes": int(info.file_size),
                    "previewable": bool(previewable),
                }
            )
            if not previewable:
                continue
            try:
                text = _read_text_preview(zf, path)
            except Exception:
                # The preview is best-effort (encrypted, corrupt, or
                # unsupported members raise assorted exception types), but the
                # failure is reported instead of silently dropped.
                warnings.append(f"Could not read artifact for preview: {path}")
                continue
            if text.strip():
                trimmed = text[:_PREVIEW_CHAR_LIMIT]
                if len(text) > len(trimmed):
                    trimmed += "\n... [truncated]"
                preview_chunks.append(f"===== {path} =====\n{trimmed}")

        for path, kind in artifact_decl.items():
            if path in present:
                continue
            warnings.append(f"Listed artifact missing from bundle: {path}")
            artifact_rows.append(
                {
                    "path": path,
                    "kind": kind,
                    "status": "missing",
                    "size_bytes": 0,
                    "previewable": pathlib.Path(path).suffix.lower() in _TEXT_PREVIEW_SUFFIXES,
                }
            )

    baseline_samples = int(manifest["decoders"]["baseline"]["num_samples"])
    ai_samples = int(manifest["decoders"]["ai_predecoder_plus_baseline"]["num_samples"])
    if baseline_samples != ai_samples:
        warnings.append(
            "Baseline and AI decoder sample counts differ; comparisons are shown but should be interpreted carefully."
        )

    # Present files first, then missing ones; each group ordered by path.
    artifact_rows.sort(key=lambda row: (0 if str(row["status"]) == "present" else 1, str(row["path"])))

    return {
        "source_name": source_name,
        "original_bundle_bytes": data,
        "manifest": manifest,
        "manifest_json": manifest_json,
        "summary_rows": _summary_rows(manifest, source_name),
        "artifact_rows": artifact_rows,
        "preview_text": "\n\n".join(preview_chunks),
        "warnings": warnings,
    }