// Shared audio output for the PCM TTS engines (Kokoro, Kitten). One AudioContext,
// one source at a time — the narrator awaits playSamples() per sentence so chunks
// play in order without overlap. Created lazily on first use (which happens after a
// user gesture, so resume() is allowed by autoplay policy).
// Shared AudioContext instance; stays null until ctx() creates it on first use.
let _ctx = null
// Buffer source node started by the latest playSamples() call, or null once that
// source has ended or stopAudio() has cleared it.
let _cur = null

// Return the shared AudioContext, constructing it on the first call. Uses the
// standard constructor when the page exposes one and the webkit-prefixed one
// otherwise.
function ctx() {
  if (_ctx) return _ctx
  const ContextCtor = window.AudioContext || window.webkitAudioContext
  _ctx = new ContextCtor()
  return _ctx
}

// Play mono PCM samples through the shared AudioContext and resolve (with undefined)
// once the source reports that it has ended.
//   float32    — sample values; copied into a one-channel AudioBuffer
//   sampleRate — sample rate, in Hz, given to that AudioBuffer
// An empty sample array resolves immediately without touching the AudioContext:
// createBuffer() refuses a zero-length buffer (NotSupportedError), so without this
// guard a chunk that produced no audio would reject instead of being skipped.
export async function playSamples(float32, sampleRate) {
  if (float32.length === 0) return
  const ac = ctx()
  // Best effort: a failed resume() is ignored and playback is attempted anyway.
  if (ac.state === 'suspended') { try { await ac.resume() } catch { /* ignore */ } }
  const buf = ac.createBuffer(1, float32.length, sampleRate)
  buf.getChannelData(0).set(float32)
  return new Promise((resolve) => {
    const src = ac.createBufferSource()
    src.buffer = buf
    src.connect(ac.destination)
    // Remember the source so stopAudio() can stop it.
    _cur = src
    // Only clear _cur if it still points at this source; a later call may already
    // have replaced it.
    src.onended = () => { if (_cur === src) _cur = null; resolve() }
    src.start()
  })
}

// Stop whatever source playSamples() last started, if any, and forget it. Errors
// thrown by stop() are deliberately swallowed so this is always safe to call.
export function stopAudio() {
  const active = _cur
  if (active) {
    try {
      active.stop()
    } catch {
      /* ignore */
    }
  }
  _cur = null
}

// Serialize mono Float32 samples into a 16-bit PCM WAV file held in an ArrayBuffer,
// giving the PCM engines (Kokoro/Kitten) a cacheable voice file like Qwen3-TTS has.
//   float32    — sample values; anything outside [-1, 1] is clamped
//   sampleRate — sample rate, in Hz, recorded in the header
// Layout: a 44-byte RIFF/WAVE header followed by little-endian signed 16-bit samples.
export function encodeWav(float32, sampleRate) {
  const headerBytes = 44
  const bytesPerSample = 2
  const sampleCount = float32.length
  const dataBytes = sampleCount * bytesPerSample
  const wav = new ArrayBuffer(headerBytes + dataBytes)
  const view = new DataView(wav)

  // Small little-endian writers keep the header table below readable.
  const putTag = (at, tag) => {
    for (let k = 0; k < tag.length; k++) view.setUint8(at + k, tag.charCodeAt(k))
  }
  const putU16 = (at, value) => view.setUint16(at, value, true)
  const putU32 = (at, value) => view.setUint32(at, value, true)

  // RIFF container: the size field counts everything after the first 8 bytes.
  putTag(0, 'RIFF')
  putU32(4, 36 + dataBytes)
  putTag(8, 'WAVE')

  // Format chunk.
  putTag(12, 'fmt ')
  putU32(16, 16) // size of the format chunk body
  putU16(20, 1) // audio format: 1 = PCM
  putU16(22, 1) // channel count
  putU32(24, sampleRate)
  putU32(28, sampleRate * bytesPerSample) // bytes per second
  putU16(32, bytesPerSample) // bytes per sample frame
  putU16(34, 16) // bits per sample

  // Data chunk.
  putTag(36, 'data')
  putU32(40, dataBytes)

  for (let i = 0; i < sampleCount; i++) {
    const clamped = Math.max(-1, Math.min(1, float32[i]))
    // Negative samples scale by 0x8000 and the rest by 0x7fff so that both -1 and
    // +1 land exactly on the int16 limits.
    const scale = clamped < 0 ? 0x8000 : 0x7fff
    view.setInt16(headerBytes + i * bytesPerSample, clamped * scale, true)
  }
  return wav
}

// Decode a WAV/audio ArrayBuffer to { audio: Float32Array, sampleRate } via the shared
// AudioContext (decoding needs no user gesture; only playback does). Only channel 0 of
// the decoded audio is returned. Rejects when the data cannot be decoded.
//
// The input is copied before decoding because decodeAudioData() detaches the buffer it
// receives, which would leave the caller unable to reuse or cache the bytes afterwards.
// Both the callback and the promise form of decodeAudioData() are handled, since ctx()
// can fall back to webkitAudioContext, whose older implementations return no promise.
export async function decodeAudio(arrayBuffer) {
  const ac = ctx()
  const decoded = await new Promise((resolve, reject) => {
    // Older callback-only implementations may report failure with a null argument.
    const fail = (err) => reject(err || new Error('Unable to decode audio data'))
    const pending = ac.decodeAudioData(arrayBuffer.slice(0), resolve, fail)
    // Modern implementations also return a promise; settling twice is harmless and
    // chaining here keeps its rejection from going unhandled.
    if (pending && typeof pending.then === 'function') pending.then(resolve, fail)
  })
  return { audio: decoded.getChannelData(0), sampleRate: decoded.sampleRate }
}