// jam-tracks/frontend/src/hooks/useAudioEngine.js
// Author: Mina Emadi — commit 35fde27:
// "some minor UI changes: adding the playback functionality for the chosen
// section with no key and bpm change and replacing the upload new with
// choose a different song"
import { useState, useRef, useCallback, useEffect } from 'react'
/**
* Core Web Audio API abstraction
*
* Creates and manages:
* - AudioContext
* - One AudioBufferSourceNode per stem (for playback)
* - One GainNode per stem (for volume control)
* - One master GainNode
* - One AnalyserNode (for visualization)
*
* Graph: stem sources → individual gains → master gain → analyser → destination
*/
// React hook encapsulating the Web Audio engine: stem loading, the per-stem
// effect chain, transport control (play/pause/stop/seek) and mix state.
export function useAudioEngine() {
// --- React state mirrored to the UI ---
const [isPlaying, setIsPlaying] = useState(false) // transport state
const [currentTime, setCurrentTime] = useState(0) // playhead position, seconds
const [duration, setDuration] = useState(0) // longest loaded stem, seconds
const [volumes, setVolumes] = useState({}) // per-stem volume (0-1)
const [solos, setSolos] = useState({}) // per-stem solo flags
const [mutes, setMutes] = useState({}) // per-stem mute flags
const [analyserData, setAnalyserData] = useState(null) // Uint8Array of frequency bins, or null before init
const [isLoaded, setIsLoaded] = useState(false) // true once stems are decoded and wired into the graph
const [reverbs, setReverbs] = useState({}) // Reverb amount per stem (0-1)
const [pans, setPans] = useState({}) // Pan position per stem (-1 to 1)
// --- Web Audio objects live in refs: they survive re-renders without causing them ---
const audioContextRef = useRef(null)
const buffersRef = useRef({}) // AudioBuffers (raw data)
const sourcesRef = useRef({}) // AudioBufferSourceNodes (one-shot, recreated each play)
const gainsRef = useRef({}) // GainNodes per stem
const compressorsRef = useRef({}) // DynamicsCompressorNodes per stem
const reverbGainsRef = useRef({}) // Reverb wet/dry mix GainNodes
const reverbSendsRef = useRef({}) // Reverb send GainNodes
const pannersRef = useRef({}) // StereoPannerNodes per stem
const masterGainRef = useRef(null) // Master GainNode
const analyserRef = useRef(null) // AnalyserNode for visualization
const convolverRef = useRef(null) // Shared ConvolverNode for reverb
const startTimeRef = useRef(0) // AudioContext time when playback started
const pauseTimeRef = useRef(0) // Offset in seconds where we paused
const animationRef = useRef(null) // requestAnimationFrame id of the update loop
const lastTimeUpdateRef = useRef(0) // Last time we updated currentTime state (for throttling)
const loopRef = useRef(false) // Whether to loop playback
const rawRegionRef = useRef(null) // { start, end } in seconds for raw-region loop (no processing)
const [isRawRegionActive, setIsRawRegionActive] = useState(false) // UI flag mirroring rawRegionRef
// Persistent AudioBuffer cache: keys like "drums_full", "drums_region" → AudioBuffer
// Survives across loadStems calls so we skip fetch+decode on replay
const bufferCacheRef = useRef({})
// Synthesize a stereo impulse response for the convolver: white noise shaped
// by an exponential decay envelope — a cheap synthetic "room" reverb tail.
const createReverbImpulse = useCallback((ctx, duration = 2, decay = 2) => {
  const rate = ctx.sampleRate
  const frameCount = rate * duration
  const impulse = ctx.createBuffer(2, frameCount, rate)
  for (let ch = 0; ch < 2; ch++) {
    // Independent noise per channel gives the tail a wide stereo image
    const samples = impulse.getChannelData(ch)
    for (let i = 0; i < frameCount; i++) {
      const envelope = Math.exp(-(i / rate) * decay)
      samples[i] = (Math.random() * 2 - 1) * envelope
    }
  }
  return impulse
}, [])
// Lazily create the AudioContext and the shared tail of the audio graph:
// master gain → analyser → destination, with the reverb convolver feeding
// the master gain. Repeated calls return the already-created context.
const initAudio = useCallback(() => {
  if (audioContextRef.current) {
    return audioContextRef.current
  }
  const ContextCtor = window.AudioContext || window.webkitAudioContext
  const ctx = new ContextCtor()
  audioContextRef.current = ctx
  // Master bus
  const master = ctx.createGain()
  masterGainRef.current = master
  // Shared reverb: one convolver receives every stem's wet send
  const convolver = ctx.createConvolver()
  convolver.buffer = createReverbImpulse(ctx, 2, 2.5)
  convolver.connect(master)
  convolverRef.current = convolver
  // Analyser taps the master bus for visualization
  const analyser = ctx.createAnalyser()
  analyser.fftSize = 128
  analyser.smoothingTimeConstant = 0.8
  analyserRef.current = analyser
  master.connect(analyser)
  analyser.connect(ctx.destination)
  return ctx
}, [createReverbImpulse])
// Load stems from server: fetches raw PCM for every stem IN PARALLEL and
// constructs AudioBuffers directly (no decodeAudioData).
// Uses the persistent bufferCacheRef to skip fetch+conversion on replay.
// If region=true, fetches region-processed stems instead of full-processed.
const loadStems = useCallback(async (sessionId, stemNames, { region = false } = {}) => {
  const totalStart = performance.now()
  const cacheTag = region ? 'region' : 'full'
  console.log(`=== STEM LOADING START (${cacheTag}) ===`)
  console.log('Loading stems:', stemNames)
  const ctx = initAudio()
  console.log(`AudioContext state: ${ctx.state}, sampleRate: ${ctx.sampleRate}`)
  // Disconnect existing audio graph nodes (cheap, must rebuild per-load)
  Object.values(gainsRef.current).forEach(g => g?.disconnect())
  Object.values(compressorsRef.current).forEach(c => c?.disconnect())
  Object.values(pannersRef.current).forEach(p => p?.disconnect())
  Object.values(reverbGainsRef.current).forEach(r => r?.disconnect())
  Object.values(reverbSendsRef.current).forEach(r => r?.disconnect())
  buffersRef.current = {}
  gainsRef.current = {}
  compressorsRef.current = {}
  pannersRef.current = {}
  reverbGainsRef.current = {}
  reverbSendsRef.current = {}
  setIsLoaded(false)
  // Fetch (or reuse from the persistent cache) one stem's PCM and build an AudioBuffer
  const loadSingleStem = async (stem) => {
    const cacheKey = `${stem}_${cacheTag}`
    // Check persistent cache
    if (bufferCacheRef.current[cacheKey]) {
      console.log(`[${stem}] CACHE HIT (${cacheTag})`)
      return { stem, audioBuffer: bufferCacheRef.current[cacheKey] }
    }
    // Cache miss — fetch raw PCM and construct AudioBuffer directly (no decodeAudioData)
    const stemStart = performance.now()
    try {
      console.log(`[${stem}] CACHE MISS — fetching PCM...`)
      const fetchStart = performance.now()
      const regionParam = region ? '&region=true' : ''
      let response = await fetch(`/api/stem/${sessionId}/${stem}?processed=true${regionParam}&format=pcm`)
      if (!response.ok) {
        // Fall back to the unprocessed stem when the processed one is unavailable
        response = await fetch(`/api/stem/${sessionId}/${stem}?processed=false&format=pcm`)
      }
      const fetchEnd = performance.now()
      console.log(`[${stem}] Fetch completed in ${(fetchEnd - fetchStart).toFixed(0)}ms`)
      if (response.ok) {
        // PCM geometry is carried in custom response headers
        const sampleRate = Number.parseInt(response.headers.get('X-Sample-Rate'), 10)
        const numChannels = Number.parseInt(response.headers.get('X-Channels'), 10)
        const numFrames = Number.parseInt(response.headers.get('X-Frames'), 10)
        const bufferStart = performance.now()
        const arrayBuffer = await response.arrayBuffer()
        const bufferEnd = performance.now()
        const sizeMB = (arrayBuffer.byteLength / 1024 / 1024).toFixed(2)
        console.log(`[${stem}] ArrayBuffer: ${sizeMB}MB in ${(bufferEnd - bufferStart).toFixed(0)}ms`)
        const constructStart = performance.now()
        // Convert int16 PCM → float32 AudioBuffer.
        // BUGFIX: the old code copied the raw samples into channel 0 only via
        // copyToChannel(float32, 0), which for stereo stems wrote interleaved
        // L/R data into one channel and left the other silent. De-interleave
        // per channel instead (assumes standard interleaved PCM framing —
        // TODO confirm against the backend). Mono behavior is unchanged.
        const int16 = new Int16Array(arrayBuffer)
        const audioBuffer = ctx.createBuffer(numChannels, numFrames, sampleRate)
        // Guard against a short payload so we never index past the end
        const framesAvailable = Math.min(numFrames, Math.floor(int16.length / numChannels))
        for (let ch = 0; ch < numChannels; ch++) {
          const channelData = audioBuffer.getChannelData(ch)
          for (let i = 0; i < framesAvailable; i++) {
            channelData[i] = int16[i * numChannels + ch] / 32767
          }
        }
        const constructEnd = performance.now()
        console.log(`[${stem}] Constructed ${audioBuffer.duration.toFixed(1)}s AudioBuffer in ${(constructEnd - constructStart).toFixed(0)}ms`)
        // Store in persistent cache
        bufferCacheRef.current[cacheKey] = audioBuffer
        const stemEnd = performance.now()
        console.log(`[${stem}] TOTAL: ${(stemEnd - stemStart).toFixed(0)}ms`)
        return { stem, audioBuffer }
      }
    } catch (err) {
      console.error(`[${stem}] FAILED:`, err)
    }
    // null marks a stem that could not be loaded; it is skipped below
    return null
  }
  // Load all stems in parallel
  console.log('Starting stem loading (all at once)...')
  const parallelStart = performance.now()
  const results = await Promise.all(stemNames.map(loadSingleStem))
  const parallelEnd = performance.now()
  console.log(`All stems loaded in ${(parallelEnd - parallelStart).toFixed(0)}ms`)
  const newVolumes = {}
  let maxDuration = 0
  // Wire each loaded stem into the graph.
  // Chain: source → gain → compressor → panner → (dry + wet reverb) → master
  for (const result of results) {
    if (!result) continue
    const { stem, audioBuffer } = result
    buffersRef.current[stem] = audioBuffer
    if (audioBuffer.duration > maxDuration) {
      maxDuration = audioBuffer.duration
    }
    newVolumes[stem] = 1
    // 1. Gain node for volume control
    gainsRef.current[stem] = ctx.createGain()
    // 2. Compressor to reduce artifacts and tame dynamics
    compressorsRef.current[stem] = ctx.createDynamicsCompressor()
    compressorsRef.current[stem].threshold.value = -24 // Start compression at -24dB
    compressorsRef.current[stem].knee.value = 30 // Soft knee
    compressorsRef.current[stem].ratio.value = 4 // 4:1 ratio
    compressorsRef.current[stem].attack.value = 0.003 // Fast attack (3ms)
    compressorsRef.current[stem].release.value = 0.25 // Medium release (250ms)
    // 3. Stereo panner with a default position based on instrument type
    pannersRef.current[stem] = ctx.createStereoPanner()
    const stemLower = stem.toLowerCase()
    let defaultPan = 0
    if (stemLower.includes('bass')) defaultPan = 0 // Center
    else if (stemLower.includes('drum')) defaultPan = 0 // Center
    else if (stemLower.includes('guitar')) defaultPan = -0.3 // Left
    else if (stemLower.includes('synth')) defaultPan = 0.3 // Right
    else if (stemLower.includes('keys')) defaultPan = 0.2 // Right
    else if (stemLower.includes('vocal')) defaultPan = 0 // Center
    else defaultPan = (Math.random() - 0.5) * 0.4 // Random slight pan
    pannersRef.current[stem].pan.value = defaultPan
    // 4. Reverb send (wet signal), silent until the user raises it
    reverbSendsRef.current[stem] = ctx.createGain()
    reverbSendsRef.current[stem].gain.value = 0 // Start with no reverb
    // Connect the chain
    gainsRef.current[stem].connect(compressorsRef.current[stem])
    compressorsRef.current[stem].connect(pannersRef.current[stem])
    // Split to dry (direct) and wet (reverb)
    pannersRef.current[stem].connect(masterGainRef.current) // Dry signal
    pannersRef.current[stem].connect(reverbSendsRef.current[stem]) // Wet signal to reverb
    reverbSendsRef.current[stem].connect(convolverRef.current) // Reverb convolver
  }
  // Seed UI state for reverb and pan from the nodes just created
  const newReverbs = {}
  const newPans = {}
  Object.keys(buffersRef.current).forEach(stem => {
    newReverbs[stem] = 0.15 // 15% reverb by default for studio sound
    newPans[stem] = pannersRef.current[stem]?.pan.value || 0
  })
  setDuration(maxDuration)
  setVolumes(newVolumes)
  setReverbs(newReverbs)
  setPans(newPans)
  setSolos({})
  setMutes({})
  setIsLoaded(true)
  pauseTimeRef.current = 0
  const totalEnd = performance.now()
  console.log('=== STEM LOADING COMPLETE ===')
  console.log(`Total loading time: ${(totalEnd - totalStart).toFixed(0)}ms`)
  console.log(`Duration: ${maxDuration.toFixed(1)}s`)
}, [initAudio])
// Load stems directly from cached IndexedDB bytes — no network fetch needed.
// stemsData shape: { stemName: { bytes: ArrayBuffer, sampleRate, numChannels, numFrames } }
const loadStemsFromBytes = useCallback(async (stemsData) => {
  const totalStart = performance.now()
  console.log('=== STEM LOADING FROM CACHE START ===')
  const ctx = initAudio()
  // Disconnect existing audio graph nodes
  Object.values(gainsRef.current).forEach(g => g?.disconnect())
  Object.values(compressorsRef.current).forEach(c => c?.disconnect())
  Object.values(pannersRef.current).forEach(p => p?.disconnect())
  Object.values(reverbGainsRef.current).forEach(r => r?.disconnect())
  Object.values(reverbSendsRef.current).forEach(r => r?.disconnect())
  buffersRef.current = {}
  gainsRef.current = {}
  compressorsRef.current = {}
  pannersRef.current = {}
  reverbGainsRef.current = {}
  reverbSendsRef.current = {}
  setIsLoaded(false)
  const newVolumes = {}
  let maxDuration = 0
  for (const [stem, { bytes, sampleRate, numChannels, numFrames }] of Object.entries(stemsData)) {
    // Convert int16 PCM → float32 AudioBuffer.
    // BUGFIX: previously the raw (interleaved) samples were copied into
    // channel 0 only, garbling stereo stems and leaving other channels
    // silent. De-interleave per channel; mono behavior is unchanged.
    const int16 = new Int16Array(bytes)
    const audioBuffer = ctx.createBuffer(numChannels, numFrames, sampleRate)
    // Guard against a short payload so we never index past the end
    const framesAvailable = Math.min(numFrames, Math.floor(int16.length / numChannels))
    for (let ch = 0; ch < numChannels; ch++) {
      const channelData = audioBuffer.getChannelData(ch)
      for (let i = 0; i < framesAvailable; i++) {
        channelData[i] = int16[i * numChannels + ch] / 32767
      }
    }
    buffersRef.current[stem] = audioBuffer
    if (audioBuffer.duration > maxDuration) maxDuration = audioBuffer.duration
    newVolumes[stem] = 1
    // Effect chain: source → gain → compressor → panner → (dry + wet reverb) → master
    gainsRef.current[stem] = ctx.createGain()
    compressorsRef.current[stem] = ctx.createDynamicsCompressor()
    compressorsRef.current[stem].threshold.value = -24
    compressorsRef.current[stem].knee.value = 30
    compressorsRef.current[stem].ratio.value = 4
    compressorsRef.current[stem].attack.value = 0.003
    compressorsRef.current[stem].release.value = 0.25
    // Default pan position is chosen per instrument type
    pannersRef.current[stem] = ctx.createStereoPanner()
    const stemLower = stem.toLowerCase()
    let defaultPan = 0
    if (stemLower.includes('bass')) defaultPan = 0
    else if (stemLower.includes('drum')) defaultPan = 0
    else if (stemLower.includes('guitar')) defaultPan = -0.3
    else if (stemLower.includes('synth')) defaultPan = 0.3
    else if (stemLower.includes('keys')) defaultPan = 0.2
    else if (stemLower.includes('vocal')) defaultPan = 0
    else defaultPan = (Math.random() - 0.5) * 0.4
    pannersRef.current[stem].pan.value = defaultPan
    // Reverb send starts silent; the reverb effect raises it to the UI value
    reverbSendsRef.current[stem] = ctx.createGain()
    reverbSendsRef.current[stem].gain.value = 0
    gainsRef.current[stem].connect(compressorsRef.current[stem])
    compressorsRef.current[stem].connect(pannersRef.current[stem])
    pannersRef.current[stem].connect(masterGainRef.current)
    pannersRef.current[stem].connect(reverbSendsRef.current[stem])
    reverbSendsRef.current[stem].connect(convolverRef.current)
  }
  // Seed UI state for reverb and pan from the nodes just created
  const newReverbs = {}
  const newPans = {}
  Object.keys(buffersRef.current).forEach(stem => {
    newReverbs[stem] = 0.15
    newPans[stem] = pannersRef.current[stem]?.pan.value || 0
  })
  setDuration(maxDuration)
  setVolumes(newVolumes)
  setReverbs(newReverbs)
  setPans(newPans)
  setSolos({})
  setMutes({})
  setIsLoaded(true)
  pauseTimeRef.current = 0
  console.log(`=== STEM LOADING FROM CACHE COMPLETE in ${(performance.now() - totalStart).toFixed(0)}ms ===`)
}, [initAudio])
// Snapshot the analyser's current frequency-domain data for visualization.
// Returns a fresh Uint8Array (one byte per frequency bin), or null before init.
const getAnalyserData = useCallback(() => {
  const analyser = analyserRef.current
  if (!analyser) {
    return null
  }
  const bins = new Uint8Array(analyser.frequencyBinCount)
  analyser.getByteFrequencyData(bins)
  return bins
}, [])
// Mirror the latest isPlaying/duration state into refs so the long-lived
// requestAnimationFrame loop below reads fresh values — it is registered once
// and would otherwise close over stale state.
const isPlayingRef = useRef(false)
const durationRef = useRef(0)
// Keep refs in sync with state
useEffect(() => {
isPlayingRef.current = isPlaying
}, [isPlaying])
useEffect(() => {
durationRef.current = duration
}, [duration])
// Animation loop: refreshes analyser data every frame and advances the
// playhead. Registered once per hook lifetime; reads isPlayingRef /
// durationRef (not state) so it never sees stale closure values.
useEffect(() => {
  let running = true
  const updateLoop = (timestamp) => {
    if (!running) return
    // Update analyser data every frame (60fps for smooth visualization)
    setAnalyserData(getAnalyserData())
    if (audioContextRef.current && isPlayingRef.current) {
      const elapsed = audioContextRef.current.currentTime - startTimeRef.current + pauseTimeRef.current
      const rawRegion = rawRegionRef.current
      // For raw region loops, treat region end as the effective song end
      const effectiveDur = rawRegion ? rawRegion.end : durationRef.current
      const newTime = Math.min(elapsed, effectiveDur)
      // Throttle currentTime updates to every 100ms (10fps) to limit re-renders
      if (timestamp - lastTimeUpdateRef.current >= 100) {
        setCurrentTime(newTime)
        lastTimeUpdateRef.current = timestamp
      }
      // End of song / region reached
      if (elapsed >= effectiveDur && effectiveDur > 0) {
        // The one-shot sources are finished either way — tear them down
        Object.values(sourcesRef.current).forEach(source => {
          try { source.stop() } catch (e) {}
        })
        sourcesRef.current = {}
        if (loopRef.current) {
          // Loop: restart all stems from region start (or song beginning)
          const loopStart = rawRegion ? rawRegion.start : 0
          const loopDur = rawRegion ? (rawRegion.end - rawRegion.start) : undefined
          pauseTimeRef.current = loopStart
          setCurrentTime(loopStart)
          const ctx = audioContextRef.current
          Object.entries(buffersRef.current).forEach(([stem, buffer]) => {
            if (!buffer || !gainsRef.current[stem]) return
            const source = ctx.createBufferSource()
            source.buffer = buffer
            source.connect(gainsRef.current[stem])
            sourcesRef.current[stem] = source
            source.start(0, loopStart, loopDur)
          })
          startTimeRef.current = ctx.currentTime
        } else {
          // Stop playback and reset the transport.
          // BUGFIX: the old code `return`ed here WITHOUT scheduling the next
          // frame, permanently killing this rAF loop — after the first song
          // ended, the analyser froze and the playhead never moved again on
          // later plays. Fall through to requestAnimationFrame instead.
          isPlayingRef.current = false // stop immediately; state sync follows
          pauseTimeRef.current = 0
          startTimeRef.current = 0
          setCurrentTime(0)
          setIsPlaying(false)
        }
      }
    }
    animationRef.current = requestAnimationFrame(updateLoop)
  }
  // Start the loop
  animationRef.current = requestAnimationFrame(updateLoop)
  return () => {
    running = false
    if (animationRef.current) {
      cancelAnimationFrame(animationRef.current)
    }
  }
}, [getAnalyserData])
// Apply volume/solo/mute to the per-stem GainNodes (instant, no server call).
// Rules: mute always silences its stem; when any stem is soloed, only soloed
// stems are audible; mute overrides solo.
useEffect(() => {
  const anySoloed = Object.values(solos).some(Boolean)
  const now = audioContextRef.current?.currentTime || 0
  for (const [stem, gain] of Object.entries(gainsRef.current)) {
    if (!gain) continue
    const muted = mutes[stem] ?? false
    const soloed = solos[stem] ?? false
    const audible = !muted && (!anySoloed || soloed)
    const target = audible ? (volumes[stem] ?? 1) : 0
    gain.gain.setValueAtTime(target, now)
  }
}, [volumes, solos, mutes])
// Push reverb-send and pan values into the live graph whenever they change.
useEffect(() => {
  const now = audioContextRef.current?.currentTime || 0
  for (const [stem, send] of Object.entries(reverbSendsRef.current)) {
    if (!send) continue
    send.gain.setValueAtTime(reverbs[stem] ?? 0.15, now)
  }
  for (const [stem, panner] of Object.entries(pannersRef.current)) {
    if (!panner) continue
    panner.pan.setValueAtTime(pans[stem] ?? 0, now)
  }
}, [reverbs, pans])
// Play: creates one fresh AudioBufferSourceNode per stem (they are one-shot)
// and starts them all at the saved pause offset. While a raw region is
// active, playback is confined to [region.start, region.end].
const play = useCallback(async () => {
  if (!isLoaded || Object.keys(buffersRef.current).length === 0) {
    console.log('Cannot play: no stems loaded')
    return
  }
  const ctx = initAudio()
  // Resume context if suspended (browser autoplay policy)
  // Must await — source.start() on a suspended context is silent in Firefox/Safari
  if (ctx.state === 'suspended') {
    await ctx.resume()
  }
  // Kill any sources that are still around from a previous play
  for (const source of Object.values(sourcesRef.current)) {
    try { source.stop() } catch (e) {}
  }
  sourcesRef.current = {}
  const rawRegion = rawRegionRef.current
  const offset = pauseTimeRef.current
  for (const [stem, buffer] of Object.entries(buffersRef.current)) {
    const gain = gainsRef.current[stem]
    if (!buffer || !gain) continue
    const source = ctx.createBufferSource()
    source.buffer = buffer
    source.connect(gain)
    sourcesRef.current[stem] = source
    if (rawRegion) {
      // Confine playback to the selected region
      source.start(0, offset, Math.max(0, rawRegion.end - offset))
    } else {
      source.start(0, offset)
    }
  }
  startTimeRef.current = ctx.currentTime
  isPlayingRef.current = true // Update ref immediately (no waiting for useEffect)
  setIsPlaying(true)
  console.log('Play started, startTime:', startTimeRef.current)
}, [initAudio, isLoaded])
// Pause: stops all sources and folds the elapsed time into the pause offset
// so a later play() resumes from the same position.
const pause = useCallback(() => {
  console.log('Pause called')
  isPlayingRef.current = false // Update ref immediately
  for (const source of Object.values(sourcesRef.current)) {
    try { source.stop() } catch (e) {}
  }
  sourcesRef.current = {}
  // Save pause position
  if (audioContextRef.current && startTimeRef.current) {
    pauseTimeRef.current += audioContextRef.current.currentTime - startTimeRef.current
  }
  setIsPlaying(false)
}, [])
// Stop: halts every source and rewinds the transport to zero.
const stop = useCallback(() => {
  for (const source of Object.values(sourcesRef.current)) {
    try { source.stop() } catch (e) {}
  }
  sourcesRef.current = {}
  pauseTimeRef.current = 0
  startTimeRef.current = 0
  setCurrentTime(0)
  setIsPlaying(false)
}, [])
// Seek: jump to `time` (clamped to [0, duration]); if playback was running,
// restart it from the new position immediately.
const seek = useCallback((time) => {
  const wasPlaying = isPlaying
  // Optimistic update: move the playhead right away for instant visual feedback
  const target = Math.max(0, Math.min(time, duration))
  pauseTimeRef.current = target
  setCurrentTime(target)
  // Tear down current sources
  for (const source of Object.values(sourcesRef.current)) {
    try { source.stop() } catch (e) {}
  }
  sourcesRef.current = {}
  setIsPlaying(false)
  // Resume if was playing (immediately, no delay)
  if (wasPlaying) {
    play()
  }
}, [isPlaying, duration, play])
// setVolume: record a stem's volume (0-1); the volume/solo/mute effect
// applies it to the GainNode instantly — no server call.
const setVolume = useCallback((stemName, value) => {
  setVolumes((previous) => ({ ...previous, [stemName]: value }))
}, [])
// setSolo: toggle a stem's solo flag (soloing silences all non-soloed stems).
const setSolo = useCallback((stemName, soloed) => {
  setSolos((previous) => ({ ...previous, [stemName]: soloed }))
}, [])
// setMute: toggle a stem's mute flag (mute wins over solo).
const setMute = useCallback((stemName, muted) => {
  setMutes((previous) => ({ ...previous, [stemName]: muted }))
}, [])
// setReverb: set a stem's reverb send amount (0-1); applied instantly by the
// reverb/pan effect.
const setReverb = useCallback((stemName, value) => {
  setReverbs((previous) => ({ ...previous, [stemName]: value }))
}, [])
// setPan: set a stem's stereo position (-1 left … 1 right); applied instantly.
const setPan = useCallback((stemName, value) => {
  setPans((previous) => ({ ...previous, [stemName]: value }))
}, [])
// Enable/disable loop mode; read by the animation loop when the song or
// raw region reaches its end.
const setLoop = useCallback((enabled) => {
loopRef.current = enabled
}, [])
// Activate or clear a raw playback region (loop a selection without any
// processing). Pass (start, end) in seconds to activate, (null, null) to
// clear. While active, play() confines audio to [start, end].
const setRawRegion = useCallback((start, end) => {
  if (start === null || end === null) {
    rawRegionRef.current = null
    setIsRawRegionActive(false)
    return
  }
  rawRegionRef.current = { start, end }
  setIsRawRegionActive(true)
  // Snap playhead to region start if it currently sits outside the region
  const position = pauseTimeRef.current
  if (position < start || position >= end) {
    pauseTimeRef.current = start
    setCurrentTime(start)
  }
}, [])
// Drop cached AudioBuffers — either only those tagged `_${tag}` ('full' or
// 'region'), or the entire cache when no tag is given.
const clearBufferCache = useCallback((tag = null) => {
  if (!tag) {
    bufferCacheRef.current = {}
    console.log('Buffer cache fully cleared')
    return
  }
  const suffix = `_${tag}`
  for (const key of Object.keys(bufferCacheRef.current)) {
    if (key.endsWith(suffix)) {
      delete bufferCacheRef.current[key]
    }
  }
  console.log(`Buffer cache cleared for tag: ${tag}`)
}, [])
// Reset the mix: every loaded stem back to full volume, no solos, no mutes.
const resetVolumes = useCallback(() => {
  const defaults = Object.fromEntries(
    Object.keys(buffersRef.current).map((stem) => [stem, 1])
  )
  setVolumes(defaults)
  setSolos({})
  setMutes({})
}, [])
// Public surface of the hook: a state snapshot plus imperative controls.
return {
// State
isPlaying,
isLoaded,
isRawRegionActive,
currentTime,
duration,
volumes,
solos,
mutes,
reverbs,
pans,
analyserData,
// Methods
loadStems,
loadStemsFromBytes,
play,
pause,
stop,
seek,
setVolume,
setSolo,
setMute,
setReverb,
setPan,
resetVolumes,
setLoop,
setRawRegion,
clearBufferCache,
getAnalyserData
}
}