humair025 committed on
Commit
fa7ee39
·
verified ·
1 Parent(s): 12d2fec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import subprocess
2
  import sys
3
 
4
- # Install neucodec if not already installed
5
  try:
6
  import neucodec
7
  except ImportError:
@@ -14,28 +14,32 @@ import torch
14
  import torchaudio
15
  from torchaudio import transforms as T
16
  from neucodec import DistillNeuCodec
 
 
 
17
 
18
  # Load model on CPU
19
  model = DistillNeuCodec.from_pretrained("neuphonic/distill-neucodec")
20
  model.eval() # CPU only
21
 
22
  def reconstruct_audio(audio_file):
23
- # Load uploaded audio
24
- y, sr = torchaudio.load(audio_file)
 
 
 
 
25
 
26
- # Resample if needed
27
- if sr != 16_000:
28
- y = T.Resample(sr, 16_000)(y)
29
- y = y[None, ...] # Add batch dim (B, 1, T)
30
-
31
- # Encode and decode on CPU
32
  with torch.no_grad():
33
  fsq_codes = model.encode_code(y)
34
  recon = model.decode_code(fsq_codes)
35
 
36
- # Save to temporary file
 
 
37
  recon_path = "reconstructed.wav"
38
- torchaudio.save(recon_path, recon[0], 24_000)
39
 
40
  return recon_path
41
 
@@ -44,7 +48,7 @@ iface = gr.Interface(
44
  fn=reconstruct_audio,
45
  inputs=gr.Audio(type="filepath", label="Upload Audio"),
46
  outputs=gr.Audio(type="filepath", label="Reconstructed Audio"),
47
- title="Audio Reconstruction with DistillNeuCodec (CPU)",
48
  description="Upload any audio file, and this app will reconstruct it using DistillNeuCodec at 24kHz on CPU."
49
  )
50
 
 
1
  import subprocess
2
  import sys
3
 
4
+ # Auto-install neucodec if missing
5
  try:
6
  import neucodec
7
  except ImportError:
 
14
  import torchaudio
15
  from torchaudio import transforms as T
16
  from neucodec import DistillNeuCodec
17
+ import librosa
18
+ import soundfile as sf
19
+ import numpy as np
20
 
21
  # Load model on CPU
22
  model = DistillNeuCodec.from_pretrained("neuphonic/distill-neucodec")
23
  model.eval() # CPU only
24
 
25
  def reconstruct_audio(audio_file):
26
+ # Load audio with librosa (avoids torchcodec issues)
27
+ y, sr = librosa.load(audio_file, sr=None, mono=True) # Keep original sr
28
+ if sr != 16000:
29
+ y = librosa.resample(y, orig_sr=sr, target_sr=16000)
30
+ sr = 16000
31
+ y = torch.from_numpy(y).unsqueeze(0).unsqueeze(0) # (1, 1, T)
32
 
33
+ # Encode & decode
 
 
 
 
 
34
  with torch.no_grad():
35
  fsq_codes = model.encode_code(y)
36
  recon = model.decode_code(fsq_codes)
37
 
38
+ recon = recon.squeeze().cpu().numpy()
39
+
40
+ # Save reconstructed audio
41
  recon_path = "reconstructed.wav"
42
+ sf.write(recon_path, recon, 24000)
43
 
44
  return recon_path
45
 
 
48
  fn=reconstruct_audio,
49
  inputs=gr.Audio(type="filepath", label="Upload Audio"),
50
  outputs=gr.Audio(type="filepath", label="Reconstructed Audio"),
51
+ title="Audio Reconstruction with DistillNeuCodec (CPU + Librosa)",
52
  description="Upload any audio file, and this app will reconstruct it using DistillNeuCodec at 24kHz on CPU."
53
  )
54