Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -59,7 +59,7 @@ if not hasattr(torchaudio, "info"):
|
|
| 59 |
|
| 60 |
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 61 |
|
| 62 |
-
# FORCE BUILD TRIGGER: 15:
|
| 63 |
|
| 64 |
# 🛠️ Monkeypatch torchaudio.load
|
| 65 |
try:
|
|
@@ -187,6 +187,7 @@ def core_process(request_dict):
|
|
| 187 |
return {"error": f"Unknown action: {action}"}
|
| 188 |
|
| 189 |
def create_wav_header(sample_rate=24000, channels=1, bit_depth=16):
|
|
|
|
| 190 |
header = bytearray(b'RIFF')
|
| 191 |
header.extend((1000000000).to_bytes(4, 'little'))
|
| 192 |
header.extend(b'WAVEfmt ')
|
|
@@ -206,7 +207,7 @@ def create_wav_header(sample_rate=24000, channels=1, bit_depth=16):
|
|
| 206 |
def gpu_tts_generator(text, lang, speaker_wav_path):
|
| 207 |
load_models()
|
| 208 |
try:
|
| 209 |
-
yield create_wav_header(sample_rate=24000)
|
| 210 |
# inference_stream is a generator
|
| 211 |
for chunk in MODELS["tts"].synthesizer.tts_model.inference_stream(
|
| 212 |
text,
|
|
@@ -214,7 +215,7 @@ def gpu_tts_generator(text, lang, speaker_wav_path):
|
|
| 214 |
*MODELS["tts"].synthesizer.tts_model.get_conditioning_latents(audio_path=[speaker_wav_path]),
|
| 215 |
stream_chunk_size=20
|
| 216 |
):
|
| 217 |
-
yield (chunk * 32767).to(torch.int16).cpu().numpy().tobytes()
|
| 218 |
print("✨ [Generator Complete]")
|
| 219 |
except Exception as e:
|
| 220 |
print(f"❌ [Generator Error]: {e}")
|
|
|
|
| 59 |
|
| 60 |
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 61 |
|
| 62 |
+
# FORCE BUILD TRIGGER: 15:45:00 Jan 20 2026
|
| 63 |
|
| 64 |
# 🛠️ Monkeypatch torchaudio.load
|
| 65 |
try:
|
|
|
|
| 187 |
return {"error": f"Unknown action: {action}"}
|
| 188 |
|
| 189 |
def create_wav_header(sample_rate=24000, channels=1, bit_depth=16):
|
| 190 |
+
"""Returns a standard WAV header as standard BYTES"""
|
| 191 |
header = bytearray(b'RIFF')
|
| 192 |
header.extend((1000000000).to_bytes(4, 'little'))
|
| 193 |
header.extend(b'WAVEfmt ')
|
|
|
|
| 207 |
def gpu_tts_generator(text, lang, speaker_wav_path):
|
| 208 |
load_models()
|
| 209 |
try:
|
| 210 |
+
yield bytes(create_wav_header(sample_rate=24000))
|
| 211 |
# inference_stream is a generator
|
| 212 |
for chunk in MODELS["tts"].synthesizer.tts_model.inference_stream(
|
| 213 |
text,
|
|
|
|
| 215 |
*MODELS["tts"].synthesizer.tts_model.get_conditioning_latents(audio_path=[speaker_wav_path]),
|
| 216 |
stream_chunk_size=20
|
| 217 |
):
|
| 218 |
+
yield bytes((chunk * 32767).to(torch.int16).cpu().numpy().tobytes())
|
| 219 |
print("✨ [Generator Complete]")
|
| 220 |
except Exception as e:
|
| 221 |
print(f"❌ [Generator Error]: {e}")
|