# TTS-ASMR / app.py
# ak6868674's picture
# Update app.py
# ba530c7 verified
import os
import tempfile

import gradio as gr
import requests
import soundfile as sf
import torch
from pydub import AudioSegment
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
# Load the pretrained SpeechT5 components once at import time: the text
# processor, the text-to-speech model, and the HiFi-GAN vocoder that is later
# passed to `generate_speech`.
_TTS_CHECKPOINT = "microsoft/speecht5_tts"
_VOCODER_CHECKPOINT = "microsoft/speecht5_hifigan"

processor = SpeechT5Processor.from_pretrained(_TTS_CHECKPOINT)
model = SpeechT5ForTextToSpeech.from_pretrained(_TTS_CHECKPOINT)
vocoder = SpeechT5HifiGan.from_pretrained(_VOCODER_CHECKPOINT)
# Generate a random but fixed speaker embedding.
# The values are drawn from a dedicated, seeded generator so the same (1, 512)
# tensor -- and therefore the same voice -- is produced on every launch, and so
# the global torch RNG state is left untouched.
# NOTE(review): SpeechT5 is usually driven by x-vector speaker embeddings; a
# uniform random vector is presumably a placeholder -- confirm the intended voice.
SPEAKER_EMBEDDING_SEED = 0
speaker_embeddings = torch.rand(
    1,
    512,
    generator=torch.Generator().manual_seed(SPEAKER_EMBEDDING_SEED),
)
# Rain background sound
DEFAULT_RAIN = "rain.mp3"
RAIN_URL = "https://cdn.pixabay.com/download/audio/2022/03/15/audio_7e9f0b47b6.mp3?filename=gentle-rain-ambient-11022.mp3"
# Upper bound (seconds) on the download so a stalled connection cannot hang startup.
RAIN_DOWNLOAD_TIMEOUT = 30

# Best-effort download of the default ambience track on first start.
if not os.path.exists(DEFAULT_RAIN):
    # Write to a side file and rename only on success: a failed or partial
    # download must never leave a broken "rain.mp3" behind, because its mere
    # existence would suppress every later retry.
    _rain_tmp = DEFAULT_RAIN + ".part"
    try:
        r = requests.get(RAIN_URL, timeout=RAIN_DOWNLOAD_TIMEOUT)
        # Reject 4xx/5xx responses instead of saving an error page as audio.
        r.raise_for_status()
        with open(_rain_tmp, "wb") as f:
            f.write(r.content)
        os.replace(_rain_tmp, DEFAULT_RAIN)
    except Exception as e:
        # Startup boundary: the app still works without rain (the merge step
        # in generate_audio handles the missing file), so log and continue.
        print(f"Error downloading rain: {e}")
    finally:
        # After a successful os.replace the side file no longer exists;
        # otherwise remove whatever partial data was written.
        if os.path.exists(_rain_tmp):
            try:
                os.remove(_rain_tmp)
            except OSError:
                # Cleanup is optional; a leftover ".part" file is harmless.
                pass
def _change_speed(segment, speed):
    """Return *segment* retimed by the factor *speed* (values below 1 slow it down).

    The raw samples are reinterpreted at a scaled frame rate and then converted
    back to the original frame rate, so the result keeps the input's frame rate.
    """
    if speed == 1.0:
        return segment
    retimed = segment._spawn(
        segment.raw_data,
        overrides={"frame_rate": int(segment.frame_rate * speed)},
    )
    return retimed.set_frame_rate(segment.frame_rate)


def _mix_background(voice, background_audio):
    """Return *voice* with the background track mixed underneath.

    Uses the user-supplied file (gain -20) when given, otherwise the default
    rain file (gain -25). The background is looped so it lasts for the whole
    of *voice* even when the track is shorter than the speech.
    """
    if background_audio:
        bg = AudioSegment.from_file(background_audio).apply_gain(-20)
    else:
        bg = AudioSegment.from_file(DEFAULT_RAIN).apply_gain(-25)
    # A zero-length track has nothing to mix, and looping it would never advance.
    if len(bg) == 0:
        return voice
    # overlay() never extends the base segment, so no manual trimming is needed.
    return voice.overlay(bg, loop=True)


def _new_temp_path(suffix):
    """Create an empty, uniquely named temp file and return its path."""
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def generate_audio(prompt, emotion, speed, background_audio):
    """Synthesize *prompt* to speech and mix it with a background track.

    Args:
        prompt: Text to speak. Must contain at least one non-whitespace character.
        emotion: When equal to "calm", pauses ("...") are inserted at the start
            and after every period; any other value leaves the text unchanged.
        speed: Playback-rate factor; 1.0 keeps the synthesized timing.
        background_audio: Path to a user-supplied background file, or a falsy
            value to use the default rain track.

    Returns:
        A ``(output_path, status)`` tuple: the path of the exported MP3 and a
        human-readable status message.

    Raises:
        gr.Error: If the text is empty/whitespace-only or the speed is not positive.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Text cannot be empty.")
    if not speed or speed <= 0:
        raise gr.Error("Speed must be a positive number.")

    # Add ASMR effect for calm emotion
    if emotion == "calm":
        prompt = "... " + prompt.replace(".", "... ")

    # NOTE(review): very long scripts may exceed the model's input limit and
    # need to be synthesized in chunks -- confirm against the SpeechT5 config.
    inputs = processor(text=prompt, return_tensors="pt")
    with torch.no_grad():
        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

    # Per-call temp files: fixed names would let concurrent requests overwrite
    # each other's audio.
    wav_path = _new_temp_path(".wav")
    try:
        sf.write(wav_path, speech.numpy(), samplerate=16000)
        final_audio = AudioSegment.from_file(wav_path)
    finally:
        # The intermediate WAV is only needed until it has been loaded.
        os.remove(wav_path)

    # Adjust speed for ASMR
    final_audio = _change_speed(final_audio, speed)

    # Add background rain or user-uploaded audio (best effort: the voice-only
    # result is still returned if mixing fails, but the status says so).
    status = "✅ Audio generated successfully!"
    try:
        final_audio = _mix_background(final_audio, background_audio)
    except Exception as e:
        print(f"Background merge failed: {e}")
        status = "⚠️ Audio generated, but the background could not be added."

    # The output file must outlive this call so the UI can read it.
    output_path = _new_temp_path(".mp3")
    final_audio.export(output_path, format="mp3")
    return output_path, status
# Gradio UI: one row with two columns -- the first holds the inputs and the
# "Generate" button, the second shows the resulting audio and a status line.
with gr.Blocks() as app:
    gr.Markdown(value="# 🎧 Midnight History ASMR TTS")
    gr.Markdown(value="Convert your text into soothing ASMR audio with background rain.")

    with gr.Row():
        # Input controls
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter Text",
                placeholder="Paste your script...",
                lines=8,
            )
            emotion_choice = gr.Dropdown(
                choices=["calm", "neutral"],
                value="calm",
                label="Emotion",
            )
            speed_slider = gr.Slider(
                minimum=0.7,
                maximum=1.3,
                value=0.9,
                step=0.05,
                label="Speed",
            )
            bg_audio = gr.Audio(
                label="Upload Background (Optional)",
                type="filepath",
            )
            btn = gr.Button(value="Generate")

        # Results
        with gr.Column():
            audio_out = gr.Audio(label="Output", type="filepath")
            status = gr.Textbox(label="Status")

    # Wire the button to the synthesis function; the two return values of
    # generate_audio map onto the audio player and the status box, in order.
    btn.click(
        fn=generate_audio,
        inputs=[text_input, emotion_choice, speed_slider, bg_audio],
        outputs=[audio_out, status],
    )

app.launch(share=True)