Spaces:

ak6868674
/

TTS-ASMR

Sleeping

App Files Files Community

TTS-ASMR / app.py

ak6868674

Update app.py

ba530c7 verified 7 months ago

raw

history blame contribute delete

3.22 kB

	import os
	import tempfile

	import gradio as gr
	import requests
	import soundfile as sf
	import torch
	from pydub import AudioSegment
	from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

	# Load the SpeechT5 processor, text-to-speech model and HiFi-GAN vocoder once,
	# at import time, so every request reuses the same instances.
	processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
	model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
	vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

	# Random but fixed speaker embedding of shape (1, 512).
	# The values come from a dedicated, seeded generator so the synthetic voice is
	# the same after every restart, and torch's global RNG state is left untouched.
	# NOTE(review): SpeechT5 is presumably meant to be driven by real x-vector
	# speaker embeddings; uniform random values may give an unnatural voice - confirm.
	speaker_embeddings = torch.rand(1, 512, generator=torch.Generator().manual_seed(0))

	# Rain background sound: local cache file and the URL it is fetched from.
	DEFAULT_RAIN = "rain.mp3"
	RAIN_URL = "https://cdn.pixabay.com/download/audio/2022/03/15/audio_7e9f0b47b6.mp3?filename=gentle-rain-ambient-11022.mp3"

	# Best-effort download of the default rain track on first start. A failure is
	# only logged; the app still starts, just without the default background.
	if not os.path.exists(DEFAULT_RAIN):
	    try:
	        # A timeout keeps a stalled CDN connection from hanging startup forever.
	        response = requests.get(RAIN_URL, timeout=30)
	        # Reject HTTP error responses so an error page is never saved as rain.mp3.
	        response.raise_for_status()
	        # Write to a side file and rename it into place, so an interrupted write
	        # cannot leave a truncated rain.mp3 that later starts would treat as valid.
	        partial_path = DEFAULT_RAIN + ".part"
	        with open(partial_path, "wb") as f:
	            f.write(response.content)
	        os.replace(partial_path, DEFAULT_RAIN)
	    except (requests.RequestException, OSError) as e:
	        print(f"Error downloading rain: {e}")

	def generate_audio(prompt, emotion, speed, background_audio):
	    """Synthesize speech for ``prompt`` and mix it with a background track.

	    Args:
	        prompt: Text to speak. Must contain at least one non-whitespace character.
	        emotion: When equal to ``"calm"``, pauses are inserted by prefixing the
	            text with ``"... "`` and replacing each ``"."`` with ``"... "``.
	        speed: Playback-rate multiplier; ``1.0`` leaves the audio unchanged.
	        background_audio: Path to a background audio file, or a falsy value to
	            use ``DEFAULT_RAIN``.

	    Returns:
	        A ``(output_path, status)`` tuple: the path of the exported MP3 file and
	        a status message for the UI.

	    Raises:
	        gr.Error: If ``prompt`` is empty or only whitespace.
	    """
	    if not prompt or not prompt.strip():
	        raise gr.Error("Text cannot be empty.")

	    # Add ASMR effect for calm emotion
	    if emotion == "calm":
	        prompt = "... " + prompt.replace(".", "... ")

	    inputs = processor(text=prompt, return_tensors="pt")
	    with torch.no_grad():
	        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

	    # Use a unique temporary WAV per call so concurrent requests cannot
	    # overwrite each other's intermediate file; it is removed once loaded.
	    wav_fd, temp_wav = tempfile.mkstemp(suffix=".wav")
	    os.close(wav_fd)
	    try:
	        sf.write(temp_wav, speech.numpy(), samplerate=16000)
	        final_audio = AudioSegment.from_file(temp_wav)
	    finally:
	        os.remove(temp_wav)

	    # Adjust speed for ASMR: reinterpret the raw samples at a scaled frame rate,
	    # then convert back to the original frame rate.
	    if speed != 1.0:
	        final_audio = final_audio._spawn(final_audio.raw_data, overrides={
	            "frame_rate": int(final_audio.frame_rate * speed)
	        }).set_frame_rate(final_audio.frame_rate)

	    # Add background rain or user-uploaded audio. This step is best-effort:
	    # on failure the voice-only audio is still returned, with a warning status.
	    status = "✅ Audio generated successfully!"
	    try:
	        if background_audio:
	            bg = AudioSegment.from_file(background_audio).apply_gain(-20)
	        else:
	            bg = AudioSegment.from_file(DEFAULT_RAIN).apply_gain(-25)
	        # Loop the background so it covers the whole narration even when it is
	        # shorter than the voice track. Skip an empty background entirely,
	        # since there would be nothing to repeat.
	        if len(bg) > 0:
	            final_audio = final_audio.overlay(bg, loop=True)
	    except Exception as e:
	        print(f"Background merge failed: {e}")
	        status = "⚠️ Audio generated, but the background track could not be added."

	    # Unique output file per call, for the same concurrency reason as above.
	    out_fd, output_path = tempfile.mkstemp(suffix=".mp3")
	    os.close(out_fd)
	    final_audio.export(output_path, format="mp3")
	    return output_path, status

	# Gradio UI: inputs in the left column, generated audio and status on the right.
	with gr.Blocks() as app:
	    gr.Markdown(value="# 🎧 Midnight History ASMR TTS")
	    gr.Markdown(value="Convert your text into soothing ASMR audio with background rain.")

	    with gr.Row():
	        # Left column: script text, voice options, optional background, trigger.
	        with gr.Column():
	            text_input = gr.Textbox(
	                label="Enter Text",
	                placeholder="Paste your script...",
	                lines=8,
	            )
	            emotion_choice = gr.Dropdown(
	                choices=["calm", "neutral"],
	                value="calm",
	                label="Emotion",
	            )
	            speed_slider = gr.Slider(
	                minimum=0.7,
	                maximum=1.3,
	                value=0.9,
	                step=0.05,
	                label="Speed",
	            )
	            bg_audio = gr.Audio(
	                label="Upload Background (Optional)",
	                type="filepath",
	            )
	            btn = gr.Button(value="Generate")
	        # Right column: the results returned by generate_audio.
	        with gr.Column():
	            audio_out = gr.Audio(label="Output", type="filepath")
	            status = gr.Textbox(label="Status")

	    # Input order matches generate_audio's parameters; outputs match its return tuple.
	    btn.click(
	        fn=generate_audio,
	        inputs=[text_input, emotion_choice, speed_slider, bg_audio],
	        outputs=[audio_out, status],
	    )

	app.launch(share=True)