Spaces:

Nick021402
/

Text2podcast

Build error

App Files Files Community

Text2podcast / app.py

Nick021402

Update app.py

7eae664 verified 9 months ago

raw

history blame contribute delete

3.6 kB

	import re
	import numpy as np
	from transformers import pipeline
	import gradio as gr

	# Speaker name -> model identifier handed to the text-to-speech pipeline.
	# The keys are the exact names accepted inside [Speaker]...[/Speaker] tags.
	VOICES = dict(
	    (
	        ("Amy (Female)", "microsoft/vits-piper-en-us-amy"),
	        ("Joe (Male)", "microsoft/vits-piper-en-us-joe"),
	        ("Clara (Female)", "microsoft/vits-piper-en-us-clb"),
	        ("Ryan (Male)", "microsoft/vits-piper-en-us-jvs"),
	    )
	)

	def parse_segments(text):
	    """Extract ``[Speaker]text[/Speaker]`` segments from a script.

	    Args:
	        text: Raw script. ``None`` or an empty string yields no segments.

	    Returns:
	        A ``(segments, warning)`` tuple. ``segments`` is a list of
	        ``(speaker, text)`` pairs, in document order, for every tagged
	        span whose speaker is a key of ``VOICES``; the text is stripped of
	        surrounding whitespace. ``warning`` is ``None`` when the whole
	        input was consumed by known-speaker segments, otherwise a message
	        saying that untagged content or unknown speaker names were found.
	    """
	    if not text:
	        return [], None

	    # Literal brackets must be escaped; the closing tag has to repeat the
	    # opening speaker name. DOTALL lets a segment span several lines.
	    pattern = re.compile(
	        r'\[(?P<speaker>[^\]]+?)\](?P<text>.*?)\[/(?P=speaker)\]',
	        re.DOTALL,
	    )

	    valid_segments = []
	    has_unknown_speaker = False
	    has_untagged_content = False
	    cursor = 0

	    for match in pattern.finditer(text):
	        # Anything other than whitespace between two segments is content
	        # that will not be spoken, so it is worth a warning.
	        if text[cursor:match.start()].strip():
	            has_untagged_content = True
	        cursor = match.end()

	        speaker = match.group('speaker')
	        if speaker in VOICES:
	            valid_segments.append((speaker, match.group('text').strip()))
	        else:
	            has_unknown_speaker = True

	    # Trailing text after the last closing tag.
	    if text[cursor:].strip():
	        has_untagged_content = True

	    if has_unknown_speaker or has_untagged_content:
	        return valid_segments, f"Warning: Found {len(valid_segments)} valid segments, but text contains untagged content or invalid speaker names"

	    return valid_segments, None

	def generate_podcast(input_text):
	    """Convert speaker-tagged text into one multi-voice audio track.

	    Args:
	        input_text: Script in ``[Speaker Name]text[/Speaker Name]`` format.

	    Returns:
	        ``((sampling_rate, samples), status)``. ``samples`` is a 1-D numpy
	        array holding every synthesized segment, each followed by a short
	        silence. When nothing can be synthesized, or any step fails, the
	        audio part is ``(22050, <empty array>)`` and ``status`` explains
	        why; this function does not raise.
	    """
	    fallback_rate = 22050
	    pause_samples = 5000  # silence appended after each segment
	    no_segments_msg = "No valid speaker segments found. Please use the format: [Speaker Name]text[/Speaker Name]"

	    # Broad catch is deliberate: this is the UI boundary, and any failure
	    # (model download, inference, bad input) is reported as a status string.
	    try:
	        segments, warning = parse_segments(input_text)

	        # A tag pair with no text gives the TTS model nothing to say.
	        segments = [(speaker, text) for speaker, text in segments if text]
	        if not segments:
	            return (fallback_rate, np.zeros(0)), no_segments_msg

	        pieces = []
	        sampling_rate = fallback_rate
	        current_pipe = None
	        current_model = ""

	        for speaker, text in segments:
	            model_name = VOICES[speaker]

	            # Only one model is held at a time; it is swapped when the
	            # speaker's model differs from the one currently loaded.
	            if current_model != model_name:
	                current_pipe = None  # drop the old model before loading
	                current_pipe = pipeline("text-to-speech", model=model_name)
	                current_model = model_name

	            output = current_pipe(text)
	            # NOTE(review): assumes all voices share one sampling rate; the
	            # rate of the last segment is reported for the whole track.
	            sampling_rate = output["sampling_rate"]

	            # The pipeline may return (channels, samples); flatten so every
	            # chunk is 1-D and can be joined with the 1-D silence below.
	            chunk = np.ravel(np.asarray(output["audio"]))
	            pieces.append(chunk)
	            pieces.append(np.zeros(pause_samples, dtype=chunk.dtype))

	        final_audio = np.concatenate(pieces)

	        status = "Podcast generated successfully!"
	        if warning:
	            status += " " + warning

	        return (sampling_rate, final_audio), status

	    except Exception as e:
	        return (fallback_rate, np.zeros(0)), f"Error: {str(e)}"

	# Create Gradio interface
	def podcast_interface(text):
	    """Gradio callback: run ``generate_podcast`` and shape its result.

	    Returns a ``(audio, status)`` pair. ``audio`` is the
	    ``(sampling_rate, samples)`` tuple when samples were produced, and
	    ``gr.update()`` when the sample array is empty.
	    """
	    waveform, message = generate_podcast(text)
	    rate, samples = waveform
	    if samples.size > 0:
	        return (rate, samples), message
	    return gr.update(), message

	# Gradio interface: one multi-line text input, wired to podcast_interface,
	# with an audio output and a status text output.
	demo = gr.Interface(
	fn=podcast_interface,
	inputs=gr.Textbox(
	label="Input Text with Speaker Tags",
	lines=12,
	placeholder="""Example format:
	[Amy (Female)]Welcome to our podcast![/Amy (Female)]
	[Joe (Male)]Today we're discussing AI innovations.[/Joe (Male)]"""
	),
	# Output order matches the (audio, status) pair returned by podcast_interface.
	outputs=[
	gr.Audio(label="Generated Podcast", type="numpy"),
	gr.Textbox(label="Status", value="Ready")
	],
	# Sample script supplied through the `examples` argument; it uses three of
	# the speaker names defined in VOICES.
	examples=[
	["""[Amy (Female)]Welcome to our podcast![/Amy (Female)]
	[Joe (Male)]Today we're discussing AI innovations.[/Joe (Male)]
	[Clara (Female)]This is Clara speaking![/Clara (Female)]"""]
	],
	title="🎙️ Multi-Voice Podcast Generator",
	description="Generate podcasts with multiple free AI voices using Microsoft's Piper TTS models. Use [SpeakerName] tags to assign different voices to different text segments.",
	theme="soft",
	allow_flagging="never"
	)

	# Start the Gradio app only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    demo.launch()