# Source: Hugging Face Spaces file view (uploader: jonloporto, commit 3c5d69c, "Upload 3 files")
# -*- coding: utf-8 -*-
"""
Image to Voice - Hugging Face Spaces
Converts images to text and then to speech
"""
import tempfile

import gradio as gr
from supertonic import TTS
from transformers import pipeline
# Initialize the image-to-text (captioning) pipeline once at import time;
# the default model is whatever `transformers` selects for this task.
image_to_text = pipeline("image-to-text")
# Module-level TTS singleton; stays None until get_tts() lazily creates it,
# so the Space starts without paying the TTS download/load cost.
tts = None
def get_tts():
    """Return the shared TTS engine, creating it on the first call.

    Lazily instantiating the engine keeps app startup fast; subsequent
    calls reuse the cached module-level instance.
    """
    global tts
    if tts is not None:
        return tts
    tts = TTS(auto_download=True)
    return tts
def image_to_voice(image):
    """
    Convert an image to a spoken description.

    Args:
        image: PIL Image (or None) supplied by the Gradio input component.

    Returns:
        tuple: (audio_file_path, text_description) on success, or
        (None, error_message) on failure / missing input.
    """
    if image is None:
        return None, "Please upload an image."
    try:
        # Caption the image with the image-to-text pipeline
        result = image_to_text(image)
        text = result[0]['generated_text']
        # Synthesize speech from the caption
        tts_model = get_tts()
        style = tts_model.get_voice_style(voice_name="M5")
        wav, duration = tts_model.synthesize(text, voice_style=style)
        # Write to a unique temp file: a fixed "output.wav" would be
        # clobbered by concurrent requests on a shared Space.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        tts_model.save_audio(wav, output_path)
        return output_path, text
    except Exception as e:
        # App boundary: surface the error in the UI instead of crashing.
        return None, f"Error: {str(e)}"
# Create the Gradio interface: image input on the left, generated audio
# and the caption text on the right.
with gr.Blocks(title="Image to Voice") as demo:
    gr.Markdown("# 🖼️ Image to Voice Converter")
    gr.Markdown("Upload an image and get an audio description of it!")
    with gr.Row():
        with gr.Column():
            # Input column: uploaded image plus the trigger button
            image_input = gr.Image(type="pil", label="Upload Image")
            generate_btn = gr.Button("Generate Audio", variant="primary")
        with gr.Column():
            # Output column: synthesized speech (as a file path) and the
            # caption text used to generate it
            audio_output = gr.Audio(label="Generated Audio", type="filepath")
            text_output = gr.Textbox(label="Image Description", lines=5)
    # Wire the button to the conversion function
    generate_btn.click(
        fn=image_to_voice,
        inputs=image_input,
        outputs=[audio_output, text_output]
    )
    # Examples gallery — currently empty; populate `examples` with image paths
    gr.Examples(
        examples=[],
        inputs=image_input,
        label="Example Images (add your own examples)"
    )

# Launch the web app when run as a script (HF Spaces invokes this)
if __name__ == "__main__":
    demo.launch()