# app.py — AI Image & Voice Creator (Gradio demo)
import tempfile

import gradio as gr
import torch
from huggingface_hub import InferenceClient
# Using Hugging Face Inference API for speed and no local GPU requirement
# Image Model: FLUX.1-schnell (State-of-the-art fast generation)
# TTS Model: facebook/mms-tts-eng (Simple, reliable TTS)
client = InferenceClient()
def generate_all(text):
# 1. Generate Image
print(f"Generating image for: {text}")
image = client.text_to_image(text, model="black-forest-labs/FLUX.1-schnell")
# 2. Generate Audio (TTS)
print(f"Generating audio for: {text}")
# We'll use a widely available TTS model via the API
audio_response = client.text_to_speech(text, model="facebook/mms-tts-eng")
# Save audio to a temporary file for Gradio to play
audio_path = "output.wav"
with open(audio_path, "wb") as f:
f.write(audio_response)
return image, audio_path
# Create the UI
with gr.Blocks(title="AI Image & Voice Creator") as demo:
gr.Markdown("# 🎨 AI Image & Voice Creator")
gr.Markdown("Type a prompt below to generate an image and hear it spoken!")
with gr.Row():
with gr.Column():
input_text = gr.Textbox(label="Enter your prompt", placeholder="A futuristic city at sunset...")
btn = gr.Button("Generate ✨", variant="primary")
with gr.Row():
output_img = gr.Image(label="Generated Image")
output_audio = gr.Audio(label="Spoken Prompt", type="filepath")
btn.click(fn=generate_all, inputs=input_text, outputs=[output_img, output_audio])
gr.Examples(
examples=["A cute robot painting a masterpiece", "A mysterious forest with glowing mushrooms"],
inputs=input_text
)
if __name__ == "__main__":
demo.launch()