| import gradio as gr
|
| from huggingface_hub import InferenceClient
|
| import torch
|
|
|
|
|
|
|
|
|
|
|
# Shared Hugging Face Inference API client used by generate_all.
# NOTE(review): constructed with defaults — presumably picks up an HF token
# from the environment if one is set; confirm auth setup for deployment.
client = InferenceClient()
|
|
|
def generate_all(text):
    """Generate an image and spoken audio for a text prompt.

    Parameters
    ----------
    text : str
        The user's prompt, used both as the image prompt and as the
        text to synthesize into speech.

    Returns
    -------
    tuple
        ``(image, audio_path)`` — the image returned by the
        text-to-image model, and the filesystem path of a WAV file
        containing the synthesized speech.
    """
    import tempfile

    print(f"Generating image for: {text}")
    image = client.text_to_image(text, model="black-forest-labs/FLUX.1-schnell")

    print(f"Generating audio for: {text}")
    audio_response = client.text_to_speech(text, model="facebook/mms-tts-eng")

    # Write the audio bytes to a unique temp file rather than a fixed
    # "output.wav": Gradio serves concurrent users, and a shared fixed path
    # gets clobbered mid-request, handing one user another user's audio.
    # delete=False so the file survives for Gradio to stream back.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(audio_response)
        audio_path = f.name

    return image, audio_path
|
|
|
|
|
# --- UI layout ---------------------------------------------------------------
with gr.Blocks(title="AI Image & Voice Creator") as demo:
    gr.Markdown("# 🎨 AI Image & Voice Creator")
    gr.Markdown("Type a prompt below to generate an image and hear it spoken!")

    # Input row: prompt textbox plus the single generate button.
    with gr.Row():
        with gr.Column():
            prompt_box = gr.Textbox(
                label="Enter your prompt",
                placeholder="A futuristic city at sunset...",
            )
            generate_btn = gr.Button("Generate ✨", variant="primary")

    # Output row: generated image and spoken-prompt audio side by side.
    with gr.Row():
        image_out = gr.Image(label="Generated Image")
        audio_out = gr.Audio(label="Spoken Prompt", type="filepath")

    # One click drives both outputs from the same prompt.
    generate_btn.click(
        fn=generate_all,
        inputs=prompt_box,
        outputs=[image_out, audio_out],
    )

    gr.Examples(
        examples=[
            "A cute robot painting a masterpiece",
            "A mysterious forest with glowing mushrooms",
        ],
        inputs=prompt_box,
    )

if __name__ == "__main__":
    demo.launch()
|
|
|