|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
|
|
|
|
|
|
# NOTE(review): "text-to-image" is not a task name accepted by
# transformers.pipeline (Stable Diffusion 3 is distributed through the
# `diffusers` library, e.g. StableDiffusion3Pipeline) — this call will raise
# at startup. Confirm the intended backend before shipping; kept as-is here
# because switching to `diffusers` would add a new dependency.
txt2img = pipeline("text-to-image", model="stabilityai/stable-diffusion-3")

# Bark text-to-speech model. The Hub repo id is "suno/bark";
# the original "suno-ai/bark" id does not resolve on the Hugging Face Hub.
txt2speech = pipeline("text-to-speech", model="suno/bark")
|
|
|
|
|
|
|
|
def multimodal_boost(input_text):
    """Generate an image and spoken audio from ``input_text``.

    Parameters
    ----------
    input_text : str
        Prompt used for both image generation and speech synthesis.

    Returns
    -------
    tuple
        ``(image, (sampling_rate, waveform))`` — the shapes Gradio's
        ``"image"`` and ``"audio"`` output components expect.
    """
    # Assumes the image pipeline returns a batch/list of images; take the
    # first one — TODO confirm against the actual text-to-image backend.
    img = txt2img(input_text)[0]

    # transformers' text-to-speech pipeline returns a single dict
    # {"audio": np.ndarray, "sampling_rate": int}, not a list, so the
    # original `[0]` indexing raised KeyError: 0.
    aud = txt2speech(input_text)

    # Gradio's audio output expects (sample_rate, numpy_array); the original
    # (audio, "audio/wav") tuple is not a format Gradio understands.
    return img, (aud["sampling_rate"], aud["audio"])
|
|
|
|
|
|
|
|
# Build the Gradio UI: one text input, paired image + audio outputs.
iface = gr.Interface(
    fn=multimodal_boost,
    inputs=gr.Textbox(label="Digite ou fale"),
    outputs=["image", "audio"],
    # live=True was removed: it re-runs inference on every keystroke, which
    # is prohibitively expensive for diffusion + TTS models. Inference now
    # runs only on explicit submit.
    description="Boost Multimodal HF: Texto para Imagem e Áudio, tudo livre!",
)

# Guard the launch so importing this module does not start a web server.
if __name__ == "__main__":
    iface.launch()