File size: 631 Bytes
345b405
5a916c5
88e81cb
54f265c
5a916c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29fbf15
5a916c5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import gradio as gr

# Hugging Face Hub models, written in the "{src}/{repo}" form gr.load() expects
# (src must be "models", "spaces" or "huggingface"; anything else raises
# ValueError, which is why the old "tts_models/..." id could never load).
# NOTE(review): the image model is the one this file referenced before; the
# speech-recognition model is an assumption -- confirm it is the one you want.
IMAGE_MODEL = "models/stabilityai/stable-diffusion-3.5-large"
SPEECH_TO_TEXT_MODEL = "models/openai/whisper-large-v3"

# Load the remote models once at startup and do NOT call .launch() on them:
# launch() blocks, so launching a loaded model here would keep the interface
# defined below from ever being served. The loaded objects are used as plain
# callables instead.
transcriber = gr.load(SPEECH_TO_TEXT_MODEL)
image_generator = gr.load(IMAGE_MODEL)


def process_audio_and_generate_image(audio_path):
    """Transcribe an uploaded audio file and generate an image from the text.

    Args:
        audio_path: Filesystem path of the uploaded audio, as provided by
            ``gr.Audio(type="filepath")``; ``None`` when nothing was uploaded.

    Returns:
        An ``(image, transcription)`` tuple, in the same order as the
        interface's ``outputs`` list.

    Raises:
        gr.Error: If no audio was uploaded or the transcription came back
            empty, so the user sees a readable message instead of a traceback.
    """
    if not audio_path:
        raise gr.Error("Please upload an audio file first.")

    # NOTE(review): assumes the loaded speech model returns the transcript as
    # a single string when called -- confirm for the model you deploy.
    transcription = transcriber(audio_path)
    if not transcription or not str(transcription).strip():
        raise gr.Error("No speech could be transcribed from the audio.")

    # The transcript is used verbatim as the text-to-image prompt; the result
    # is handed to gr.Image unchanged.
    image = image_generator(transcription)
    return image, transcription


# Gradio interface
interface = gr.Interface(
    fn=process_audio_and_generate_image,
    inputs=gr.Audio(type="filepath", label="Upload an Audio File (WAV/MP3)"),
    outputs=[
        gr.Image(label="Generated Image"),
        gr.Textbox(label="Transcription"),
    ],
    title="Voice-to-Image Generator",
    description="Upload an audio file to transcribe speech to text and generate an image based on the transcription.",
)

# Launch the interface
interface.launch()