Spaces:

Sayiqa
/

stabilityai-stable-diffusion-3.5-large

Runtime error

App Files Files Community

Sayiqa committed on Dec 15, 2024

Commit

5a916c5

verified ·

1 Parent(s): 90b8ddc

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -2

app.py CHANGED Viewed

@@ -1,4 +1,65 @@
 import gradio as gr
-gr.load("models/stabilityai/stable-diffusion-3.5-large").launch()
-gr.load("tts_models/multilingual/multi-dataset/xtts_v2").launch()

+# import gradio as gr
+# gr.load("models/stabilityai/stable-diffusion-3.5-large").launch()
+# gr.load("tts_models/multilingual/multi-dataset/xtts_v2").launch()
 import gradio as gr
+from transformers import pipeline
+from diffusers import StableDiffusionPipeline
+import torch
+# Load the speech-to-text model (OpenAI Whisper)
+speech_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-base")
+# Load the Stable Diffusion model
+text_to_image = StableDiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-3.5-large", torch_dtype=torch.float16
+).to("cuda" if torch.cuda.is_available() else "cpu")
+# Function to transcribe audio
+def transcribe_audio(audio_file):
+    try:
+        result = speech_to_text(audio_file)
+        transcription = result["text"]
+        return transcription
+    except Exception as e:
+        return f"Error in transcription: {str(e)}"
+# Function to generate image from text
+def generate_image_from_text(prompt):
+    try:
+        image = text_to_image(prompt).images[0]  # Generate one image
+        return image
+    except Exception as e:
+        return f"Error in image generation: {str(e)}"
+# Combined function: Transcribe and generate image
+def process_audio_and_generate_image(audio_file):
+    transcription = transcribe_audio(audio_file)
+    if "Error" in transcription:
+        return None, transcription
+    image = generate_image_from_text(transcription)
+    if isinstance(image, str) and "Error" in image:
+        return None, image
+    return image, transcription
+# Gradio interface
+interface = gr.Interface(
+    fn=process_audio_and_generate_image,
+    inputs=gr.Audio(type="filepath", label="Upload an Audio File (WAV/MP3)"),
+    outputs=[
+        gr.Image(label="Generated Image"),
+        gr.Textbox(label="Transcription"),
+    ],
+    title="Voice-to-Image Generator",
+    description="Upload an audio file to transcribe speech to text and generate an image based on the transcription.",
+)
+# Launch the interface
+interface.launch()