Spaces:

Sayiqa
/

voice_clone

Runtime error

Sayiqa committed on Dec 15, 2024

Commit

b27f82d

verified ·

1 Parent(s): 6b5e201

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import gradio as gr
 import requests
 import time
 # AssemblyAI API Key
 ASSEMBLYAI_API_KEY = "YOUR_ASSEMBLYAI_API_KEY"
@@ -67,6 +69,15 @@ def generate_image_from_text(text):
     else:
         return "Failed to generate image."
 # Gradio Interface function
 def process_audio(audio_file):
     # Convert speech to text
@@ -74,15 +85,20 @@ def process_audio(audio_file):
     if text and text != "Error uploading audio." and text != "Error requesting transcription.":
         # Generate image from the transcribed text
         image_url = generate_image_from_text(text)
-        return image_url
     else:
         return "Error processing audio."
 # Set up Gradio interface
 iface = gr.Interface(fn=process_audio,
-                     inputs=gr.Audio(type="filepath"),  # Changed to 'filepath' for Gradio 3.x
-                     outputs=gr.Image(type="url"),
                      live=True,
                      title="Speech-to-Text to Image Generator")
 iface.launch()

 import gradio as gr
 import requests
 import time
+from PIL import Image
+from io import BytesIO
 # AssemblyAI API Key
 ASSEMBLYAI_API_KEY = "YOUR_ASSEMBLYAI_API_KEY"
     else:
         return "Failed to generate image."
+# Function to download image from URL and return as a PIL image
+def get_image_from_url(image_url):
+    """Download the image at *image_url* and return it as a PIL image.
+
+    Returns the opened PIL image on success. On any failure (network
+    error, timeout, non-2xx HTTP status, or bytes that cannot be decoded
+    as an image) returns a string starting with
+    "Error downloading image: " instead of raising.
+    """
+    try:
+        # requests applies no timeout by default; bound the wait so an
+        # unresponsive host cannot hang the request forever.
+        response = requests.get(image_url, timeout=30)
+        # Surface HTTP errors directly instead of handing an error page
+        # to Image.open, which would fail with a misleading decode error.
+        response.raise_for_status()
+        img = Image.open(BytesIO(response.content))
+        return img
+    except Exception as e:
+        return "Error downloading image: " + str(e)
 # Gradio Interface function
 def process_audio(audio_file):
     # Convert speech to text
     if text and text != "Error uploading audio." and text != "Error requesting transcription.":
         # Generate image from the transcribed text
         image_url = generate_image_from_text(text)
+        if "Failed" not in image_url:
+            # Download the image from URL and return it as a PIL image
+            return get_image_from_url(image_url)
+        else:
+            return image_url
     else:
         return "Error processing audio."
 # Set up Gradio interface
 iface = gr.Interface(fn=process_audio,
+                     inputs=gr.Audio(type="filepath"),  # Audio input
+                     outputs=gr.Image(type="pil"),  # Image output as PIL image
                      live=True,
                      title="Speech-to-Text to Image Generator")
 iface.launch()