Spaces:

Sayiqa
/

voice_clone

Runtime error

App Files Community

Sayiqa committed on Dec 15, 2024

Commit

5a352b6

verified ·

1 Parent(s): 7c56c39

Update app.py

Browse files

Files changed (1) hide show

app.py +131 -17

app.py CHANGED Viewed

@@ -1,17 +1,128 @@
 import gradio as gr
 import requests
 import time
 from PIL import Image
 from io import BytesIO
-# AssemblyAI API Key
 ASSEMBLYAI_API_KEY = "your_assemblyai_api_key_here"
-# DeepAI API Key
-DEEPAI_API_KEY = "your_deepai_api_key_here"
 # Function to convert speech to text using AssemblyAI API
 def speech_to_text(audio_file):
-    # Upload audio to AssemblyAI for transcription
     upload_url = "https://api.assemblyai.com/v2/upload"
     headers = {
         "authorization": ASSEMBLYAI_API_KEY
@@ -54,25 +165,30 @@ def speech_to_text(audio_file):
         time.sleep(5)  # Wait 5 seconds before polling again
-# Function to generate an image based on text using DeepAI's Image Generation API
 def generate_image_from_text(text):
-    image_generation_url = "https://api.deepai.org/api/text2img"
     headers = {
-        "api-key": DEEPAI_API_KEY
     }
     payload = {
-        "text": text
     }
-    # Request image generation from DeepAI
-    response = requests.post(image_generation_url, data=payload, headers=headers)
     if response.status_code == 200:
-        # Get the image URL from the response
-        image_url = response.json()["output_url"]
-        return image_url
     else:
-        return "Failed to generate image."
 # Function to download image from URL and return as a PIL image
 def get_image_from_url(image_url):
@@ -81,7 +197,7 @@ def get_image_from_url(image_url):
         img = Image.open(BytesIO(response.content))
         return img
     except Exception as e:
-        return "Error downloading image: " + str(e)
 # Gradio Interface function
 def process_audio(audio_file):
@@ -111,5 +227,3 @@ iface = gr.Interface(fn=process_audio,
 iface.launch()

+# import gradio as gr
+# import requests
+# import time
+# from PIL import Image
+# from io import BytesIO
+# # AssemblyAI API Key
+# ASSEMBLYAI_API_KEY = "your_assemblyai_api_key_here"
+# # DeepAI API Key
+# DEEPAI_API_KEY = "your_deepai_api_key_here"
+# # Function to convert speech to text using AssemblyAI API
+# def speech_to_text(audio_file):
+#     # Upload audio to AssemblyAI for transcription
+#     upload_url = "https://api.assemblyai.com/v2/upload"
+#     headers = {
+#         "authorization": ASSEMBLYAI_API_KEY
+#     }
+#     # Upload the audio file to AssemblyAI
+#     with open(audio_file, 'rb') as file:
+#         response = requests.post(upload_url, headers=headers, files={"file": file})
+#     if response.status_code != 200:
+#         return "Error uploading audio."
+#     audio_url = response.json()["upload_url"]
+#     # Request transcription from AssemblyAI
+#     transcript_url = "https://api.assemblyai.com/v2/transcript"
+#     transcript_request = {
+#         "audio_url": audio_url
+#     }
+#     transcript_response = requests.post(transcript_url, json=transcript_request, headers=headers)
+#     if transcript_response.status_code != 200:
+#         return "Error requesting transcription."
+#     transcript_id = transcript_response.json()["id"]
+#     # Poll for transcription completion
+#     while True:
+#         polling_url = f"https://api.assemblyai.com/v2/transcript/{transcript_id}"
+#         polling_response = requests.get(polling_url, headers=headers)
+#         if polling_response.status_code != 200:
+#             return "Error polling for transcription status."
+#         status = polling_response.json()["status"]
+#         if status == "completed":
+#             return polling_response.json()["text"]
+#         elif status == "failed":
+#             return "Transcription failed."
+#         time.sleep(5)  # Wait 5 seconds before polling again
+# # Function to generate an image based on text using DeepAI's Image Generation API
+# def generate_image_from_text(text):
+#     image_generation_url = "https://api.deepai.org/api/text2img"
+#     headers = {
+#         "api-key": DEEPAI_API_KEY
+#     }
+#     payload = {
+#         "text": text
+#     }
+#     # Request image generation from DeepAI
+#     response = requests.post(image_generation_url, data=payload, headers=headers)
+#     if response.status_code == 200:
+#         # Get the image URL from the response
+#         image_url = response.json()["output_url"]
+#         return image_url
+#     else:
+#         return "Failed to generate image."
+# # Function to download image from URL and return as a PIL image
+# def get_image_from_url(image_url):
+#     try:
+#         response = requests.get(image_url)
+#         img = Image.open(BytesIO(response.content))
+#         return img
+#     except Exception as e:
+#         return "Error downloading image: " + str(e)
+# # Gradio Interface function
+# def process_audio(audio_file):
+#     # Convert speech to text
+#     text = speech_to_text(audio_file)
+#     if text and text != "Error uploading audio." and text != "Error requesting transcription.":
+#         print(f"Transcribed text: {text}")  # Debug output for transcribed text
+#         # Generate image from the transcribed text
+#         image_url = generate_image_from_text(text)
+#         if "Failed" not in image_url:
+#             print(f"Image URL: {image_url}")  # Debug output for image URL
+#             # Download the image from URL and return it as a PIL image
+#             return get_image_from_url(image_url)
+#         else:
+#             return image_url
+#     else:
+#         return "Error processing audio."
+# # Set up Gradio interface
+# iface = gr.Interface(fn=process_audio,
+#                      inputs=gr.Audio(type="filepath"),  # Audio input
+#                      outputs=gr.Image(type="pil"),  # Image output as PIL image
+#                      live=True,
+#                      title="Speech-to-Text to Image Generator")
+# iface.launch()
 import gradio as gr
 import requests
 import time
 from PIL import Image
 from io import BytesIO
+# API keys
 ASSEMBLYAI_API_KEY = "your_assemblyai_api_key_here"
+STABILITY_AI_API_KEY = "your_stability_ai_api_key_here"
 # Function to convert speech to text using AssemblyAI API
 def speech_to_text(audio_file):
     upload_url = "https://api.assemblyai.com/v2/upload"
     headers = {
         "authorization": ASSEMBLYAI_API_KEY
         time.sleep(5)  # Wait 5 seconds before polling again
+# Function to generate an image based on text using Stability AI (Stable Diffusion)
 def generate_image_from_text(text):
     # Purpose: POST `text` (plus a fixed 512x512 width/height) as JSON to the
     # image-generation endpoint below and hand back where the image lives.
     #
     # Parameters:
     #   text: the prompt placed under the "text" key of the request payload.
     #
     # Returns (always a str):
     #   - the value of "image_url" from the JSON response when the HTTP status
     #     is 200 and that value is non-empty;
     #   - "Failed to generate image: No image URL found in response." when the
     #     status is 200 but "image_url" is missing or empty;
     #   - "Failed to generate image: <status code>" for any non-200 status.
     #
     # NOTE(review): the author's own comments mark both the endpoint URL and
     # the "image_url" response key as assumptions -- confirm them against the
     # Stability AI API reference before relying on this function.
     # NOTE(review): no `timeout` is passed to requests.post, so a stalled
     # connection would presumably block the caller -- confirm and add one.
+    image_generation_url = "https://stability.ai/api/v3/generate"  # Stability AI API endpoint (assuming)
     # Bearer-token auth built from the module-level STABILITY_AI_API_KEY.
     headers = {
+        "Authorization": f"Bearer {STABILITY_AI_API_KEY}"
     }
     payload = {
+        "text": text,
+        "width": 512,  # Adjust image dimensions as needed
+        "height": 512
     }
+    # Request image generation from Stability AI
+    response = requests.post(image_generation_url, json=payload, headers=headers)
     if response.status_code == 200:
+        # Get the image URL from the response (assuming the response contains a URL)
+        image_url = response.json().get("image_url", "")
         # An absent key falls back to "", which takes the failure branch below.
+        if image_url:
+            return image_url
+        else:
+            return "Failed to generate image: No image URL found in response."
     else:
         # Errors are reported as message strings, not raised.
+        return f"Failed to generate image: {response.status_code}"
 # Function to download image from URL and return as a PIL image
 def get_image_from_url(image_url):
         img = Image.open(BytesIO(response.content))
         return img
     except Exception as e:
+        return f"Error downloading image: {str(e)}"
 # Gradio Interface function
 def process_audio(audio_file):
 iface.launch()