Spaces:

Sayiqa
/

IMAGE

Sleeping

App Files Community

Sayiqa committed on Dec 16, 2024

Commit

98755cd

verified ·

1 Parent(s): 8f51067

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -28

app.py CHANGED Viewed

@@ -133,7 +133,6 @@
 # # Launch Gradio interface
 # iface.launch(debug=True, share=True)
 import subprocess
 # Install required libraries
@@ -189,7 +188,9 @@ speech_to_text = pipeline(
 # Load Stable Diffusion model for text-to-image
 text_to_image = StableDiffusionPipeline.from_pretrained(
-    "runwayml/stable-diffusion-v1-5"
 )
 device = "cuda" if torch.cuda.is_available() else "cpu"
 text_to_image.to(device)
@@ -221,40 +222,23 @@ def transcribe_audio(audio_path):
 @lru_cache(maxsize=10)
 def generate_image_from_text(text):
     try:
-        image = text_to_image(text, height=256, width=256).images[0]  # Generate smaller images for speed
         return image
     except Exception as e:
         return f"Error in image generation: {str(e)}"
 # Optimized combined processing function
 def process_audio_and_generate_image(audio_path):
-    transcription_result = {"result": None}
-    image_result = {"result": None}
-    # Function to run transcription and image generation in parallel
-    def transcription_thread():
-        transcription_result["result"] = transcribe_audio(audio_path)
-    def image_generation_thread():
-        transcription = transcription_result["result"]
-        if transcription and "Error" not in transcription:
-            image_result["result"] = generate_image_from_text(transcription)
-    # Start both tasks in parallel
-    t1 = threading.Thread(target=transcription_thread)
-    t2 = threading.Thread(target=image_generation_thread)
-    t1.start()
-    t2.start()
-    t1.join()  # Wait for transcription to finish
-    t2.join()  # Wait for image generation to finish
-    transcription = transcription_result["result"]
-    image = image_result["result"]
     if "Error" in transcription:
         return None, transcription
     if isinstance(image, str) and "Error" in image:
         return None, image
@@ -271,3 +255,4 @@ iface = gr.Interface(
 # Launch Gradio interface
 iface.launch(debug=True, share=True)

 # # Launch Gradio interface
 # iface.launch(debug=True, share=True)
 import subprocess
 # Install required libraries
 # Load Stable Diffusion model for text-to-image
 text_to_image = StableDiffusionPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5",
+    torch_dtype=torch.float16,  # Load weights in half precision for speed
+    revision="fp16",  # Ensure half precision
 )
 device = "cuda" if torch.cuda.is_available() else "cpu"
 text_to_image.to(device)
 @lru_cache(maxsize=10)
 def generate_image_from_text(text):
     try:
+        image = text_to_image(
+            text,
+            height=512,  # Raised from 256 to the model's native 512x512 resolution
+            width=512
+        ).images[0]
         return image
     except Exception as e:
         return f"Error in image generation: {str(e)}"
 # Optimized combined processing function
 def process_audio_and_generate_image(audio_path):
+    transcription = transcribe_audio(audio_path)
     if "Error" in transcription:
         return None, transcription
+    # Start image generation after transcription
+    image = generate_image_from_text(transcription)
     if isinstance(image, str) and "Error" in image:
         return None, image
 # Launch Gradio interface
 iface.launch(debug=True, share=True)