Sayiqa committed on
Commit
98755cd
·
verified ·
1 Parent(s): 8f51067

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -28
app.py CHANGED
@@ -133,7 +133,6 @@
133
  # # Launch Gradio interface
134
  # iface.launch(debug=True, share=True)
135
 
136
-
137
  import subprocess
138
 
139
  # Install required libraries
@@ -189,7 +188,9 @@ speech_to_text = pipeline(
189
 
190
  # Load Stable Diffusion model for text-to-image
191
  text_to_image = StableDiffusionPipeline.from_pretrained(
192
- "runwayml/stable-diffusion-v1-5"
 
 
193
  )
194
  device = "cuda" if torch.cuda.is_available() else "cpu"
195
  text_to_image.to(device)
@@ -221,40 +222,23 @@ def transcribe_audio(audio_path):
221
  @lru_cache(maxsize=10)
222
  def generate_image_from_text(text):
223
  try:
224
- image = text_to_image(text, height=256, width=256).images[0] # Generate smaller images for speed
 
 
 
 
225
  return image
226
  except Exception as e:
227
  return f"Error in image generation: {str(e)}"
228
 
229
  # Optimized combined processing function
230
  def process_audio_and_generate_image(audio_path):
231
- transcription_result = {"result": None}
232
- image_result = {"result": None}
233
-
234
- # Function to run transcription and image generation in parallel
235
- def transcription_thread():
236
- transcription_result["result"] = transcribe_audio(audio_path)
237
-
238
- def image_generation_thread():
239
- transcription = transcription_result["result"]
240
- if transcription and "Error" not in transcription:
241
- image_result["result"] = generate_image_from_text(transcription)
242
-
243
- # Start both tasks in parallel
244
- t1 = threading.Thread(target=transcription_thread)
245
- t2 = threading.Thread(target=image_generation_thread)
246
-
247
- t1.start()
248
- t2.start()
249
-
250
- t1.join() # Wait for transcription to finish
251
- t2.join() # Wait for image generation to finish
252
-
253
- transcription = transcription_result["result"]
254
- image = image_result["result"]
255
-
256
  if "Error" in transcription:
257
  return None, transcription
 
 
 
258
  if isinstance(image, str) and "Error" in image:
259
  return None, image
260
 
@@ -271,3 +255,4 @@ iface = gr.Interface(
271
 
272
  # Launch Gradio interface
273
  iface.launch(debug=True, share=True)
 
 
133
  # # Launch Gradio interface
134
  # iface.launch(debug=True, share=True)
135
 
 
136
  import subprocess
137
 
138
  # Install required libraries
 
188
 
189
  # Load Stable Diffusion model for text-to-image
190
  text_to_image = StableDiffusionPipeline.from_pretrained(
191
+ "runwayml/stable-diffusion-v1-5",
192
+ torch_dtype=torch.float16, # Use mixed precision for speed
193
+ revision="fp16", # Ensure half precision
194
  )
195
  device = "cuda" if torch.cuda.is_available() else "cpu"
196
  text_to_image.to(device)
 
222
  @lru_cache(maxsize=10)
223
  def generate_image_from_text(text):
224
  try:
225
+ image = text_to_image(
226
+ text,
227
+ height=512, # Reduced image size for speed
228
+ width=512
229
+ ).images[0]
230
  return image
231
  except Exception as e:
232
  return f"Error in image generation: {str(e)}"
233
 
234
  # Optimized combined processing function
235
  def process_audio_and_generate_image(audio_path):
236
+ transcription = transcribe_audio(audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  if "Error" in transcription:
238
  return None, transcription
239
+
240
+ # Start image generation after transcription
241
+ image = generate_image_from_text(transcription)
242
  if isinstance(image, str) and "Error" in image:
243
  return None, image
244
 
 
255
 
256
  # Launch Gradio interface
257
  iface.launch(debug=True, share=True)
258
+