Update app.py
Browse files
app.py
CHANGED
|
@@ -133,7 +133,6 @@
|
|
| 133 |
# # Launch Gradio interface
|
| 134 |
# iface.launch(debug=True, share=True)
|
| 135 |
|
| 136 |
-
|
| 137 |
import subprocess
|
| 138 |
|
| 139 |
# Install required libraries
|
|
@@ -189,7 +188,9 @@ speech_to_text = pipeline(
|
|
| 189 |
|
| 190 |
# Load Stable Diffusion model for text-to-image
|
| 191 |
text_to_image = StableDiffusionPipeline.from_pretrained(
|
| 192 |
-
"runwayml/stable-diffusion-v1-5"
|
|
|
|
|
|
|
| 193 |
)
|
| 194 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 195 |
text_to_image.to(device)
|
|
@@ -221,40 +222,23 @@ def transcribe_audio(audio_path):
|
|
| 221 |
@lru_cache(maxsize=10)
|
| 222 |
def generate_image_from_text(text):
|
| 223 |
try:
|
| 224 |
-
image = text_to_image(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
return image
|
| 226 |
except Exception as e:
|
| 227 |
return f"Error in image generation: {str(e)}"
|
| 228 |
|
| 229 |
# Optimized combined processing function
|
| 230 |
def process_audio_and_generate_image(audio_path):
|
| 231 |
-
|
| 232 |
-
image_result = {"result": None}
|
| 233 |
-
|
| 234 |
-
# Function to run transcription and image generation in parallel
|
| 235 |
-
def transcription_thread():
|
| 236 |
-
transcription_result["result"] = transcribe_audio(audio_path)
|
| 237 |
-
|
| 238 |
-
def image_generation_thread():
|
| 239 |
-
transcription = transcription_result["result"]
|
| 240 |
-
if transcription and "Error" not in transcription:
|
| 241 |
-
image_result["result"] = generate_image_from_text(transcription)
|
| 242 |
-
|
| 243 |
-
# Start both tasks in parallel
|
| 244 |
-
t1 = threading.Thread(target=transcription_thread)
|
| 245 |
-
t2 = threading.Thread(target=image_generation_thread)
|
| 246 |
-
|
| 247 |
-
t1.start()
|
| 248 |
-
t2.start()
|
| 249 |
-
|
| 250 |
-
t1.join() # Wait for transcription to finish
|
| 251 |
-
t2.join() # Wait for image generation to finish
|
| 252 |
-
|
| 253 |
-
transcription = transcription_result["result"]
|
| 254 |
-
image = image_result["result"]
|
| 255 |
-
|
| 256 |
if "Error" in transcription:
|
| 257 |
return None, transcription
|
|
|
|
|
|
|
|
|
|
| 258 |
if isinstance(image, str) and "Error" in image:
|
| 259 |
return None, image
|
| 260 |
|
|
@@ -271,3 +255,4 @@ iface = gr.Interface(
|
|
| 271 |
|
| 272 |
# Launch Gradio interface
|
| 273 |
iface.launch(debug=True, share=True)
|
|
|
|
|
|
| 133 |
# # Launch Gradio interface
|
| 134 |
# iface.launch(debug=True, share=True)
|
| 135 |
|
|
|
|
| 136 |
import subprocess
|
| 137 |
|
| 138 |
# Install required libraries
|
|
|
|
| 188 |
|
| 189 |
# Load Stable Diffusion model for text-to-image
|
| 190 |
# Pick the device first so the precision of the weights can match it.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    # Half precision roughly halves memory use and speeds up GPU inference.
    # NOTE(review): newer diffusers releases select the half-precision
    # checkpoint with `variant="fp16"` instead of `revision="fp16"` --
    # confirm against the installed diffusers version before switching.
    text_to_image = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16,
        revision="fp16",
    )
else:
    # float16 is intended for GPU inference; on CPU many operators are not
    # implemented (or are very slow) for half precision, so load the default
    # full-precision weights instead.
    text_to_image = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
    )

text_to_image.to(device)
|
|
|
|
| 222 |
@lru_cache(maxsize=10)
def _generate_image_cached(text):
    """Run the ``text_to_image`` pipeline for ``text`` and return its first image.

    Exceptions are allowed to propagate, so ``lru_cache`` only ever memoizes
    successful generations.
    """
    # 512x512 output keeps generation time down.
    return text_to_image(text, height=512, width=512).images[0]


def generate_image_from_text(text):
    """Generate an image for the prompt ``text``.

    Args:
        text: Prompt passed to the ``text_to_image`` pipeline.

    Returns:
        The first image produced by the pipeline on success, or a string of
        the form ``"Error in image generation: <message>"`` on failure.

    Successful results are cached per prompt (up to 10 entries). Failures are
    deliberately not cached, so a transient error (for example running out of
    memory) does not keep being returned for the same prompt on retry.
    """
    try:
        return _generate_image_cached(text)
    except Exception as e:
        return f"Error in image generation: {str(e)}"


# Keep the cache-management helpers the previously decorated function exposed.
generate_image_from_text.cache_info = _generate_image_cached.cache_info
generate_image_from_text.cache_clear = _generate_image_cached.cache_clear
|
| 233 |
|
| 234 |
# Optimized combined processing function
|
| 235 |
def process_audio_and_generate_image(audio_path):
|
| 236 |
+
transcription = transcribe_audio(audio_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
if "Error" in transcription:
|
| 238 |
return None, transcription
|
| 239 |
+
|
| 240 |
+
# Start image generation after transcription
|
| 241 |
+
image = generate_image_from_text(transcription)
|
| 242 |
if isinstance(image, str) and "Error" in image:
|
| 243 |
return None, image
|
| 244 |
|
|
|
|
| 255 |
|
| 256 |
# Launch Gradio interface
|
| 257 |
iface.launch(debug=True, share=True)
|
| 258 |
+
|