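Update app.py: select CUDA device when available (fall back to CPU), add author credit and visitor badge to the description, and add a line break in the article text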
Browse files
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
import whisper
|
| 3 |
from PIL import Image
|
| 4 |
|
|
@@ -9,7 +10,7 @@ from diffusers import StableDiffusionPipeline
|
|
| 9 |
|
| 10 |
whisper_model = whisper.load_model("small")
|
| 11 |
|
| 12 |
-
device="cpu"
|
| 13 |
|
| 14 |
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=MY_SECRET_TOKEN)
|
| 15 |
pipe.to(device)
|
|
@@ -50,7 +51,7 @@ gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="galler
|
|
| 50 |
title="Whisper to Stable Diffusion"
|
| 51 |
description="""
|
| 52 |
<p style='text-align: center;'>
|
| 53 |
-
This demo is running on CPU
|
| 54 |
Record an audio description of an image, stop recording, then hit the Submit button to get 2 images from Stable Diffusion.<br />
|
| 55 |
Your audio will be translated to English through OpenAI's Whisper, then sent as a prompt to Stable Diffusion.
|
| 56 |
Try it in French ! ;)<br />
|
|
@@ -61,7 +62,7 @@ Try it in French ! ;)<br />
|
|
| 61 |
article="""
|
| 62 |
<p style='text-align: center;'>—<br />
|
| 63 |
Whisper is a general-purpose speech recognition model. <br />
|
| 64 |
-
It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.<br />
|
| 65 |
Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
|
| 66 |
</p>
|
| 67 |
"""
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
import whisper
|
| 4 |
from PIL import Image
|
| 5 |
|
|
|
|
| 10 |
|
| 11 |
whisper_model = whisper.load_model("small")
|
| 12 |
|
| 13 |
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 14 |
|
| 15 |
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=MY_SECRET_TOKEN)
|
| 16 |
pipe.to(device)
|
|
|
|
| 51 |
title="Whisper to Stable Diffusion"
|
| 52 |
description="""
|
| 53 |
<p style='text-align: center;'>
|
| 54 |
+
This demo is running on CPU 🐢. Offered by Sylvain <a href='https://twitter.com/fffiloni' target='_blank'>@fffiloni</a> • <img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.whisper-to-stable-diffusion' style='display: inline-block' /><br />
|
| 55 |
Record an audio description of an image, stop recording, then hit the Submit button to get 2 images from Stable Diffusion.<br />
|
| 56 |
Your audio will be translated to English through OpenAI's Whisper, then sent as a prompt to Stable Diffusion.
|
| 57 |
Try it in French ! ;)<br />
|
|
|
|
| 62 |
article="""
|
| 63 |
<p style='text-align: center;'>—<br />
|
| 64 |
Whisper is a general-purpose speech recognition model. <br />
|
| 65 |
+
It is trained on a large dataset of diverse audio and is also a multi-task model that can perform<br />multilingual speech recognition as well as speech translation and language identification.<br />
|
| 66 |
Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
|
| 67 |
</p>
|
| 68 |
"""
|