alpercagann committed on
Commit
d9df526
·
1 Parent(s): e626454

Update: audio-to-image GPU-ready Gradio app

Browse files
Files changed (1) hide show
  1. app.py +32 -18
app.py CHANGED
@@ -1,24 +1,38 @@
1
  import gradio as gr
2
- from app import controller # Make sure this exists or replace with your function
 
 
3
 
 
 
 
 
 
 
 
 
 
4
  def generate_image(audio, prompt):
5
- image = controller.generate(
6
- audio=audio,
7
- prompt=prompt,
8
- cfg_scale=7.5,
9
- num_inference_steps=30,
10
- height=384,
11
- width=384,
12
- )
13
- return image
14
-
15
- demo = gr.Interface(
 
 
16
  fn=generate_image,
17
- inputs=["audio", "text"],
18
- outputs="image",
19
- title="SonicDiffusion",
20
- description="Turn audio into images with a diffusion model!",
 
 
21
  )
22
 
23
- demo.launch()
24
-
 
1
  import gradio as gr
2
+ import torch
3
+ from diffusers import StableDiffusionPipeline
4
+ from pydub import AudioSegment
5
 
6
+ # === Use GPU if available ===
7
+ device = "cuda" if torch.cuda.is_available() else "cpu"
8
+
9
+ # === Load model ===
10
+ model_id = "stabilityai/stable-diffusion-2-1"
11
+ pipe = StableDiffusionPipeline.from_pretrained(model_id)
12
+ pipe.to(device)
13
+
14
+ # === Define function ===
15
  def generate_image(audio, prompt):
16
+ if audio is None:
17
+ return None
18
+
19
+ # Save audio temporarily
20
+ audio_path = "train.wav"
21
+ audio.export(audio_path, format="wav")
22
+
23
+
24
+ result = pipe(prompt, guidance_scale=7.5, num_inference_steps=30).images[0]
25
+ return result
26
+
27
+
28
+ interface = gr.Interface(
29
  fn=generate_image,
30
+ inputs=[
31
+ gr.Audio(source="upload", type="pydub"),
32
+ gr.Textbox(label="Prompt", value="A surreal dreamscape made of music"),
33
+ ],
34
+ outputs=gr.Image(type="pil"),
35
+ title="🎧 SonicDiffusion: Audio → Image Generator"
36
  )
37
 
38
+ interface.launch()