jonloporto committed on
Commit
c1930c4
·
verified ·
1 Parent(s): 47960f1

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -93
app.py DELETED
@@ -1,93 +0,0 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
- import torch
4
- from diffusers import DiffusionPipeline
5
-
6
# Speech recognition: Whisper (base checkpoint) through the transformers pipeline.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
)

# Image generation: Stable Diffusion v1.5. Half precision is selected only
# when a CUDA device is available; otherwise full precision on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype={"cuda": torch.float16, "cpu": torch.float32}[device],
).to(device)
16
-
17
# Speech-to-text function
def transcribe_audio(audio):
    """Convert audio to text using Whisper.

    Args:
        audio: Input handed to the module-level ``transcriber`` pipeline.
            The UI in this file supplies a file path. ``None`` or an
            empty/whitespace-only path means nothing was recorded.

    Returns:
        The transcript with surrounding whitespace removed, ``""`` when no
        audio was supplied, or a message starting with
        ``"Error transcribing audio: "`` if transcription raises.
    """
    # Treat "no recording" and an empty path the same way instead of
    # passing a meaningless path to the model.
    if audio is None or (isinstance(audio, str) and not audio.strip()):
        return ""

    try:
        result = transcriber(audio)
        # Transcripts may carry leading/trailing whitespace; strip it so the
        # text is clean when shown to the user or used as a prompt.
        return result["text"].strip()
    except Exception as e:
        # Failures are reported as text so the UI can display them.
        return f"Error transcribing audio: {str(e)}"
28
-
29
# Image generation function
def generate_image_from_text(prompt, num_inference_steps=50, guidance_scale=7.5):
    """Generate an image from a text prompt using Stable Diffusion.

    Args:
        prompt: Text description of the desired image.
        num_inference_steps: Number of denoising steps passed to the
            module-level ``pipe``. Defaults to the previously hard-coded 50.
        guidance_scale: Guidance scale passed to ``pipe``. Defaults to the
            previously hard-coded 7.5.

    Returns:
        A ``(image, status)`` tuple. ``image`` is the first generated image,
        or ``None`` when the prompt is empty or generation fails; ``status``
        is a human-readable message describing the outcome.
    """
    # Reject missing or whitespace-only prompts before touching the model.
    if not prompt or not prompt.strip():
        return None, "Please provide a text prompt"

    # Drop surrounding whitespace so it reaches neither the model nor the
    # status message.
    prompt = prompt.strip()

    try:
        with torch.no_grad():
            image = pipe(
                prompt,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
            ).images[0]
        return image, f"✓ Generated image from prompt: '{prompt}'"
    except Exception as e:
        # Failures are reported through the status text shown in the UI.
        return None, f"Error generating image: {str(e)}"
41
-
42
# Combined function: speech -> text -> image
def speech_to_image(audio):
    """Convert speech to text, then generate an image from the text.

    Args:
        audio: Audio input forwarded to ``transcribe_audio``; ``None`` when
            nothing was recorded or uploaded.

    Returns:
        A ``(image, status)`` tuple. ``image`` is ``None`` when there is no
        audio, transcription fails, or generation fails; ``status`` holds
        the transcript and the generation status, or the error message.
    """
    # Nothing to transcribe: say so directly rather than reporting an
    # empty transcript.
    if audio is None:
        return None, "Please record or upload audio first"

    # Step 1: Convert speech to text
    text_prompt = transcribe_audio(audio)

    # Match the exact prefix produced by transcribe_audio on failure. A bare
    # "Error" check would reject legitimate transcripts that merely begin
    # with that word (e.g. "Error message on a screen").
    if text_prompt.startswith("Error transcribing audio:"):
        return None, text_prompt

    # Step 2: Generate image from text
    image, status = generate_image_from_text(text_prompt)

    return image, f"Transcript: '{text_prompt}'\n\n{status}"
55
-
56
# Gradio interface with tabs
with gr.Blocks(title="AI Image Generation from Speech") as demo:
    gr.Markdown("# 🎨 AI Image Generation from Speech")
    gr.Markdown("Speak your image description, and the AI will generate an image based on your words!")

    # Tab 1: audio in -> transcript -> generated image.
    with gr.Tab("🎤 Speech to Image"):
        gr.Markdown("Record or upload audio with your image description")
        audio_input = gr.Audio(label="Record Audio", type="filepath")
        generate_btn = gr.Button("Generate Image from Speech", variant="primary")
        output_image = gr.Image(label="Generated Image")
        output_text = gr.Textbox(label="Status", interactive=False)

        # The handler returns (image, status), matching the two outputs.
        generate_btn.click(
            speech_to_image,
            inputs=audio_input,
            outputs=[output_image, output_text],
        )

    # Tab 2: typed prompt -> generated image.
    with gr.Tab("⌨️ Text to Image"):
        gr.Markdown("Or type a description directly")
        text_input = gr.Textbox(
            label="Enter Image Description",
            placeholder="e.g., a beautiful sunset over mountains",
            lines=3,
        )
        text_generate_btn = gr.Button("Generate Image", variant="primary")
        text_output_image = gr.Image(label="Generated Image")
        text_output_status = gr.Textbox(label="Status", interactive=False)

        text_generate_btn.click(
            generate_image_from_text,
            inputs=text_input,
            outputs=[text_output_image, text_output_status],
        )

# Launch the interface only when run as a script
if __name__ == "__main__":
    demo.launch()