jonloporto committed on
Commit
82a5b85
·
verified ·
1 Parent(s): e319ff4

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -100
app.py DELETED
@@ -1,100 +0,0 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
- import torch
4
- from diffusers import DiffusionPipeline
5
-
6
# --- Model initialization (runs once at import time) ---

# Whisper handles the speech-to-text step.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")

# Stable Diffusion handles text-to-image; prefer GPU with fp16 when available.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=dtype,
).to(device)
16
-
17
# Speech-to-text function
def transcribe_audio(audio):
    """Convert audio to text using Whisper.

    Args:
        audio: Either a filepath/URL string, or — from a Gradio Audio
            component with ``type="numpy"`` — a ``(sample_rate, data)``
            tuple where ``data`` is a numpy array (int16, possibly stereo).

    Returns:
        The transcribed text, ``"No speech detected"`` when the transcript
        is empty, or an ``"Error transcribing audio: ..."`` message on failure.
    """
    if audio is None:
        return ""

    try:
        # Gradio Audio with type="numpy" returns a (sample_rate, audio_data) tuple.
        if isinstance(audio, tuple):
            sample_rate, audio_data = audio
            # Whisper's feature extractor expects mono float32 samples in
            # [-1, 1]; Gradio delivers int16 (and 2-D when recorded in stereo).
            if audio_data.ndim > 1:
                audio_data = audio_data.mean(axis=1)
            if audio_data.dtype.kind in "iu":
                audio_data = audio_data.astype("float32") / 32768.0
            else:
                audio_data = audio_data.astype("float32")
            # The ASR pipeline takes raw samples + rate as a dict input;
            # `sampling_rate=` is not a supported call keyword.
            result = transcriber({"raw": audio_data, "sampling_rate": sample_rate})
        else:
            result = transcriber(audio)

        text = result.get("text", "").strip()
        return text if text else "No speech detected"
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
35
-
36
# Image generation function
def generate_image_from_text(prompt):
    """Generate an image from a text prompt using Stable Diffusion.

    Args:
        prompt: Free-text description of the desired image.

    Returns:
        A ``(image, status_message)`` pair; ``image`` is ``None`` when the
        prompt is empty/blank or generation raises.
    """
    # Reject missing or whitespace-only prompts up front.
    if not prompt or not prompt.strip():
        return None, "Please provide a text prompt"

    try:
        # Inference only — no gradients needed.
        with torch.no_grad():
            output = pipe(prompt, num_inference_steps=50, guidance_scale=7.5)
        return output.images[0], f"✓ Generated image from prompt: '{prompt}'"
    except Exception as e:
        return None, f"Error generating image: {str(e)}"
48
-
49
# Combined function: speech -> text -> image
def speech_to_image(audio):
    """Convert speech to text, then generate an image from that text.

    Args:
        audio: Audio input in any form accepted by ``transcribe_audio``.

    Returns:
        A ``(image, status_message)`` pair; ``image`` is ``None`` when
        transcription fails or produces no usable prompt.
    """
    # Step 1: Convert speech to text.
    text_prompt = transcribe_audio(audio)

    # Propagate transcription failures instead of rendering the error text.
    if text_prompt.startswith("Error"):
        return None, text_prompt

    # Don't feed the "nothing heard" sentinel (or an empty transcript from
    # missing audio) into the image model as if it were a real prompt.
    if not text_prompt or text_prompt == "No speech detected":
        return None, "No speech detected — please record a description and try again."

    # Step 2: Generate image from text.
    image, status = generate_image_from_text(text_prompt)

    return image, f"Transcript: '{text_prompt}'\n\n{status}"
62
-
63
# --- Gradio UI: two tabs sharing the same generation backend ---
with gr.Blocks(title="AI Image Generation from Speech") as demo:
    gr.Markdown("# 🎨 AI Image Generation from Speech")
    gr.Markdown("Speak your image description, and the AI will generate an image based on your words!")

    # Tab 1: record/upload audio, transcribe it, then render an image.
    with gr.Tab("🎤 Speech to Image"):
        gr.Markdown("Record or upload audio with your image description")
        speech_audio = gr.Audio(label="Record Audio", type="numpy")
        speech_btn = gr.Button("Generate Image from Speech", variant="primary")
        speech_image = gr.Image(label="Generated Image")
        speech_status = gr.Textbox(label="Status", interactive=False)
        speech_btn.click(fn=speech_to_image, inputs=speech_audio, outputs=[speech_image, speech_status])

    # Tab 2: type the prompt directly, skipping transcription.
    with gr.Tab("⌨️ Text to Image"):
        gr.Markdown("Or type a description directly")
        typed_prompt = gr.Textbox(
            label="Enter Image Description",
            placeholder="e.g., a beautiful sunset over mountains",
            lines=3,
        )
        typed_btn = gr.Button("Generate Image", variant="primary")
        typed_image = gr.Image(label="Generated Image")
        typed_status = gr.Textbox(label="Status", interactive=False)
        typed_btn.click(fn=generate_image_from_text, inputs=typed_prompt, outputs=[typed_image, typed_status])

# Launch the interface only when run as a script.
if __name__ == "__main__":
    demo.launch()