Sayiqa committed on
Commit
fd03042
·
verified ·
1 Parent(s): 14859e1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline
from huggingface_hub import login
from transformers import pipeline
6
+
# Set Hugging Face token.
# The token is read from the HF_TOKEN environment variable so that no
# credential (or non-working placeholder) lives in the source file.
# Login is skipped when the variable is unset or empty; presumably the
# models loaded by this script can then still be fetched anonymously if
# they are public -- confirm for any gated model.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(hf_token)
10
+
# Load Hugging Face models
speech_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-base")

# Load Stable Diffusion model using diffusers.
# Half precision is requested only when a CUDA device is available; on the
# CPU fallback the weights are loaded as float32, since float16 inference on
# CPU is generally unsupported or impractically slow.
device = "cuda" if torch.cuda.is_available() else "cpu"
text_to_image = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)
18
+
# Speech-to-text function
def transcribe_audio(audio_file):
    """Return the text recognised in ``audio_file``.

    The file is passed to the module-level ``speech_to_text`` pipeline and
    the ``"text"`` entry of its result is returned. Exceptions are not
    propagated: any failure is reported as a string that starts with
    ``"Error in transcription: "`` followed by the exception message.
    """
    try:
        return speech_to_text(audio_file)["text"]
    except Exception as exc:
        return f"Error in transcription: {exc!s}"
27
+
# Text-to-image function
def generate_image_from_text(text):
    """Generate a single image for the prompt ``text``.

    Calls the module-level ``text_to_image`` pipeline and returns the first
    entry of its ``images`` output. Exceptions are not propagated: any
    failure is reported as a string that starts with
    ``"Error in image generation: "`` followed by the exception message.
    """
    try:
        output = text_to_image(text)
        # Only the first generated image is used.
        return output.images[0]
    except Exception as exc:
        return f"Error in image generation: {exc!s}"
35
+
# Combined processing function
def process_audio_and_generate_image(audio_file):
    """Transcribe ``audio_file`` and generate an image from the transcription.

    Returns an ``(image, text)`` pair: the generated image and the
    transcription on success, or ``None`` and an error message when either
    the transcription or the image generation step fails.
    """
    transcription = transcribe_audio(audio_file)
    # transcribe_audio signals failure with a string carrying this exact
    # prefix. Match the prefix instead of searching for "Error" anywhere in
    # the text, so that speech which merely contains the word "Error" is not
    # mistaken for a failed transcription.
    if transcription.startswith("Error in transcription:"):
        return None, transcription

    image = generate_image_from_text(transcription)
    # generate_image_from_text returns an image object on success and an
    # error string on failure, so any string result is an error message.
    if isinstance(image, str):
        return None, image

    return image, transcription
47
+
# Gradio interface: one audio input, and an image plus a text box as outputs.
_audio_input = gr.Audio(type="filepath", label="Upload audio file (WAV/MP3)")
_result_outputs = [
    gr.Image(label="Generated Image"),
    gr.Textbox(label="Transcription"),
]

iface = gr.Interface(
    fn=process_audio_and_generate_image,
    inputs=_audio_input,
    outputs=_result_outputs,
    title="Speech-to-Text and Image Generation",
    description="Upload an audio file to transcribe speech to text, and then generate an image based on the transcription.",
)

# Launch the interface (share=True requests a public share link).
iface.launch(share=True)