meraj12 committed on
Commit
c0f0405
·
verified ·
1 Parent(s): d915112

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that turns uploaded speech into an image.

Flow: the user uploads an audio file, Whisper (tiny) transcribes it, the
transcript is used as the prompt for Stable Diffusion v1.5, and, when a
``GROQ_API_KEY`` is configured, the transcript is also sent to a Groq chat
model and the reply is displayed.
"""

import os
import tempfile
from pathlib import Path

import streamlit as st
import torch
import torchaudio  # noqa: F401  (not referenced here; kept from the original import list)
from diffusers import StableDiffusionPipeline
from groq import APIError, Groq
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

WHISPER_MODEL_ID = "openai/whisper-tiny"
SD_MODEL_ID = "runwayml/stable-diffusion-v1-5"
GROQ_MODEL_ID = "llama-3.3-70b-versatile"

# Everything runs on CPU.
device = "cpu"

# Set up Groq API.
# The Groq step is optional, so a missing key must not take the whole app
# down: only build the client when a key is actually configured.
_groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=_groq_api_key) if _groq_api_key else None


@st.cache_resource
def _load_whisper():
    """Load the Whisper model, its processor and the ASR pipeline once.

    Streamlit re-executes this script on every interaction; caching the
    loaded objects avoids re-reading the model weights on each rerun.

    Returns:
        A ``(model, processor, asr_pipeline)`` tuple.
    """
    model = AutoModelForSpeechSeq2Seq.from_pretrained(WHISPER_MODEL_ID).to(device)
    proc = AutoProcessor.from_pretrained(WHISPER_MODEL_ID)
    asr = pipeline(
        "automatic-speech-recognition",
        model=model,
        # Pass the two processor halves explicitly; this is the form the
        # pipeline factory accepts when given an already-built model object.
        tokenizer=proc.tokenizer,
        feature_extractor=proc.feature_extractor,
        # Split long recordings into 30 s windows so uploads longer than
        # Whisper's input window can still be transcribed.
        chunk_length_s=30,
        device=device,
    )
    return model, proc, asr


@st.cache_resource
def _load_stable_diffusion():
    """Load the Stable Diffusion pipeline once and keep it across reruns."""
    return StableDiffusionPipeline.from_pretrained(SD_MODEL_ID).to(device)


def _transcribe(upload) -> str:
    """Transcribe an uploaded audio file and return the stripped text.

    The upload is written to a temporary file because the pipeline is called
    with a file path. The temporary file is always removed afterwards.

    Args:
        upload: The object returned by ``st.file_uploader``.

    Returns:
        The transcript with surrounding whitespace removed (may be empty).
    """
    # Keep the real extension (.mp3/.ogg/.wav) instead of always using .wav.
    suffix = Path(upload.name).suffix or ".wav"
    temp_audio_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
            temp_audio_path = temp_audio.name
            # getvalue() returns the full buffer regardless of stream position.
            temp_audio.write(upload.getvalue())
        # The file is closed here so it can be reopened by path.
        with torch.no_grad():
            return whisper_pipeline(temp_audio_path)["text"].strip()
    finally:
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)


# Load Whisper model (Tiny)
whisper_model, processor, whisper_pipeline = _load_whisper()

# Load Stable Diffusion model
sd_model = _load_stable_diffusion()

# Streamlit UI
st.title("Voice-to-Image Generator")

# Upload audio
audio_file = st.file_uploader("Upload an audio file", type=["wav", "mp3", "ogg"])
if audio_file:
    # Convert speech to text
    text_output = _transcribe(audio_file)
    st.write("Transcribed Text:", text_output)

    if not text_output:
        # An empty prompt would produce a meaningless image and chat request.
        st.warning("No speech was recognised in the uploaded audio.")
    else:
        # Generate an image using Stable Diffusion
        with st.spinner("Generating image..."):
            image = sd_model(text_output).images[0]
        st.image(image, caption="Generated Image")

        # Optional: Use Groq API for additional processing
        if client is None:
            st.info("Set GROQ_API_KEY to enable the Groq AI response.")
        else:
            try:
                chat_completion = client.chat.completions.create(
                    messages=[{"role": "user", "content": text_output}],
                    model=GROQ_MODEL_ID,
                )
            except APIError as err:
                # Show the failure in the UI instead of crashing after the
                # image has already been generated.
                st.error(f"Groq request failed: {err}")
            else:
                st.write(
                    "Groq AI Response:",
                    chat_completion.choices[0].message.content,
                )

st.write("Powered by Whisper, Stable Diffusion, and Groq API")