import os
import tempfile

import streamlit as st
import torch
from transformers import pipeline
from diffusers import StableDiffusionPipeline
from groq import Groq

# Set up the Groq client (reads the GROQ_API_KEY environment variable)
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Load the Whisper Tiny ASR model on CPU
device = "cpu"
whisper_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=device)

# Load the Stable Diffusion model
sd_model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)

# Streamlit UI
st.title("Voice-to-Image Generator")

# Upload audio
audio_file = st.file_uploader("Upload an audio file to generate an image", type=["wav", "mp3", "ogg"])

if audio_file:
    # Save the upload to a temporary file, keeping its original extension
    suffix = os.path.splitext(audio_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
        temp_audio.write(audio_file.read())
        temp_audio_path = temp_audio.name

    # Transcribe speech to text
    with torch.no_grad():
        result = whisper_pipeline(temp_audio_path, return_timestamps=True)
    text_output = result["text"]
    st.write("Transcribed Text:", text_output)

    # Generate an image from the transcript using Stable Diffusion
    with st.spinner("Generating image..."):
        image = sd_model(text_output).images[0]
    st.image(image, caption="Generated Image")

    # Optional: send the transcript to the Groq API for additional processing
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": text_output}],
        model="llama-3.3-70b-versatile",
    )
    st.write("Groq AI Response:", chat_completion.choices[0].message.content)

st.write("Powered by MERAJ GRAPHICS")