File size: 1,686 Bytes
c0f0405
 
 
 
 
69b0d74
c0f0405
 
 
 
69b0d74
ac942b2
69b0d74
c0f0405
c2aeaad
c0f0405
 
 
 
 
 
 
 
29cc163
69b0d74
c0f0405
69b0d74
c0f0405
 
69b0d74
c0f0405
69b0d74
c0f0405
69b0d74
 
c0f0405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29cc163
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import torch
import streamlit as st
import torchaudio
import tempfile
from transformers import pipeline
from diffusers import StableDiffusionPipeline
from groq import Groq

# Set up Groq API
# The key is read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Both models below are placed on the CPU.
device = "cpu"


@st.cache_resource
def _load_whisper_pipeline():
    """Build the Whisper Tiny speech-recognition pipeline.

    Wrapped in ``st.cache_resource`` so the model is constructed once and
    the same object is reused on later script reruns.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
        device=device,
    )


@st.cache_resource
def _load_sd_model():
    """Load the Stable Diffusion v1.5 pipeline and move it to ``device``.

    Wrapped in ``st.cache_resource`` so the weights are loaded once and
    the same object is reused on later script reruns.
    """
    return StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5"
    ).to(device)


# Streamlit re-executes this script from the top on every user interaction;
# going through the cached loaders avoids rebuilding both models each time.
# Load Whisper Tiny ASR model
whisper_pipeline = _load_whisper_pipeline()

# Load Stable Diffusion model
sd_model = _load_sd_model()

# Streamlit UI
st.title("Voice-to-Image Generator")

# Upload audio
audio_file = st.file_uploader("Upload an audio file generate image", type=["wav", "mp3", "ogg"])

if audio_file:
    # Keep the upload's own extension so an mp3/ogg file is not written out
    # under a ".wav" name; fall back to ".wav" when the name has none.
    upload_suffix = os.path.splitext(audio_file.name)[1] or ".wav"

    # delete=False lets the file outlive the `with` that closes it, so it can
    # be reopened by path below; the `finally` clause removes it afterwards.
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=upload_suffix)
    temp_audio_path = temp_audio.name
    try:
        with temp_audio:
            # getvalue() returns the whole buffer regardless of the current
            # read position of the uploaded file object.
            temp_audio.write(audio_file.getvalue())

        # Transcribe speech to text
        with torch.no_grad():
            result = whisper_pipeline(temp_audio_path, return_timestamps=True)
    finally:
        # The temporary copy is only needed for transcription; remove it
        # whether or not transcription succeeded.
        os.remove(temp_audio_path)

    text_output = result["text"]
    st.write("Transcribed Text:", text_output)

    if not text_output.strip():
        # Nothing was recognised: skip the image and chat steps rather than
        # sending an empty prompt to them.
        st.warning("No speech was detected in the uploaded audio.")
    else:
        # Generate an image using Stable Diffusion, with the transcription
        # as the prompt.
        with st.spinner("Generating image..."):
            image = sd_model(text_output).images[0]
            st.image(image, caption="Generated Image")

        # Optional: Use Groq API for additional processing. The transcription
        # is sent as a single user message and the first choice is displayed.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text_output}],
            model="llama-3.3-70b-versatile",
        )
        st.write("Groq AI Response:", chat_completion.choices[0].message.content)

st.write("Powered by MERAJ GRAPHICS")