File size: 2,738 Bytes
1432be0
4cdf0cd
1432be0
4cdf0cd
 
 
1432be0
4cdf0cd
1432be0
4cdf0cd
 
 
 
7ca1854
1432be0
4cdf0cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1432be0
4cdf0cd
 
1432be0
4cdf0cd
1432be0
4cdf0cd
 
 
1432be0
4cdf0cd
 
1432be0
4cdf0cd
1432be0
4cdf0cd
 
 
 
 
 
31eeafc
4cdf0cd
 
31eeafc
4cdf0cd
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import tempfile
import textwrap

import speech_recognition as sr
import streamlit as st
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image, ImageDraw, ImageFont

# Load Stable Diffusion
@st.cache_resource
def load_model():
    """Load the Stable Diffusion v1.5 pipeline on the best available device.

    The weights are loaded in half precision only when CUDA is available;
    on CPU the pipeline is loaded in float32, because float16 inference on
    CPU typically fails (unsupported Half kernels) or is extremely slow.

    Returns:
        The ``StableDiffusionPipeline`` moved to ``"cuda"`` when available,
        otherwise to ``"cpu"``.
    """
    model_id = "runwayml/stable-diffusion-v1-5"
    use_cuda = torch.cuda.is_available()
    dtype = torch.float16 if use_cuda else torch.float32
    pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype)
    return pipe.to("cuda" if use_cuda else "cpu")

# Module-level pipeline instance; generate_meme calls it to render images.
sd_pipe = load_model()

# Meme generator function and its caption-rendering helpers
def _load_caption_font(size=48):
    """Return the bold caption font, or Pillow's default font as a fallback."""
    try:
        return ImageFont.truetype("DejaVuSans-Bold.ttf", size)
    except (OSError, ImportError):
        # OSError: the font file is missing or unreadable.
        # ImportError: Pillow was built without FreeType support.
        return ImageFont.load_default()


def _measure_line(draw, line, font, stroke_width):
    """Return ``(left, right, bottom)`` extents of *line* drawn at (0, 0)."""
    try:
        left, _top, right, bottom = draw.textbbox(
            (0, 0), line, font=font, stroke_width=stroke_width
        )
    except (AttributeError, ValueError):
        # Older Pillow releases may lack textbbox or reject non-TrueType
        # fonts there; those releases still provide textsize.
        width, height = draw.textsize(line, font=font)
        return 0, width, height
    return left, right, bottom


def _draw_caption(image, caption, stroke_width=3, margin=20, line_gap=10):
    """Draw *caption* upper-cased, wrapped and centered at the top of *image*.

    The image is modified in place.
    """
    draw = ImageDraw.Draw(image)
    font = _load_caption_font()
    img_width = image.size[0]
    max_width = img_width - 2 * margin
    text = caption.upper()

    def overflows(candidate_lines):
        for candidate in candidate_lines:
            left, right, _bottom = _measure_line(
                draw, candidate, font, stroke_width
            )
            if right - left > max_width:
                return True
        return False

    # Start from the original 25-character wrap and tighten it until every
    # line fits inside the image; textwrap splits over-long words, so the
    # loop always makes progress and stops at one character per line.
    wrap_width = 25
    lines = textwrap.wrap(text, width=wrap_width)
    while wrap_width > 1 and overflows(lines):
        wrap_width -= 1
        lines = textwrap.wrap(text, width=wrap_width)

    y_text = margin
    for line in lines:
        left, right, bottom = _measure_line(draw, line, font, stroke_width)
        # Subtract the bbox's left offset so the visible ink is centered.
        x_text = (img_width - (right - left)) / 2 - left
        draw.text(
            (x_text, y_text),
            line,
            font=font,
            fill="white",
            stroke_width=stroke_width,
            stroke_fill="black",
        )
        y_text += bottom + line_gap


def generate_meme(prompt, caption=""):
    """Generate an image from *prompt* and overlay *caption* on it.

    Args:
        prompt: Text prompt passed to the module-level ``sd_pipe`` pipeline.
        caption: Optional caption; when non-empty it is upper-cased, wrapped
            and drawn in white with a black outline at the top of the image.

    Returns:
        The first image produced by the pipeline, with the caption drawn on
        it when one was given.
    """
    # Step 1: Generate base image
    image = sd_pipe(prompt, num_inference_steps=30).images[0]

    # Step 2: Add caption text
    if caption:
        _draw_caption(image, caption)

    return image

# Speech-to-text
def speech_to_text(audio_file):
    """Transcribe an audio file using ``Recognizer.recognize_google``.

    Args:
        audio_file: Value handed to ``sr.AudioFile`` (the UI passes the path
            of a temporary ``.wav`` file).

    Returns:
        The recognized text, or the string "Could not recognize speech."
        when the audio is unintelligible or the recognition request fails.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError):
        # UnknownValueError: speech was unintelligible.
        # RequestError: the recognition service could not be reached or
        # returned an error. Anything else is a real bug and propagates.
        return "Could not recognize speech."

# Streamlit UI
# Prompt used for every generated meme background.
BACKGROUND_PROMPT = "funny cartoon meme background"
# Must match the message speech_to_text returns when recognition fails, so a
# failed transcription is not rendered as a meme caption.
TRANSCRIPTION_FAILED = "Could not recognize speech."

st.title("🎭 Meme Generator (Text & Voice)")

mode = st.radio("Choose Input Mode:", ["Text", "Voice"])

if mode == "Text":
    text_input = st.text_area("Enter meme text:")
    if st.button("Generate Meme"):
        if text_input.strip():
            img = generate_meme(BACKGROUND_PROMPT, caption=text_input)
            st.image(img, caption="Generated Meme", use_column_width=True)
        else:
            st.warning("Please enter some text.")

else:
    audio_file = st.file_uploader("Upload voice file (.wav)", type=["wav"])
    if audio_file:
        # The upload is spooled to disk so speech_to_text receives a path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            tmp_file.write(audio_file.read())
            tmp_path = tmp_file.name

        try:
            text = speech_to_text(tmp_path)
        finally:
            # delete=False leaves the file behind; remove it so reruns of the
            # script do not accumulate temporary files.
            os.remove(tmp_path)

        st.write(f"📝 Transcribed Text: {text}")

        if st.button("Generate Meme from Voice"):
            if text.strip() and text != TRANSCRIPTION_FAILED:
                img = generate_meme(BACKGROUND_PROMPT, caption=text)
                st.image(img, caption="Generated Meme", use_column_width=True)
            else:
                st.warning("Speech not recognized.")